diff --git "a/dense-baseline/metrics.jsonl" "b/dense-baseline/metrics.jsonl" new file mode 100644--- /dev/null +++ "b/dense-baseline/metrics.jsonl" @@ -0,0 +1,5024 @@ +{"step": 0, "train/loss": 3.6113941073417664, "train/lm_loss": 3.6113941073417664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0000000000000001e-07, "perf/tokens_per_sec": 5494.704409198039, "train/loss_prose": 3.731065273284912, "train/loss_math": 3.636972427368164, "train/loss_code": 3.055974006652832} +{"step": 1, "train/loss": 3.8202253580093384, "train/lm_loss": 3.8202253580093384, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0000000000000002e-07, "perf/tokens_per_sec": 26443.87127678078, "train/loss_math": 3.8233697414398193, "train/loss_code": 2.6807167530059814, "train/loss_prose": 4.0468690395355225} +{"step": 2, "train/loss": 2.892220437526703, "train/lm_loss": 2.892220437526703, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0000000000000004e-07, "perf/tokens_per_sec": 25820.60581794681, "train/loss_code": 2.28594708442688, "train/loss_prose": 3.9026757081349692} +{"step": 3, "train/loss": 3.1834592819213867, "train/lm_loss": 3.1834592819213867, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0000000000000003e-07, "perf/tokens_per_sec": 25582.06975708761, "train/loss_prose": 3.972584327061971, "train/loss_code": 2.38269180059433, "train/loss_math": 4.0191545486450195} +{"step": 4, "train/loss": 3.148709774017334, "train/lm_loss": 3.148709774017334, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.000000000000001e-07, "perf/tokens_per_sec": 26199.808126882457, "train/loss_prose": 4.33818244934082, "train/loss_code": 2.387345016002655, "train/loss_math": 3.767371495564779} +{"step": 5, "train/loss": 3.5591331124305725, "train/lm_loss": 3.5591331124305725, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.000000000000001e-07, "perf/tokens_per_sec": 26359.438045642986, "train/loss_code": 2.9404710133870444, "train/loss_prose": 4.007102131843567, "train/loss_math": 3.8791493574778237} +{"step": 6, "train/loss": 3.684957981109619, "train/lm_loss": 3.684957981109619, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.000000000000001e-07, "perf/tokens_per_sec": 26127.80698734054, "train/loss_code": 2.0316321849823, "train/loss_prose": 3.849726438522339, "train/loss_math": 4.0996994972229} +{"step": 7, "train/loss": 3.529506802558899, "train/lm_loss": 3.529506802558899, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.000000000000001e-07, "perf/tokens_per_sec": 26143.830486860268, "train/loss_math": 3.944546699523926, "train/loss_prose": 3.552019476890564, "train/loss_code": 2.676914691925049} +{"step": 8, "train/loss": 3.212507486343384, "train/lm_loss": 3.212507486343384, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9e-07, "perf/tokens_per_sec": 25463.99394076834, "train/loss_math": 3.7708399295806885, "train/loss_prose": 3.7898341019948325, "train/loss_code": 2.262959043184916} +{"step": 9, "train/loss": 3.264478087425232, "train/lm_loss": 3.264478087425232, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0000000000000002e-06, "perf/tokens_per_sec": 25543.8429413409, "train/loss_code": 2.3611276547114053, "train/loss_math": 3.8064881801605224} +{"step": 10, "train/loss": 3.5961992740631104, "train/lm_loss": 3.5961992740631104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1e-06, "perf/tokens_per_sec": 26380.4451296383, "train/loss_prose": 3.8809814453125, "train/loss_math": 3.5569385290145874, "train/loss_code": 2.250809907913208} +{"step": 11, "train/loss": 3.7748607993125916, "train/lm_loss": 3.7748607993125916, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2000000000000002e-06, "perf/tokens_per_sec": 24662.67177724758, "train/loss_prose": 4.061878824234009, "train/loss_code": 2.2012391090393066, "train/loss_math": 3.8441261053085327} +{"step": 12, "train/loss": 3.0682194232940674, "train/lm_loss": 3.0682194232940674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3e-06, "perf/tokens_per_sec": 23802.085377815954, "train/loss_prose": 3.8092188239097595, "train/loss_code": 2.327220141887665} +{"step": 13, "train/loss": 3.4281166195869446, "train/lm_loss": 3.4281166195869446, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4000000000000001e-06, "perf/tokens_per_sec": 25236.456504239406, "train/loss_code": 2.3504638274510703, "train/loss_math": 3.8804968992869058, "train/loss_prose": 4.36602520942688} +{"step": 14, "train/loss": 3.357207179069519, "train/lm_loss": 3.357207179069519, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5e-06, "perf/tokens_per_sec": 26609.6303184816, "train/loss_prose": 3.9555389881134033, "train/loss_math": 3.858330726623535, "train/loss_code": 2.424792925516764} +{"step": 15, "train/loss": 3.215243339538574, "train/lm_loss": 3.215243339538574, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6000000000000001e-06, "perf/tokens_per_sec": 26081.556622306427, "train/loss_code": 2.160367409388224, "train/loss_prose": 3.8144587874412537, "train/loss_math": 3.983009099960327} +{"step": 16, "train/loss": 3.632804036140442, "train/lm_loss": 3.632804036140442, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7000000000000002e-06, "perf/tokens_per_sec": 26092.01405759727, "train/loss_math": 3.9931899070739747, "train/loss_code": 2.690961718559265, "train/loss_prose": 3.714559555053711} +{"step": 17, "train/loss": 3.2953672409057617, "train/lm_loss": 3.2953672409057617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8e-06, "perf/tokens_per_sec": 25670.139459548303, "train/loss_prose": 3.8235837618509927, "train/loss_code": 2.6751983165740967, "train/loss_math": 4.1913933753967285} +{"step": 18, "train/loss": 3.765999972820282, "train/lm_loss": 3.765999972820282, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9e-06, "perf/tokens_per_sec": 23268.267264765913, "train/loss_prose": 3.6821285088857016, "train/loss_math": 3.6917598247528076, "train/loss_code": 4.314574241638184} +{"step": 19, "train/loss": 3.5411702394485474, "train/lm_loss": 3.5411702394485474, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0000000000000003e-06, "perf/tokens_per_sec": 26097.087348436744, "train/loss_code": 2.8772168159484863, "train/loss_math": 3.7057101726531982, "train/loss_prose": 3.7738433361053465} +{"step": 20, "train/loss": 3.3178592920303345, "train/lm_loss": 3.3178592920303345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1000000000000002e-06, "perf/tokens_per_sec": 25324.28626979857, "train/loss_math": 4.18449068069458, "train/loss_code": 2.4013435443242392, "train/loss_prose": 3.7885883450508118} +{"step": 21, "train/loss": 3.2984142899513245, "train/lm_loss": 3.2984142899513245, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2e-06, "perf/tokens_per_sec": 25830.194771375474, "train/loss_code": 2.3640918334325156, "train/loss_math": 3.782646735509237, "train/loss_prose": 3.9735488891601562} +{"step": 22, "train/loss": 3.517558991909027, "train/lm_loss": 3.517558991909027, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3e-06, "perf/tokens_per_sec": 24258.533525181483, "train/loss_math": 3.815894921620687, "train/loss_prose": 3.6767297983169556, "train/loss_code": 1.9858688116073608} +{"step": 23, "train/loss": 3.56562077999115, "train/lm_loss": 3.56562077999115, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4000000000000003e-06, "perf/tokens_per_sec": 25319.807497255035, "train/loss_code": 2.452388286590576, "train/loss_math": 3.6838666598002114, "train/loss_prose": 4.189529895782471} +{"step": 24, "train/loss": 3.4175736904144287, "train/lm_loss": 3.4175736904144287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5e-06, "perf/tokens_per_sec": 26426.827562868024, "train/loss_code": 1.958580732345581, "train/loss_math": 3.8408336639404297, "train/loss_prose": 4.030047178268433} +{"step": 25, "train/loss": 3.6236671805381775, "train/lm_loss": 3.6236671805381775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6e-06, "perf/tokens_per_sec": 23802.61302032243, "train/loss_math": 3.6118674278259277, "train/loss_prose": 3.8116992314656577, "train/loss_code": 3.10676908493042} +{"step": 26, "train/loss": 3.1056344509124756, "train/lm_loss": 3.1056344509124756, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7e-06, "perf/tokens_per_sec": 25921.04008437227, "train/loss_code": 2.486840069293976, "train/loss_prose": 3.9433069229125977, "train/loss_math": 3.06779408454895} +{"step": 27, "train/loss": 3.205208420753479, "train/lm_loss": 3.205208420753479, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8000000000000003e-06, "perf/tokens_per_sec": 25142.351258005223, "train/loss_math": 3.7073827385902405, "train/loss_code": 2.324313203493754, "train/loss_prose": 3.839198350906372} +{"step": 28, "train/loss": 3.1974347829818726, "train/lm_loss": 3.1974347829818726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9e-06, "perf/tokens_per_sec": 24959.13119479312, "train/loss_code": 2.4649434884389243, "train/loss_math": 3.799896001815796, "train/loss_prose": 3.3924795389175415} +{"step": 29, "train/loss": 3.2572076320648193, "train/lm_loss": 3.2572076320648193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3e-06, "perf/tokens_per_sec": 25850.833212705224, "train/loss_math": 3.638614058494568, "train/loss_prose": 3.519366145133972, "train/loss_code": 2.232236623764038} +{"step": 30, "train/loss": 3.065861403942108, "train/lm_loss": 3.065861403942108, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1e-06, "perf/tokens_per_sec": 26407.167162637415, "train/loss_math": 3.7509952386220298, "train/loss_code": 2.265477269887924, "train/loss_prose": 4.211996078491211} +{"step": 31, "train/loss": 3.615939199924469, "train/lm_loss": 3.615939199924469, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2000000000000003e-06, "perf/tokens_per_sec": 25388.16882226373, "train/loss_prose": 3.6313902537027993, "train/loss_math": 3.898405075073242, "train/loss_code": 3.169063925743103} +{"step": 32, "train/loss": 3.0892271399497986, "train/lm_loss": 3.0892271399497986, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3e-06, "perf/tokens_per_sec": 25732.88994537336, "train/loss_code": 2.514287769794464, "train/loss_prose": 3.1032001972198486, "train/loss_math": 3.8511549631754556} +{"step": 33, "train/loss": 3.4868866205215454, "train/lm_loss": 3.4868866205215454, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4000000000000005e-06, "perf/tokens_per_sec": 25391.883675318397, "train/loss_code": 2.5634204149246216, "train/loss_math": 3.8238682746887207, "train/loss_prose": 3.7655489444732666} +{"step": 34, "train/loss": 3.0494498014450073, "train/lm_loss": 3.0494498014450073, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5000000000000004e-06, "perf/tokens_per_sec": 26354.100539051306, "train/loss_code": 2.3403928677241006, "train/loss_math": 3.3601781129837036, "train/loss_prose": 3.5513551235198975} +{"step": 35, "train/loss": 2.93130362033844, "train/lm_loss": 2.93130362033844, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6e-06, "perf/tokens_per_sec": 25842.74492354671, "train/loss_code": 2.27245454788208, "train/loss_prose": 4.029385566711426} +{"step": 36, "train/loss": 3.708447575569153, "train/lm_loss": 3.708447575569153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7e-06, "perf/tokens_per_sec": 26796.902242034957, "train/loss_math": 3.7547417879104614, "train/loss_prose": 4.141663630803426, "train/loss_code": 2.223621368408203} +{"step": 37, "train/loss": 3.4489675760269165, "train/lm_loss": 3.4489675760269165, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8e-06, "perf/tokens_per_sec": 26461.10001386215, "train/loss_prose": 3.8263746897379556, "train/loss_math": 3.674792766571045, "train/loss_code": 2.5441192388534546} +{"step": 38, "train/loss": 3.4086560010910034, "train/lm_loss": 3.4086560010910034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9e-06, "perf/tokens_per_sec": 26126.61495646038, "train/loss_code": 2.380624771118164, "train/loss_math": 3.756845712661743, "train/loss_prose": 3.7458205223083496} +{"step": 39, "train/loss": 3.605338931083679, "train/lm_loss": 3.605338931083679, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.000000000000001e-06, "perf/tokens_per_sec": 26113.112870913905, "train/loss_code": 3.1079413890838623, "train/loss_prose": 3.7993316650390625, "train/loss_math": 3.7147510051727295} +{"step": 40, "train/loss": 3.011544704437256, "train/lm_loss": 3.011544704437256, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1000000000000006e-06, "perf/tokens_per_sec": 24871.903198626387, "train/loss_code": 2.0367438793182373, "train/loss_prose": 3.5325059294700623, "train/loss_math": 3.8521018028259277} +{"step": 41, "train/loss": 3.088020920753479, "train/lm_loss": 3.088020920753479, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2000000000000004e-06, "perf/tokens_per_sec": 25330.484517766123, "train/loss_math": 3.7073513984680178, "train/loss_code": 2.0558032989501953} +{"step": 42, "train/loss": 3.1462512016296387, "train/lm_loss": 3.1462512016296387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2999999999999995e-06, "perf/tokens_per_sec": 25091.089796991382, "train/loss_math": 4.098025798797607, "train/loss_code": 2.3420645892620087, "train/loss_prose": 3.9012413024902344} +{"step": 43, "train/loss": 3.2056540846824646, "train/lm_loss": 3.2056540846824646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4e-06, "perf/tokens_per_sec": 26451.72935887171, "train/loss_prose": 3.7781143188476562, "train/loss_code": 2.2515536546707153} +{"step": 44, "train/loss": 2.6912340223789215, "train/lm_loss": 2.6912340223789215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5e-06, "perf/tokens_per_sec": 25925.57760906406, "train/loss_math": 3.266446352005005, "train/loss_code": 2.1160216331481934} +{"step": 45, "train/loss": 3.1440955996513367, "train/lm_loss": 3.1440955996513367, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6e-06, "perf/tokens_per_sec": 25445.43805440152, "train/loss_prose": 3.611839771270752, "train/loss_code": 2.0140691995620728, "train/loss_math": 3.429701805114746} +{"step": 46, "train/loss": 3.4398671984672546, "train/lm_loss": 3.4398671984672546, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7e-06, "perf/tokens_per_sec": 26435.32633412476, "train/loss_prose": 4.409040689468384, "train/loss_math": 4.083031415939331, "train/loss_code": 2.0419171253840127} +{"step": 47, "train/loss": 3.1186091601848602, "train/lm_loss": 3.1186091601848602, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.800000000000001e-06, "perf/tokens_per_sec": 27227.28007860788, "train/loss_math": 3.7584911982218423, "train/loss_prose": 3.8360302448272705, "train/loss_code": 2.000446359316508} +{"step": 48, "train/loss": 2.8063590824604034, "train/lm_loss": 2.8063590824604034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9000000000000005e-06, "perf/tokens_per_sec": 25909.15617247365, "train/loss_code": 1.316724916299184, "train/loss_math": 3.725465774536133, "train/loss_prose": 3.5988354682922363} +{"step": 49, "train/loss": 2.995345890522003, "train/lm_loss": 2.995345890522003, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5e-06, "perf/tokens_per_sec": 26067.94283824096, "train/loss_math": 3.4700480699539185, "train/loss_prose": 4.247398257255554, "train/loss_code": 2.1319682896137238} +{"step": 50, "train/loss": 3.2398310899734497, "train/lm_loss": 3.2398310899734497, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.1e-06, "perf/tokens_per_sec": 25772.65165400777, "train/loss_prose": 3.79207181930542, "train/loss_code": 2.5057543913523355, "train/loss_math": 3.60574738184611} +{"step": 51, "train/loss": 3.0622022449970245, "train/lm_loss": 3.0622022449970245, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.2e-06, "perf/tokens_per_sec": 26468.15275229287, "train/loss_prose": 3.680949846903483, "train/loss_code": 1.9155761003494263, "train/loss_math": 3.8540194034576416} +{"step": 52, "train/loss": 2.8381579220294952, "train/lm_loss": 2.8381579220294952, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.3e-06, "perf/tokens_per_sec": 26488.06745720325, "train/loss_code": 1.735049843788147, "train/loss_prose": 3.4003751277923584, "train/loss_math": 3.524934768676758} +{"step": 53, "train/loss": 2.572875201702118, "train/lm_loss": 2.572875201702118, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.4e-06, "perf/tokens_per_sec": 26801.458934017675, "train/loss_math": 3.726457953453064, "train/loss_code": 2.1883474588394165} +{"step": 54, "train/loss": 3.4897311329841614, "train/lm_loss": 3.4897311329841614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.500000000000001e-06, "perf/tokens_per_sec": 26098.712804113846, "train/loss_math": 3.603645849227905, "train/loss_prose": 3.8048195838928223, "train/loss_code": 2.28998064994812} +{"step": 55, "train/loss": 3.286530077457428, "train/lm_loss": 3.286530077457428, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.600000000000001e-06, "perf/tokens_per_sec": 27206.626533155755, "train/loss_prose": 3.92032527923584, "train/loss_code": 2.2302045822143555} +{"step": 56, "train/loss": 3.3458199501037598, "train/lm_loss": 3.3458199501037598, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.7000000000000005e-06, "perf/tokens_per_sec": 25354.147875725357, "train/loss_prose": 3.4242817401885985, "train/loss_code": 2.7367851734161377, "train/loss_math": 3.454182744026184} +{"step": 57, "train/loss": 3.143489360809326, "train/lm_loss": 3.143489360809326, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.8e-06, "perf/tokens_per_sec": 25194.081228800074, "train/loss_prose": 3.583306407928467, "train/loss_math": 3.578552722930908, "train/loss_code": 1.8264154195785522} +{"step": 58, "train/loss": 3.284069776535034, "train/lm_loss": 3.284069776535034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.9e-06, "perf/tokens_per_sec": 25964.603211594993, "train/loss_code": 2.40673037370046, "train/loss_math": 3.697227954864502, "train/loss_prose": 3.88597043355306} +{"step": 59, "train/loss": 3.3459017276763916, "train/lm_loss": 3.3459017276763916, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6e-06, "perf/tokens_per_sec": 26186.110561710066, "train/loss_code": 2.0758703351020813, "train/loss_math": 3.8223346869150796, "train/loss_prose": 3.716156323750814} +{"step": 60, "train/loss": 3.375851571559906, "train/lm_loss": 3.375851571559906, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.1e-06, "perf/tokens_per_sec": 25942.098221337008, "train/loss_prose": 3.6888134479522705, "train/loss_math": 3.58155620098114, "train/loss_code": 2.544223189353943} +{"step": 61, "train/loss": 3.0476909279823303, "train/lm_loss": 3.0476909279823303, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.2e-06, "perf/tokens_per_sec": 26593.566078858727, "train/loss_code": 1.4647670984268188, "train/loss_prose": 3.6109227657318117, "train/loss_math": 3.397379159927368} +{"step": 62, "train/loss": 3.0237298011779785, "train/lm_loss": 3.0237298011779785, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.300000000000001e-06, "perf/tokens_per_sec": 26213.520029540025, "train/loss_code": 1.819211721420288, "train/loss_prose": 3.703648487726847, "train/loss_math": 3.810628652572632} +{"step": 63, "train/loss": 3.0653210282325745, "train/lm_loss": 3.0653210282325745, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.4000000000000006e-06, "perf/tokens_per_sec": 25208.461749756425, "train/loss_code": 1.940628210703532, "train/loss_math": 3.5575407346089682, "train/loss_prose": 4.014030933380127} +{"step": 64, "train/loss": 3.2984752655029297, "train/lm_loss": 3.2984752655029297, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.5000000000000004e-06, "perf/tokens_per_sec": 24481.117738258094, "train/loss_math": 3.6966548760732016, "train/loss_prose": 3.7856438159942627, "train/loss_code": 2.5755165417989097} +{"step": 65, "train/loss": 2.3989460170269012, "train/lm_loss": 2.3989460170269012, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.6e-06, "perf/tokens_per_sec": 26961.079054001035, "train/loss_code": 1.8118347883224488, "train/loss_math": 3.6349711418151855, "train/loss_prose": 3.248711109161377} +{"step": 66, "train/loss": 3.642922878265381, "train/lm_loss": 3.642922878265381, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.700000000000001e-06, "perf/tokens_per_sec": 26274.296543940196, "train/loss_code": 3.191771070162455, "train/loss_prose": 4.826027870178223, "train/loss_math": 3.305337429046631} +{"step": 67, "train/loss": 2.689916253089905, "train/lm_loss": 2.689916253089905, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.800000000000001e-06, "perf/tokens_per_sec": 26288.32789965678, "train/loss_math": 3.3579529523849487, "train/loss_prose": 3.85034441947937, "train/loss_code": 1.7756839096546173} +{"step": 68, "train/loss": 3.4227396845817566, "train/lm_loss": 3.4227396845817566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.900000000000001e-06, "perf/tokens_per_sec": 26219.84112914057, "train/loss_code": 1.6133966445922852, "train/loss_prose": 3.811695957183838, "train/loss_math": 3.355019688606262} +{"step": 69, "train/loss": 3.104639083147049, "train/lm_loss": 3.104639083147049, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.000000000000001e-06, "perf/tokens_per_sec": 26653.307616884485, "train/loss_code": 2.1971973180770874, "train/loss_prose": 3.788289944330851, "train/loss_math": 3.440324902534485} +{"step": 70, "train/loss": 3.2564597725868225, "train/lm_loss": 3.2564597725868225, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.1e-06, "perf/tokens_per_sec": 26492.478860598847, "train/loss_math": 3.5831740856170655, "train/loss_code": 1.2199468612670898, "train/loss_prose": 3.4579309225082397} +{"step": 71, "train/loss": 3.509410262107849, "train/lm_loss": 3.509410262107849, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.2e-06, "perf/tokens_per_sec": 24991.445214785505, "train/loss_math": 3.9472883542378745, "train/loss_prose": 3.7368879914283752, "train/loss_code": 1.2858664989471436} +{"step": 72, "train/loss": 3.253457546234131, "train/lm_loss": 3.253457546234131, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.2999999999999996e-06, "perf/tokens_per_sec": 25169.166779962965, "train/loss_prose": 3.898192048072815, "train/loss_math": 3.3403075695037843, "train/loss_code": 1.5297385454177856} +{"step": 73, "train/loss": 3.1023199558258057, "train/lm_loss": 3.1023199558258057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.4e-06, "perf/tokens_per_sec": 25118.89777788175, "train/loss_prose": 3.74148291349411, "train/loss_code": 2.1070229212443032, "train/loss_math": 3.531558036804199} +{"step": 74, "train/loss": 3.4959248900413513, "train/lm_loss": 3.4959248900413513, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.5e-06, "perf/tokens_per_sec": 24411.094463697813, "train/loss_code": 2.9183966318766275, "train/loss_prose": 3.8424417972564697} +{"step": 75, "train/loss": 3.1209598183631897, "train/lm_loss": 3.1209598183631897, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.6e-06, "perf/tokens_per_sec": 24616.41400595785, "train/loss_prose": 3.721921741962433, "train/loss_math": 3.208139181137085, "train/loss_code": 1.8318565487861633} +{"step": 76, "train/loss": 2.872336745262146, "train/lm_loss": 2.872336745262146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.7e-06, "perf/tokens_per_sec": 24773.881358449646, "train/loss_math": 3.59765625, "train/loss_prose": 4.096513509750366, "train/loss_code": 2.237601947784424} +{"step": 77, "train/loss": 3.4670355319976807, "train/lm_loss": 3.4670355319976807, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.8e-06, "perf/tokens_per_sec": 24198.055385516833, "train/loss_prose": 3.780536617551531, "train/loss_code": 1.272530198097229} +{"step": 78, "train/loss": 2.5208176970481873, "train/lm_loss": 2.5208176970481873, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.9e-06, "perf/tokens_per_sec": 25376.58096564682, "train/loss_prose": 3.8473055362701416, "train/loss_code": 1.9703649997711181, "train/loss_math": 3.233704924583435} +{"step": 79, "train/loss": 2.9235225915908813, "train/lm_loss": 2.9235225915908813, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.000000000000001e-06, "perf/tokens_per_sec": 23676.22200692652, "train/loss_math": 3.6109402974446616, "train/loss_code": 1.6520508527755737, "train/loss_prose": 3.79960298538208} +{"step": 80, "train/loss": 3.437472641468048, "train/lm_loss": 3.437472641468048, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.1e-06, "perf/tokens_per_sec": 25435.53004021154, "train/loss_prose": 3.8616448640823364, "train/loss_code": 1.77740478515625, "train/loss_math": 3.4252657890319824} +{"step": 81, "train/loss": 3.1204444766044617, "train/lm_loss": 3.1204444766044617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.200000000000001e-06, "perf/tokens_per_sec": 26101.964322992237, "train/loss_code": 2.141224503517151, "train/loss_prose": 3.6548988223075867, "train/loss_math": 3.920287609100342} +{"step": 82, "train/loss": 3.163422018289566, "train/lm_loss": 3.163422018289566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.3e-06, "perf/tokens_per_sec": 26028.093562467144, "train/loss_math": 3.6339489618937173, "train/loss_prose": 3.8781410853068032, "train/loss_code": 1.3855526745319366} +{"step": 83, "train/loss": 2.9485879838466644, "train/lm_loss": 2.9485879838466644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.400000000000001e-06, "perf/tokens_per_sec": 22897.913160535853, "train/loss_math": 3.324153184890747, "train/loss_prose": 3.5066606998443604, "train/loss_code": 1.548130989074707} +{"step": 84, "train/loss": 2.6916222870349884, "train/lm_loss": 2.6916222870349884, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.500000000000002e-06, "perf/tokens_per_sec": 25256.045302457413, "train/loss_math": 3.418997287750244, "train/loss_prose": 3.641462961832682, "train/loss_code": 1.7973978519439697} +{"step": 85, "train/loss": 2.282759964466095, "train/lm_loss": 2.282759964466095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.599999999999999e-06, "perf/tokens_per_sec": 25023.952363820157, "train/loss_code": 1.9801193277041118, "train/loss_math": 3.190682053565979} +{"step": 86, "train/loss": 1.8661621510982513, "train/lm_loss": 1.8661621510982513, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.7e-06, "perf/tokens_per_sec": 24470.93546346994, "train/loss_code": 1.6654939651489258, "train/loss_prose": 3.2708399295806885} +{"step": 87, "train/loss": 3.6476281881332397, "train/lm_loss": 3.6476281881332397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.8e-06, "perf/tokens_per_sec": 23257.588805947922, "train/loss_prose": 3.884701681137085, "train/loss_code": 2.8191263675689697, "train/loss_math": 3.4691954851150513} +{"step": 88, "train/loss": 2.9073020815849304, "train/lm_loss": 2.9073020815849304, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.9e-06, "perf/tokens_per_sec": 24008.31379300707, "train/loss_prose": 3.6380361318588257, "train/loss_code": 2.5268087148666383, "train/loss_math": 3.3483006954193115} +{"step": 89, "train/loss": 2.5933738350868225, "train/lm_loss": 2.5933738350868225, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9e-06, "perf/tokens_per_sec": 23837.059255939537, "train/loss_code": 1.819765865802765, "train/loss_math": 3.400069077809652, "train/loss_prose": 3.2677199840545654} +{"step": 90, "train/loss": 2.737801432609558, "train/lm_loss": 2.737801432609558, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.100000000000001e-06, "perf/tokens_per_sec": 24626.54122611867, "train/loss_math": 3.5193938414255777, "train/loss_code": 2.268845963478088} +{"step": 91, "train/loss": 2.986532986164093, "train/lm_loss": 2.986532986164093, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.2e-06, "perf/tokens_per_sec": 25086.9495526516, "train/loss_math": 3.3909446597099304, "train/loss_code": 2.130547126134237, "train/loss_prose": 3.9368443489074707} +{"step": 92, "train/loss": 3.265972852706909, "train/lm_loss": 3.265972852706909, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.3e-06, "perf/tokens_per_sec": 25200.40072169334, "train/loss_math": 3.298188269138336, "train/loss_prose": 4.224174976348877, "train/loss_code": 2.243340253829956} +{"step": 93, "train/loss": 2.7740292847156525, "train/lm_loss": 2.7740292847156525, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.4e-06, "perf/tokens_per_sec": 24651.701215662415, "train/loss_code": 1.913068026304245, "train/loss_math": 3.4776249726613364, "train/loss_prose": 4.1070876121521} +{"step": 94, "train/loss": 3.3835930228233337, "train/lm_loss": 3.3835930228233337, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.5e-06, "perf/tokens_per_sec": 24559.40574362816, "train/loss_code": 1.9235846996307373, "train/loss_prose": 3.642391800880432, "train/loss_math": 3.525197426478068} +{"step": 95, "train/loss": 3.1376948952674866, "train/lm_loss": 3.1376948952674866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.600000000000001e-06, "perf/tokens_per_sec": 23548.6159037981, "train/loss_prose": 3.8817320664723716, "train/loss_math": 3.5856734116872153, "train/loss_code": 1.349672555923462} +{"step": 96, "train/loss": 2.8534099459648132, "train/lm_loss": 2.8534099459648132, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.7e-06, "perf/tokens_per_sec": 24500.424530844673, "train/loss_math": 2.9010748863220215, "train/loss_prose": 3.7419830560684204, "train/loss_code": 2.488447570800781} +{"step": 97, "train/loss": 3.2151450514793396, "train/lm_loss": 3.2151450514793396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.800000000000001e-06, "perf/tokens_per_sec": 25553.18938954483, "train/loss_code": 2.6905369758605957, "train/loss_math": 3.581050475438436, "train/loss_prose": 3.4531989097595215} +{"step": 98, "train/loss": 2.5970493257045746, "train/lm_loss": 2.5970493257045746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.900000000000002e-06, "perf/tokens_per_sec": 26111.20781822327, "train/loss_prose": 3.7547625303268433, "train/loss_code": 1.639508068561554, "train/loss_math": 3.3544187545776367} +{"step": 99, "train/loss": 3.0619643330574036, "train/lm_loss": 3.0619643330574036, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1e-05, "perf/tokens_per_sec": 26063.31580696008, "train/loss_prose": 3.7288358211517334, "train/loss_math": 3.276853561401367, "train/loss_code": 2.5080885887145996} +{"step": 100, "train/loss": 3.1015920639038086, "train/lm_loss": 3.1015920639038086, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0100000000000002e-05, "perf/tokens_per_sec": 26318.491690769155, "train/loss_code": 2.343049556016922, "train/loss_prose": 4.039903481801351, "train/loss_math": 3.320826530456543} +{"step": 101, "train/loss": 2.661092758178711, "train/lm_loss": 2.661092758178711, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.02e-05, "perf/tokens_per_sec": 25886.201079152408, "train/loss_code": 1.6455156803131104, "train/loss_prose": 3.8581008911132812, "train/loss_math": 3.4952383041381836} +{"step": 102, "train/loss": 3.4558337330818176, "train/lm_loss": 3.4558337330818176, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.03e-05, "perf/tokens_per_sec": 25972.178994726914, "train/loss_prose": 3.6028623580932617, "train/loss_math": 3.367616558074951} +{"step": 103, "train/loss": 2.892028331756592, "train/lm_loss": 2.892028331756592, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.04e-05, "perf/tokens_per_sec": 26296.617053746224, "train/loss_math": 3.3683672547340393, "train/loss_code": 2.0905699729919434, "train/loss_prose": 3.391047716140747} +{"step": 104, "train/loss": 3.1097740530967712, "train/lm_loss": 3.1097740530967712, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.05e-05, "perf/tokens_per_sec": 26345.371098386448, "train/loss_code": 2.0989778637886047, "train/loss_prose": 3.49884295463562, "train/loss_math": 3.394568999608358} +{"step": 105, "train/loss": 3.319238841533661, "train/lm_loss": 3.319238841533661, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.06e-05, "perf/tokens_per_sec": 26411.389243503178, "train/loss_math": 3.320385456085205, "train/loss_prose": 3.7079222202301025, "train/loss_code": 1.761065125465393} +{"step": 106, "train/loss": 3.3860049843788147, "train/lm_loss": 3.3860049843788147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0700000000000001e-05, "perf/tokens_per_sec": 25013.058698146284, "train/loss_prose": 3.581420087814331, "train/loss_code": 2.5239028930664062, "train/loss_math": 3.3285186290740967} +{"step": 107, "train/loss": 3.3166623711586, "train/lm_loss": 3.3166623711586, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.08e-05, "perf/tokens_per_sec": 26069.40642938976, "train/loss_prose": 3.9774091839790344, "train/loss_math": 3.4287188053131104, "train/loss_code": 2.398314038912455} +{"step": 108, "train/loss": 2.7860186100006104, "train/lm_loss": 2.7860186100006104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.09e-05, "perf/tokens_per_sec": 25841.928598719023, "train/loss_prose": 3.7220124006271362, "train/loss_code": 2.022386282682419, "train/loss_math": 3.3772897720336914} +{"step": 109, "train/loss": 2.805084764957428, "train/lm_loss": 2.805084764957428, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1000000000000001e-05, "perf/tokens_per_sec": 26141.085614989697, "train/loss_math": 3.17391574382782, "train/loss_code": 1.6683288017908733, "train/loss_prose": 3.695953289667765} +{"step": 110, "train/loss": 2.8346123695373535, "train/lm_loss": 2.8346123695373535, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.11e-05, "perf/tokens_per_sec": 26270.479713622066, "train/loss_code": 1.583695689837138, "train/loss_prose": 3.9601415395736694, "train/loss_math": 3.3351763089497886} +{"step": 111, "train/loss": 2.671650469303131, "train/lm_loss": 2.671650469303131, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1200000000000001e-05, "perf/tokens_per_sec": 26365.9915775516, "train/loss_code": 1.974089115858078, "train/loss_math": 3.3310585816701255, "train/loss_prose": 3.483670711517334} +{"step": 112, "train/loss": 2.8709646463394165, "train/lm_loss": 2.8709646463394165, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.13e-05, "perf/tokens_per_sec": 25079.075868431657, "train/loss_prose": 3.4687384764353433, "train/loss_math": 3.3242567777633667, "train/loss_code": 1.9709960619608562} +{"step": 113, "train/loss": 3.160433053970337, "train/lm_loss": 3.160433053970337, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1400000000000001e-05, "perf/tokens_per_sec": 26772.014124760404, "train/loss_prose": 3.5113220810890198, "train/loss_math": 3.421386957168579, "train/loss_code": 2.1977016925811768} +{"step": 114, "train/loss": 2.1022502779960632, "train/lm_loss": 2.1022502779960632, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1500000000000002e-05, "perf/tokens_per_sec": 26169.716295570764, "train/loss_code": 1.6952002048492432, "train/loss_prose": 3.1151440143585205, "train/loss_math": 3.5316572189331055} +{"step": 115, "train/loss": 3.542598843574524, "train/lm_loss": 3.542598843574524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.16e-05, "perf/tokens_per_sec": 26747.08891061609, "train/loss_prose": 4.159875710805257, "train/loss_math": 3.4337673783302307, "train/loss_code": 2.1260929107666016} +{"step": 116, "train/loss": 3.491206407546997, "train/lm_loss": 3.491206407546997, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1700000000000001e-05, "perf/tokens_per_sec": 26489.57860587894, "train/loss_math": 3.364108991622925, "train/loss_code": 2.8504526615142822, "train/loss_prose": 4.129325985908508} +{"step": 117, "train/loss": 3.2557352781295776, "train/lm_loss": 3.2557352781295776, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.18e-05, "perf/tokens_per_sec": 26039.1001497487, "train/loss_code": 2.547378420829773, "train/loss_prose": 3.9052698612213135, "train/loss_math": 3.3439682722091675} +{"step": 118, "train/loss": 2.979167640209198, "train/lm_loss": 2.979167640209198, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.19e-05, "perf/tokens_per_sec": 26531.715036709178, "train/loss_code": 2.2891157070795694, "train/loss_math": 3.4151864846547446, "train/loss_prose": 3.3602172136306763} +{"step": 119, "train/loss": 2.6473454236984253, "train/lm_loss": 2.6473454236984253, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2e-05, "perf/tokens_per_sec": 26431.82857028566, "train/loss_code": 1.9939852952957153, "train/loss_prose": 3.3747215270996094, "train/loss_math": 3.2760337193806968} +{"step": 120, "train/loss": 2.9307847023010254, "train/lm_loss": 2.9307847023010254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2100000000000001e-05, "perf/tokens_per_sec": 26745.964619707785, "train/loss_prose": 3.4559848308563232, "train/loss_math": 3.158868392308553, "train/loss_code": 2.6284219324588776} +{"step": 121, "train/loss": 3.156856894493103, "train/lm_loss": 3.156856894493103, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.22e-05, "perf/tokens_per_sec": 26459.795872979692, "train/loss_math": 3.181234073638916, "train/loss_prose": 3.529986262321472, "train/loss_code": 2.2887113094329834} +{"step": 122, "train/loss": 3.0328741371631622, "train/lm_loss": 3.0328741371631622, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.23e-05, "perf/tokens_per_sec": 26558.785620425995, "train/loss_code": 2.5732180178165436, "train/loss_math": 3.42481005191803, "train/loss_prose": 3.5602505207061768} +{"step": 123, "train/loss": 2.960310012102127, "train/lm_loss": 2.960310012102127, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.24e-05, "perf/tokens_per_sec": 26574.314575457396, "train/loss_code": 2.677784729003906, "train/loss_prose": 3.4395264387130737, "train/loss_math": 3.414501905441284} +{"step": 124, "train/loss": 2.8557429909706116, "train/lm_loss": 2.8557429909706116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.25e-05, "perf/tokens_per_sec": 26227.48652968629, "train/loss_math": 3.243331956863403, "train/loss_code": 2.209761102994283} +{"step": 125, "train/loss": 3.2816657423973083, "train/lm_loss": 3.2816657423973083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2600000000000001e-05, "perf/tokens_per_sec": 26904.332576414836, "train/loss_math": 3.521658778190613, "train/loss_prose": 3.8116596937179565, "train/loss_code": 1.9816848635673523} +{"step": 126, "train/loss": 2.7950273156166077, "train/lm_loss": 2.7950273156166077, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.27e-05, "perf/tokens_per_sec": 26313.775295802476, "train/loss_math": 3.1117292046546936, "train/loss_code": 1.2760667204856873, "train/loss_prose": 3.680583953857422} +{"step": 127, "train/loss": 3.369444966316223, "train/lm_loss": 3.369444966316223, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2800000000000001e-05, "perf/tokens_per_sec": 26342.42217809637, "train/loss_math": 3.249312400817871, "train/loss_prose": 3.4895771741867065} +{"step": 128, "train/loss": 3.0982720851898193, "train/lm_loss": 3.0982720851898193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.29e-05, "perf/tokens_per_sec": 26323.653444471685, "train/loss_code": 1.4514133930206299, "train/loss_math": 3.3419933319091797, "train/loss_prose": 3.282802104949951} +{"step": 129, "train/loss": 3.0101085901260376, "train/lm_loss": 3.0101085901260376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3000000000000001e-05, "perf/tokens_per_sec": 26687.5899959611, "train/loss_math": 3.2279250621795654, "train/loss_prose": 3.4895146687825522, "train/loss_code": 1.9642747640609741} +{"step": 130, "train/loss": 2.9882062673568726, "train/lm_loss": 2.9882062673568726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3100000000000002e-05, "perf/tokens_per_sec": 26103.193766789434, "train/loss_math": 3.2597477436065674, "train/loss_code": 2.073131024837494, "train/loss_prose": 3.4606504440307617} +{"step": 131, "train/loss": 3.2772874236106873, "train/lm_loss": 3.2772874236106873, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.32e-05, "perf/tokens_per_sec": 26393.576362974356, "train/loss_code": 1.7436372637748718, "train/loss_prose": 4.058956682682037, "train/loss_math": 3.247599720954895} +{"step": 132, "train/loss": 3.4667052626609802, "train/lm_loss": 3.4667052626609802, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3300000000000001e-05, "perf/tokens_per_sec": 26224.96425562055, "train/loss_math": 3.3283113479614257, "train/loss_prose": 3.697361866633097} +{"step": 133, "train/loss": 2.5125882625579834, "train/lm_loss": 2.5125882625579834, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3400000000000002e-05, "perf/tokens_per_sec": 25684.070349174457, "train/loss_code": 1.9010333776474, "train/loss_math": 3.065396308898926, "train/loss_prose": 3.765071153640747} +{"step": 134, "train/loss": 3.2852468490600586, "train/lm_loss": 3.2852468490600586, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3500000000000001e-05, "perf/tokens_per_sec": 26293.920493190773, "train/loss_math": 3.161225914955139, "train/loss_prose": 3.7727982997894287, "train/loss_code": 2.3186769485473633} +{"step": 135, "train/loss": 3.417395293712616, "train/lm_loss": 3.417395293712616, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3600000000000002e-05, "perf/tokens_per_sec": 25642.32296736773, "train/loss_math": 3.33370954649789, "train/loss_prose": 4.0031938552856445} +{"step": 136, "train/loss": 3.3565831184387207, "train/lm_loss": 3.3565831184387207, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3700000000000001e-05, "perf/tokens_per_sec": 26299.555727009145, "train/loss_math": 3.1550408601760864, "train/loss_code": 4.218325614929199, "train/loss_prose": 3.704094171524048} +{"step": 137, "train/loss": 2.914873242378235, "train/lm_loss": 2.914873242378235, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3800000000000002e-05, "perf/tokens_per_sec": 27225.68359331048, "train/loss_code": 1.842440515756607, "train/loss_prose": 3.98730605840683} +{"step": 138, "train/loss": 2.8398184776306152, "train/lm_loss": 2.8398184776306152, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3900000000000002e-05, "perf/tokens_per_sec": 26254.581496033512, "train/loss_prose": 3.223050832748413, "train/loss_math": 3.1545984148979187, "train/loss_code": 1.8270252346992493} +{"step": 139, "train/loss": 2.7891680896282196, "train/lm_loss": 2.7891680896282196, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4000000000000001e-05, "perf/tokens_per_sec": 26552.587326684315, "train/loss_code": 1.8927524089813232, "train/loss_prose": 3.783913016319275, "train/loss_math": 3.587254285812378} +{"step": 140, "train/loss": 3.5036680102348328, "train/lm_loss": 3.5036680102348328, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4099999999999999e-05, "perf/tokens_per_sec": 26827.532916290587, "train/loss_prose": 4.193904399871826, "train/loss_math": 3.2416067123413086, "train/loss_code": 1.5289050340652466} +{"step": 141, "train/loss": 3.1954203248023987, "train/lm_loss": 3.1954203248023987, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.42e-05, "perf/tokens_per_sec": 25831.476425966997, "train/loss_prose": 3.529376983642578, "train/loss_math": 3.209353526433309, "train/loss_code": 1.8177944421768188} +{"step": 142, "train/loss": 3.0703726410865784, "train/lm_loss": 3.0703726410865784, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.43e-05, "perf/tokens_per_sec": 26061.892435470538, "train/loss_code": 2.5294518768787384, "train/loss_math": 3.4901539087295532, "train/loss_prose": 3.7324328422546387} +{"step": 143, "train/loss": 2.5122495889663696, "train/lm_loss": 2.5122495889663696, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.44e-05, "perf/tokens_per_sec": 26756.920071269265, "train/loss_code": 2.0788347482681275, "train/loss_math": 3.188587188720703, "train/loss_prose": 3.326648235321045} +{"step": 144, "train/loss": 3.1006178855895996, "train/lm_loss": 3.1006178855895996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.45e-05, "perf/tokens_per_sec": 26340.7662246863, "train/loss_prose": 3.663874328136444, "train/loss_code": 2.2538294792175293, "train/loss_math": 3.3879570960998535} +{"step": 145, "train/loss": 2.7237051725387573, "train/lm_loss": 2.7237051725387573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4599999999999999e-05, "perf/tokens_per_sec": 26177.37237519561, "train/loss_math": 3.4258447170257567, "train/loss_code": 1.5534720023473103} +{"step": 146, "train/loss": 3.066991865634918, "train/lm_loss": 3.066991865634918, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.47e-05, "perf/tokens_per_sec": 25572.511847099628, "train/loss_prose": 3.486196756362915, "train/loss_math": 3.253722071647644, "train/loss_code": 1.5274062156677246} +{"step": 147, "train/loss": 2.886360764503479, "train/lm_loss": 2.886360764503479, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.48e-05, "perf/tokens_per_sec": 26001.503170754593, "train/loss_math": 3.278838793436686, "train/loss_code": 1.869040886561076, "train/loss_prose": 3.823623776435852} +{"step": 148, "train/loss": 2.61094731092453, "train/lm_loss": 2.61094731092453, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.49e-05, "perf/tokens_per_sec": 26322.322486145436, "train/loss_math": 3.2716000080108643, "train/loss_code": 1.9159404933452606, "train/loss_prose": 3.4090168476104736} +{"step": 149, "train/loss": 3.1358057260513306, "train/lm_loss": 3.1358057260513306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5e-05, "perf/tokens_per_sec": 26267.74834755034, "train/loss_math": 3.3058247566223145, "train/loss_code": 2.4313089847564697, "train/loss_prose": 3.500264525413513} +{"step": 150, "train/loss": 3.2200132608413696, "train/lm_loss": 3.2200132608413696, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.51e-05, "perf/tokens_per_sec": 26707.75336647732, "train/loss_code": 2.1491949558258057, "train/loss_prose": 3.954064428806305, "train/loss_math": 3.4962637424468994} +{"step": 151, "train/loss": 3.067461907863617, "train/lm_loss": 3.067461907863617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.52e-05, "perf/tokens_per_sec": 26299.95833613991, "train/loss_math": 3.0616756677627563, "train/loss_code": 1.6280890703201294, "train/loss_prose": 3.554967721303304} +{"step": 152, "train/loss": 2.781259000301361, "train/lm_loss": 2.781259000301361, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.53e-05, "perf/tokens_per_sec": 25962.28818057075, "train/loss_code": 1.8106110493342082, "train/loss_math": 3.1233914693196616, "train/loss_prose": 3.724031925201416} +{"step": 153, "train/loss": 3.3762494921684265, "train/lm_loss": 3.3762494921684265, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.54e-05, "perf/tokens_per_sec": 26034.009925731283, "train/loss_math": 3.303830623626709, "train/loss_prose": 3.723068416118622, "train/loss_code": 2.7550299167633057} +{"step": 154, "train/loss": 3.0811038613319397, "train/lm_loss": 3.0811038613319397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.55e-05, "perf/tokens_per_sec": 26150.47625890082, "train/loss_prose": 4.100311517715454, "train/loss_math": 3.092501163482666, "train/loss_code": 2.054297844568888} +{"step": 155, "train/loss": 2.741518020629883, "train/lm_loss": 2.741518020629883, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.56e-05, "perf/tokens_per_sec": 26747.6719134851, "train/loss_prose": 3.581296920776367, "train/loss_math": 2.9884465535481772, "train/loss_code": 1.9347368478775024} +{"step": 156, "train/loss": 2.7089529037475586, "train/lm_loss": 2.7089529037475586, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5700000000000002e-05, "perf/tokens_per_sec": 26342.704922657966, "train/loss_code": 1.5315136512120564, "train/loss_prose": 3.703556537628174, "train/loss_math": 3.2233232657114663} +{"step": 157, "train/loss": 2.646926462650299, "train/lm_loss": 2.646926462650299, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.58e-05, "perf/tokens_per_sec": 25525.92904985766, "train/loss_prose": 3.706378936767578, "train/loss_code": 2.1125871419906614, "train/loss_math": 3.199718952178955} +{"step": 158, "train/loss": 3.455862283706665, "train/lm_loss": 3.455862283706665, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.59e-05, "perf/tokens_per_sec": 26901.383574685573, "train/loss_math": 3.0393877029418945, "train/loss_prose": 3.7989954948425293, "train/loss_code": 1.8135372400283813} +{"step": 159, "train/loss": 2.5996761322021484, "train/lm_loss": 2.5996761322021484, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6000000000000003e-05, "perf/tokens_per_sec": 26293.598552769727, "train/loss_math": 2.776254892349243, "train/loss_code": 1.6705833276112874, "train/loss_prose": 3.728447198867798} +{"step": 160, "train/loss": 3.750719904899597, "train/lm_loss": 3.750719904899597, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6100000000000002e-05, "perf/tokens_per_sec": 26626.291701150923, "train/loss_prose": 4.100833940505981, "train/loss_math": 3.16719651222229} +{"step": 161, "train/loss": 2.6063368916511536, "train/lm_loss": 2.6063368916511536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.62e-05, "perf/tokens_per_sec": 26590.026317825977, "train/loss_math": 3.3825018405914307, "train/loss_prose": 3.1402034759521484, "train/loss_code": 1.8907464146614075} +{"step": 162, "train/loss": 2.74117249250412, "train/lm_loss": 2.74117249250412, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.63e-05, "perf/tokens_per_sec": 26683.279154577816, "train/loss_prose": 3.5373023748397827, "train/loss_math": 3.2193473180135093, "train/loss_code": 1.7322444121042888} +{"step": 163, "train/loss": 3.1553388237953186, "train/lm_loss": 3.1553388237953186, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6400000000000002e-05, "perf/tokens_per_sec": 26546.679065512697, "train/loss_prose": 3.8410990238189697, "train/loss_code": 2.1336411237716675, "train/loss_math": 3.150710185368856} +{"step": 164, "train/loss": 3.124879837036133, "train/lm_loss": 3.124879837036133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.65e-05, "perf/tokens_per_sec": 25905.600936107072, "train/loss_code": 2.5248007774353027, "train/loss_math": 3.145579195022583, "train/loss_prose": 4.221540451049805} +{"step": 165, "train/loss": 3.2781062722206116, "train/lm_loss": 3.2781062722206116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.66e-05, "perf/tokens_per_sec": 27153.477084535203, "train/loss_prose": 3.928733968734741, "train/loss_code": 1.7574040293693542, "train/loss_math": 3.0663726329803467} +{"step": 166, "train/loss": 3.0379048585891724, "train/lm_loss": 3.0379048585891724, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6700000000000003e-05, "perf/tokens_per_sec": 26363.523507142607, "train/loss_code": 1.4688439965248108, "train/loss_prose": 3.7796429991722107, "train/loss_math": 3.1234902143478394} +{"step": 167, "train/loss": 2.8251177072525024, "train/lm_loss": 2.8251177072525024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6800000000000002e-05, "perf/tokens_per_sec": 26280.486183518125, "train/loss_math": 3.172456741333008, "train/loss_code": 1.7197424968083699, "train/loss_prose": 3.6989336013793945} +{"step": 168, "train/loss": 3.056449830532074, "train/lm_loss": 3.056449830532074, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.69e-05, "perf/tokens_per_sec": 26298.710287956234, "train/loss_code": 2.2259558836619058, "train/loss_math": 3.320542097091675, "train/loss_prose": 3.7108825047810874} +{"step": 169, "train/loss": 3.3149476051330566, "train/lm_loss": 3.3149476051330566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7000000000000003e-05, "perf/tokens_per_sec": 26360.246947374235, "train/loss_prose": 3.691526770591736, "train/loss_code": 1.7847964763641357, "train/loss_math": 3.322892506917318} +{"step": 170, "train/loss": 2.555959314107895, "train/lm_loss": 2.555959314107895, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7100000000000002e-05, "perf/tokens_per_sec": 26501.83753231768, "train/loss_math": 3.212730328241984, "train/loss_prose": 3.3579535484313965, "train/loss_code": 1.8628826141357422} +{"step": 171, "train/loss": 2.495914548635483, "train/lm_loss": 2.495914548635483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7199999999999998e-05, "perf/tokens_per_sec": 26284.426563690497, "train/loss_prose": 3.424477696418762, "train/loss_code": 1.9406627655029296, "train/loss_math": 3.415045976638794} +{"step": 172, "train/loss": 2.5630083680152893, "train/lm_loss": 2.5630083680152893, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.73e-05, "perf/tokens_per_sec": 26399.173266254133, "train/loss_math": 2.97653591632843, "train/loss_code": 1.7661259174346924, "train/loss_prose": 3.743246078491211} +{"step": 173, "train/loss": 3.4510366320610046, "train/lm_loss": 3.4510366320610046, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.74e-05, "perf/tokens_per_sec": 26345.65390625599, "train/loss_prose": 3.8538656711578367, "train/loss_math": 3.2466694116592407, "train/loss_code": 1.845625638961792} +{"step": 174, "train/loss": 3.053522527217865, "train/lm_loss": 3.053522527217865, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.75e-05, "perf/tokens_per_sec": 26354.343106093147, "train/loss_code": 2.326202154159546, "train/loss_math": 3.1763461430867515, "train/loss_prose": 3.415579160054525} +{"step": 175, "train/loss": 2.9226731657981873, "train/lm_loss": 2.9226731657981873, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.76e-05, "perf/tokens_per_sec": 26492.233744084308, "train/loss_prose": 3.5491042137145996, "train/loss_math": 3.1427011013031008, "train/loss_code": 2.059387505054474} +{"step": 176, "train/loss": 2.791577398777008, "train/lm_loss": 2.791577398777008, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.77e-05, "perf/tokens_per_sec": 27297.884769634606, "train/loss_math": 3.132240116596222, "train/loss_code": 2.147451480229696, "train/loss_prose": 3.3613040447235107} +{"step": 177, "train/loss": 3.079277455806732, "train/lm_loss": 3.079277455806732, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.78e-05, "perf/tokens_per_sec": 26355.839034859622, "train/loss_math": 3.1372724771499634, "train/loss_code": 2.319621443748474, "train/loss_prose": 3.722943067550659} +{"step": 178, "train/loss": 2.6800513863563538, "train/lm_loss": 2.6800513863563538, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.79e-05, "perf/tokens_per_sec": 27021.210056118984, "train/loss_prose": 3.4650468826293945, "train/loss_math": 3.083520293235779, "train/loss_code": 1.626076579093933} +{"step": 179, "train/loss": 3.0921883583068848, "train/lm_loss": 3.0921883583068848, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8e-05, "perf/tokens_per_sec": 25998.04057420091, "train/loss_code": 1.9551504850387573, "train/loss_prose": 3.8046344916025796, "train/loss_math": 3.1377676328023276} +{"step": 180, "train/loss": 3.009089946746826, "train/lm_loss": 3.009089946746826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.81e-05, "perf/tokens_per_sec": 26341.57398082785, "train/loss_math": 2.988514041900635, "train/loss_prose": 3.750813841819763, "train/loss_code": 1.6285223960876465} +{"step": 181, "train/loss": 3.561522364616394, "train/lm_loss": 3.561522364616394, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8200000000000002e-05, "perf/tokens_per_sec": 26893.803395094583, "train/loss_prose": 3.804562520980835, "train/loss_math": 3.1564558347066245} +{"step": 182, "train/loss": 2.7424333095550537, "train/lm_loss": 2.7424333095550537, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.83e-05, "perf/tokens_per_sec": 26798.15622289747, "train/loss_code": 2.2324968576431274, "train/loss_math": 3.2208127975463867, "train/loss_prose": 3.283926248550415} +{"step": 183, "train/loss": 2.7638400495052338, "train/lm_loss": 2.7638400495052338, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.84e-05, "perf/tokens_per_sec": 26594.14240822784, "train/loss_code": 1.7501335144042969, "train/loss_prose": 3.622663974761963, "train/loss_math": 3.2049971421559653} +{"step": 184, "train/loss": 2.5141546428203583, "train/lm_loss": 2.5141546428203583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.85e-05, "perf/tokens_per_sec": 26305.23365441887, "train/loss_code": 1.9702147722244263, "train/loss_math": 3.2157806158065796, "train/loss_prose": 3.830601453781128} +{"step": 185, "train/loss": 3.147935450077057, "train/lm_loss": 3.147935450077057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.86e-05, "perf/tokens_per_sec": 25947.779596250683, "train/loss_math": 3.191707968711853, "train/loss_prose": 3.7573350270589194, "train/loss_code": 2.509353995323181} +{"step": 186, "train/loss": 2.9830848574638367, "train/lm_loss": 2.9830848574638367, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.87e-05, "perf/tokens_per_sec": 26493.908797340104, "train/loss_code": 2.186255931854248, "train/loss_prose": 3.387182076772054, "train/loss_math": 3.1102067629496255} +{"step": 187, "train/loss": 3.504627525806427, "train/lm_loss": 3.504627525806427, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.88e-05, "perf/tokens_per_sec": 26367.731642593266, "train/loss_prose": 3.7628154158592224, "train/loss_math": 3.178426663080851, "train/loss_code": 3.4504787921905518} +{"step": 188, "train/loss": 3.3171905875205994, "train/lm_loss": 3.3171905875205994, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8900000000000002e-05, "perf/tokens_per_sec": 26364.25174215627, "train/loss_math": 3.0519661903381348, "train/loss_code": 2.363633155822754, "train/loss_prose": 3.754498243331909} +{"step": 189, "train/loss": 3.0721487402915955, "train/lm_loss": 3.0721487402915955, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9e-05, "perf/tokens_per_sec": 27407.23534826438, "train/loss_prose": 3.9141197999318442, "train/loss_math": 3.2132041454315186, "train/loss_code": 2.136140545209249} +{"step": 190, "train/loss": 3.22525691986084, "train/lm_loss": 3.22525691986084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.91e-05, "perf/tokens_per_sec": 26750.87888361722, "train/loss_code": 2.4777750968933105, "train/loss_prose": 3.647010246912638, "train/loss_math": 3.3018248875935874} +{"step": 191, "train/loss": 2.4386181831359863, "train/lm_loss": 2.4386181831359863, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9200000000000003e-05, "perf/tokens_per_sec": 24758.85039660579, "train/loss_math": 3.259881575902303, "train/loss_prose": 3.7319869995117188, "train/loss_code": 1.4993282556533813} +{"step": 192, "train/loss": 2.2197163105010986, "train/lm_loss": 2.2197163105010986, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.93e-05, "perf/tokens_per_sec": 26574.725640938384, "train/loss_code": 1.8571606079737346, "train/loss_prose": 3.505178451538086, "train/loss_math": 3.1095876693725586} +{"step": 193, "train/loss": 3.0706608295440674, "train/lm_loss": 3.0706608295440674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.94e-05, "perf/tokens_per_sec": 26982.59026043581, "train/loss_prose": 3.8753538131713867, "train/loss_code": 1.963690459728241, "train/loss_math": 3.003948370615641} +{"step": 194, "train/loss": 3.191811740398407, "train/lm_loss": 3.191811740398407, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9500000000000003e-05, "perf/tokens_per_sec": 26320.30613905133, "train/loss_math": 3.124305566151937, "train/loss_prose": 3.5328123569488525, "train/loss_code": 2.030327796936035} +{"step": 195, "train/loss": 2.6592589616775513, "train/lm_loss": 2.6592589616775513, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9600000000000002e-05, "perf/tokens_per_sec": 26250.569837284955, "train/loss_code": 1.3784899512926738, "train/loss_math": 3.0618860721588135, "train/loss_prose": 3.9764715433120728} +{"step": 196, "train/loss": 3.262199282646179, "train/lm_loss": 3.262199282646179, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.97e-05, "perf/tokens_per_sec": 26287.28206713254, "train/loss_prose": 4.126025676727295, "train/loss_math": 3.1490654349327087, "train/loss_code": 2.6246408224105835} +{"step": 197, "train/loss": 3.284999966621399, "train/lm_loss": 3.284999966621399, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9800000000000004e-05, "perf/tokens_per_sec": 27261.282153245116, "train/loss_math": 2.986119190851847, "train/loss_code": 1.8672696352005005, "train/loss_prose": 3.863593280315399} +{"step": 198, "train/loss": 2.539630562067032, "train/lm_loss": 2.539630562067032, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9900000000000003e-05, "perf/tokens_per_sec": 25315.777956243746, "train/loss_math": 3.088167667388916, "train/loss_code": 1.6254020134607952} +{"step": 199, "train/loss": 3.0873206853866577, "train/lm_loss": 3.0873206853866577, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2e-05, "perf/tokens_per_sec": 25303.62248710875, "train/loss_math": 3.0413784980773926, "train/loss_prose": 3.546206831932068, "train/loss_code": 2.2154905796051025} +{"step": 200, "train/loss": 3.129272937774658, "train/lm_loss": 3.129272937774658, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.01e-05, "perf/tokens_per_sec": 25178.4993089783, "train/loss_math": 3.0969653924306235, "train/loss_code": 1.5617212057113647, "train/loss_prose": 3.5453914999961853} +{"step": 200, "eval/loss": 2.5521052025554076, "eval/lm_loss": 2.5521052025554076, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 12.834093729958662, "eval/loss_code": 1.8347071358874507, "eval/ppl_code": 6.263299581136344, "eval/loss_prose": 3.5192759988600746, "eval/ppl_prose": 33.75997735151466, "eval/loss_math": 2.8729562263718176, "eval/ppl_math": 17.68923436152941} +{"step": 201, "train/loss": 2.673208475112915, "train/lm_loss": 2.673208475112915, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0200000000000003e-05, "perf/tokens_per_sec": 25573.006702917697, "train/loss_code": 2.130324512720108, "train/loss_math": 2.848116636276245, "train/loss_prose": 3.5840680599212646} +{"step": 202, "train/loss": 2.765286773443222, "train/lm_loss": 2.765286773443222, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0300000000000002e-05, "perf/tokens_per_sec": 26305.757274715044, "train/loss_code": 2.1220866243044534, "train/loss_prose": 3.7351391315460205, "train/loss_math": 3.005223870277405} +{"step": 203, "train/loss": 2.841176986694336, "train/lm_loss": 2.841176986694336, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.04e-05, "perf/tokens_per_sec": 25973.553314979355, "train/loss_math": 3.2082131703694663, "train/loss_code": 2.2201376259326935, "train/loss_prose": 4.224224090576172} +{"step": 204, "train/loss": 2.9168200492858887, "train/lm_loss": 2.9168200492858887, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.05e-05, "perf/tokens_per_sec": 26429.022894046193, "train/loss_code": 1.8590808510780334, "train/loss_math": 3.08063143491745, "train/loss_prose": 3.6469361782073975} +{"step": 205, "train/loss": 3.0957208275794983, "train/lm_loss": 3.0957208275794983, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.06e-05, "perf/tokens_per_sec": 26394.671222521985, "train/loss_math": 3.253389835357666, "train/loss_prose": 3.679595708847046, "train/loss_code": 1.9834049940109253} +{"step": 206, "train/loss": 2.8630149960517883, "train/lm_loss": 2.8630149960517883, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.07e-05, "perf/tokens_per_sec": 26646.52783482129, "train/loss_prose": 3.405821164449056, "train/loss_math": 2.983839670817057, "train/loss_code": 1.867568850517273} +{"step": 207, "train/loss": 3.0961974263191223, "train/lm_loss": 3.0961974263191223, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.08e-05, "perf/tokens_per_sec": 26559.196205623542, "train/loss_code": 2.4676891565322876, "train/loss_prose": 3.823471705118815, "train/loss_math": 3.4284069538116455} +{"step": 208, "train/loss": 3.1883832216262817, "train/lm_loss": 3.1883832216262817, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.09e-05, "perf/tokens_per_sec": 26015.165812359268, "train/loss_prose": 3.462163305282593, "train/loss_math": 2.8851022720336914, "train/loss_code": 2.4260451793670654} +{"step": 209, "train/loss": 3.195222854614258, "train/lm_loss": 3.195222854614258, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1e-05, "perf/tokens_per_sec": 26071.54277632176, "train/loss_code": 1.830281138420105, "train/loss_math": 2.8932467699050903, "train/loss_prose": 4.0286818742752075} +{"step": 210, "train/loss": 2.971528559923172, "train/lm_loss": 2.971528559923172, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.11e-05, "perf/tokens_per_sec": 27034.434049169056, "train/loss_code": 1.9985548257827759, "train/loss_prose": 3.9184155464172363, "train/loss_math": 3.0106587409973145} +{"step": 211, "train/loss": 2.747187316417694, "train/lm_loss": 2.747187316417694, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.12e-05, "perf/tokens_per_sec": 24598.684982660514, "train/loss_prose": 3.707542657852173, "train/loss_math": 3.1873821020126343, "train/loss_code": 1.840142289797465} +{"step": 212, "train/loss": 3.106627583503723, "train/lm_loss": 3.106627583503723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.13e-05, "perf/tokens_per_sec": 27075.20784714211, "train/loss_math": 3.135449707508087, "train/loss_prose": 3.4244198004404702, "train/loss_code": 2.0379621982574463} +{"step": 213, "train/loss": 3.3044312596321106, "train/lm_loss": 3.3044312596321106, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1400000000000002e-05, "perf/tokens_per_sec": 25298.59219772517, "train/loss_math": 3.1990079085032144, "train/loss_prose": 3.8690622448921204, "train/loss_code": 1.3621762990951538} +{"step": 214, "train/loss": 2.900142788887024, "train/lm_loss": 2.900142788887024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.15e-05, "perf/tokens_per_sec": 26733.81165746226, "train/loss_math": 2.7080692052841187, "train/loss_prose": 3.4599660634994507, "train/loss_code": 1.9725695848464966} +{"step": 215, "train/loss": 2.6002704799175262, "train/lm_loss": 2.6002704799175262, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.16e-05, "perf/tokens_per_sec": 27036.85914287153, "train/loss_math": 3.0084832668304444, "train/loss_code": 1.080360859632492, "train/loss_prose": 3.5990254878997803} +{"step": 216, "train/loss": 3.091810882091522, "train/lm_loss": 3.091810882091522, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1700000000000002e-05, "perf/tokens_per_sec": 25496.94818499824, "train/loss_math": 2.752466320991516, "train/loss_prose": 3.7498231530189514, "train/loss_code": 2.1151309609413147} +{"step": 217, "train/loss": 3.4390405416488647, "train/lm_loss": 3.4390405416488647, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.18e-05, "perf/tokens_per_sec": 26402.824699584897, "train/loss_code": 2.3710238933563232, "train/loss_prose": 3.8602258205413817, "train/loss_math": 2.9200854301452637} +{"step": 218, "train/loss": 2.7988301515579224, "train/lm_loss": 2.7988301515579224, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.19e-05, "perf/tokens_per_sec": 25807.030417377067, "train/loss_prose": 3.789465347925822, "train/loss_math": 2.7769229412078857, "train/loss_code": 2.0613302886486053} +{"step": 219, "train/loss": 2.562319755554199, "train/lm_loss": 2.562319755554199, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2000000000000003e-05, "perf/tokens_per_sec": 26012.881143848696, "train/loss_prose": 4.1718363761901855, "train/loss_code": 1.7990349531173706, "train/loss_math": 3.043527364730835} +{"step": 220, "train/loss": 3.119266986846924, "train/lm_loss": 3.119266986846924, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2100000000000002e-05, "perf/tokens_per_sec": 25770.911925212786, "train/loss_math": 3.242182433605194, "train/loss_prose": 3.4612300395965576, "train/loss_code": 1.6017162799835205} +{"step": 221, "train/loss": 2.821564197540283, "train/lm_loss": 2.821564197540283, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.22e-05, "perf/tokens_per_sec": 24783.53077696528, "train/loss_math": 3.1595578789711, "train/loss_code": 2.071765422821045, "train/loss_prose": 3.718984603881836} +{"step": 222, "train/loss": 2.5806184709072113, "train/lm_loss": 2.5806184709072113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.23e-05, "perf/tokens_per_sec": 26361.46039306188, "train/loss_code": 2.0258485674858093, "train/loss_prose": 3.763392210006714, "train/loss_math": 2.926053841908773} +{"step": 223, "train/loss": 2.840886950492859, "train/lm_loss": 2.840886950492859, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2400000000000002e-05, "perf/tokens_per_sec": 25507.320692414705, "train/loss_code": 2.5557793617248534, "train/loss_math": 2.9155079126358032, "train/loss_prose": 4.117183208465576} +{"step": 224, "train/loss": 2.451446533203125, "train/lm_loss": 2.451446533203125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.25e-05, "perf/tokens_per_sec": 25153.247376311843, "train/loss_code": 1.7431426346302032, "train/loss_math": 2.71476149559021, "train/loss_prose": 3.3080801169077554} +{"step": 225, "train/loss": 3.1026906967163086, "train/lm_loss": 3.1026906967163086, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.26e-05, "perf/tokens_per_sec": 26113.668563646093, "train/loss_prose": 3.3656148314476013, "train/loss_code": 2.431941032409668, "train/loss_math": 2.9757084051767984} +{"step": 226, "train/loss": 2.9325796961784363, "train/lm_loss": 2.9325796961784363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2700000000000003e-05, "perf/tokens_per_sec": 25001.51958006441, "train/loss_math": 3.082284132639567, "train/loss_prose": 3.8420499563217163, "train/loss_code": 2.1765616734822593} +{"step": 227, "train/loss": 3.321239709854126, "train/lm_loss": 3.321239709854126, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2800000000000002e-05, "perf/tokens_per_sec": 26838.93265844933, "train/loss_math": 2.8427957892417908, "train/loss_prose": 3.7996841073036194} +{"step": 228, "train/loss": 2.8136178255081177, "train/lm_loss": 2.8136178255081177, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.29e-05, "perf/tokens_per_sec": 26958.582928088454, "train/loss_math": 3.0531107584635415, "train/loss_prose": 3.615808606147766, "train/loss_code": 2.039331038792928} +{"step": 229, "train/loss": 3.0507715940475464, "train/lm_loss": 3.0507715940475464, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3000000000000003e-05, "perf/tokens_per_sec": 25627.787392166312, "train/loss_math": 2.861407661437988, "train/loss_code": 2.773444652557373, "train/loss_prose": 3.662844657897949} +{"step": 230, "train/loss": 1.9385758340358734, "train/lm_loss": 1.9385758340358734, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3100000000000002e-05, "perf/tokens_per_sec": 25779.148717410062, "train/loss_math": 2.7078816890716553, "train/loss_code": 1.7073899110158284, "train/loss_prose": 2.5563862323760986} +{"step": 231, "train/loss": 3.13892138004303, "train/lm_loss": 3.13892138004303, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.32e-05, "perf/tokens_per_sec": 25151.663907965878, "train/loss_math": 2.7086098194122314, "train/loss_prose": 3.7042597770690917, "train/loss_code": 1.1728522777557373} +{"step": 232, "train/loss": 2.678349941968918, "train/lm_loss": 2.678349941968918, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3300000000000004e-05, "perf/tokens_per_sec": 25865.93452164971, "train/loss_prose": 3.590106805165609, "train/loss_code": 1.592472751935323, "train/loss_math": 2.9395302534103394} +{"step": 233, "train/loss": 2.94723778963089, "train/lm_loss": 2.94723778963089, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3400000000000003e-05, "perf/tokens_per_sec": 26929.720706260032, "train/loss_prose": 3.8042054176330566, "train/loss_math": 3.0171778202056885, "train/loss_code": 2.0436434745788574} +{"step": 234, "train/loss": 3.2273011207580566, "train/lm_loss": 3.2273011207580566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.35e-05, "perf/tokens_per_sec": 25483.408489688634, "train/loss_math": 2.9496251344680786, "train/loss_prose": 3.6216551780700685, "train/loss_code": 1.8108829259872437} +{"step": 235, "train/loss": 3.28086918592453, "train/lm_loss": 3.28086918592453, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.36e-05, "perf/tokens_per_sec": 25500.127180997362, "train/loss_math": 3.0508450508117675, "train/loss_prose": 3.664242426554362} +{"step": 236, "train/loss": 2.892048865556717, "train/lm_loss": 2.892048865556717, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.37e-05, "perf/tokens_per_sec": 26178.13025166548, "train/loss_prose": 3.5004188418388367, "train/loss_math": 2.666422486305237, "train/loss_code": 1.9009352326393127} +{"step": 237, "train/loss": 2.685964584350586, "train/lm_loss": 2.685964584350586, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.38e-05, "perf/tokens_per_sec": 25649.788416001527, "train/loss_code": 1.90725839138031, "train/loss_math": 2.9038268327713013, "train/loss_prose": 3.319429079691569} +{"step": 238, "train/loss": 2.8580990731716156, "train/lm_loss": 2.8580990731716156, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.39e-05, "perf/tokens_per_sec": 26589.038645713066, "train/loss_code": 1.6126563946406047, "train/loss_prose": 3.6616336703300476, "train/loss_math": 3.3802876472473145} +{"step": 239, "train/loss": 2.7679152488708496, "train/lm_loss": 2.7679152488708496, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4e-05, "perf/tokens_per_sec": 25408.33213882698, "train/loss_code": 1.605189839998881, "train/loss_prose": 3.8430585066477456, "train/loss_math": 2.8992888927459717} +{"step": 240, "train/loss": 2.6878894567489624, "train/lm_loss": 2.6878894567489624, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.41e-05, "perf/tokens_per_sec": 25316.598684646797, "train/loss_prose": 3.75615127881368, "train/loss_math": 3.06972873210907, "train/loss_code": 1.3650682369867961} +{"step": 241, "train/loss": 3.096416175365448, "train/lm_loss": 3.096416175365448, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4200000000000002e-05, "perf/tokens_per_sec": 26463.66791694843, "train/loss_prose": 3.3800840377807617, "train/loss_math": 3.2479241689046225, "train/loss_code": 2.4436521530151367} +{"step": 242, "train/loss": 2.354200452566147, "train/lm_loss": 2.354200452566147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.43e-05, "perf/tokens_per_sec": 25784.29454953684, "train/loss_math": 2.963016450405121, "train/loss_code": 1.7453841865062714} +{"step": 243, "train/loss": 3.0925947427749634, "train/lm_loss": 3.0925947427749634, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.44e-05, "perf/tokens_per_sec": 24575.35548565306, "train/loss_code": 1.7637975215911865, "train/loss_prose": 3.736886183420817, "train/loss_math": 2.9415754675865173} +{"step": 244, "train/loss": 2.798240900039673, "train/lm_loss": 2.798240900039673, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.45e-05, "perf/tokens_per_sec": 25914.823126214294, "train/loss_code": 1.98479159673055, "train/loss_math": 2.8194300333658853, "train/loss_prose": 3.9866310358047485} +{"step": 245, "train/loss": 2.967860996723175, "train/lm_loss": 2.967860996723175, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.46e-05, "perf/tokens_per_sec": 26333.94266269151, "train/loss_code": 1.819147765636444, "train/loss_math": 3.0801576375961304, "train/loss_prose": 3.8919806480407715} +{"step": 246, "train/loss": 2.7641478180885315, "train/lm_loss": 2.7641478180885315, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.47e-05, "perf/tokens_per_sec": 25130.545524227466, "train/loss_math": 2.9168322682380676, "train/loss_code": 1.524024486541748, "train/loss_prose": 3.6989026069641113} +{"step": 247, "train/loss": 3.0431554317474365, "train/lm_loss": 3.0431554317474365, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.48e-05, "perf/tokens_per_sec": 25991.000183057913, "train/loss_code": 2.2492071390151978, "train/loss_prose": 3.9147583643595376, "train/loss_math": 2.92667293548584} +{"step": 248, "train/loss": 2.595884144306183, "train/lm_loss": 2.595884144306183, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4900000000000002e-05, "perf/tokens_per_sec": 25872.55663467461, "train/loss_code": 1.4864100615183513, "train/loss_prose": 3.502934137980143, "train/loss_math": 2.899519920349121} +{"step": 249, "train/loss": 2.5171810686588287, "train/lm_loss": 2.5171810686588287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5e-05, "perf/tokens_per_sec": 26864.61753437853, "train/loss_code": 2.031035006046295, "train/loss_math": 2.824601332346598, "train/loss_prose": 3.5395030975341797} +{"step": 250, "train/loss": 3.251744508743286, "train/lm_loss": 3.251744508743286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.51e-05, "perf/tokens_per_sec": 27209.772126718046, "train/loss_prose": 3.649234914779663, "train/loss_math": 2.9620120525360107, "train/loss_code": 1.8437561988830566} +{"step": 251, "train/loss": 2.1415924727916718, "train/lm_loss": 2.1415924727916718, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5200000000000003e-05, "perf/tokens_per_sec": 24974.043345849524, "train/loss_math": 2.855687975883484, "train/loss_code": 1.6941836833953858, "train/loss_prose": 2.9504456520080566} +{"step": 252, "train/loss": 2.976630389690399, "train/lm_loss": 2.976630389690399, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5300000000000002e-05, "perf/tokens_per_sec": 25223.377145214643, "train/loss_math": 3.1145166556040444, "train/loss_code": 2.562971830368042} +{"step": 253, "train/loss": 2.6157795190811157, "train/lm_loss": 2.6157795190811157, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.54e-05, "perf/tokens_per_sec": 26197.81050512752, "train/loss_math": 2.8596657514572144, "train/loss_code": 1.9729841649532318, "train/loss_prose": 3.6574841737747192} +{"step": 254, "train/loss": 2.892490565776825, "train/lm_loss": 2.892490565776825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5500000000000003e-05, "perf/tokens_per_sec": 24659.41497891446, "train/loss_prose": 3.7760401566823325, "train/loss_math": 3.00524632136027, "train/loss_code": 1.3980324864387512} +{"step": 255, "train/loss": 2.9172129034996033, "train/lm_loss": 2.9172129034996033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5600000000000002e-05, "perf/tokens_per_sec": 24207.29547497668, "train/loss_math": 2.9745603084564207, "train/loss_prose": 3.583453416824341, "train/loss_code": 1.2979960441589355} +{"step": 256, "train/loss": 3.335943341255188, "train/lm_loss": 3.335943341255188, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.57e-05, "perf/tokens_per_sec": 26267.74834755034, "train/loss_math": 2.95776629447937, "train/loss_code": 1.955713152885437, "train/loss_prose": 3.9636706352233886} +{"step": 257, "train/loss": 3.4052239060401917, "train/lm_loss": 3.4052239060401917, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.58e-05, "perf/tokens_per_sec": 26134.1663228735, "train/loss_prose": 3.679721307754517, "train/loss_math": 2.947728077570597} +{"step": 258, "train/loss": 2.6093791127204895, "train/lm_loss": 2.6093791127204895, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5900000000000003e-05, "perf/tokens_per_sec": 25583.898378433114, "train/loss_math": 3.094632387161255, "train/loss_code": 1.767856389284134, "train/loss_prose": 3.5696581999460855} +{"step": 259, "train/loss": 2.902795374393463, "train/lm_loss": 2.902795374393463, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6000000000000002e-05, "perf/tokens_per_sec": 25534.7310420271, "train/loss_prose": 3.7369800408681235, "train/loss_code": 1.8823271592458088, "train/loss_math": 3.1822205781936646} +{"step": 260, "train/loss": 2.8490294814109802, "train/lm_loss": 2.8490294814109802, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.61e-05, "perf/tokens_per_sec": 25680.615000104637, "train/loss_code": 1.7072967886924744, "train/loss_math": 3.148640203475952, "train/loss_prose": 3.634442090988159} +{"step": 261, "train/loss": 2.6590417623519897, "train/lm_loss": 2.6590417623519897, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6200000000000003e-05, "perf/tokens_per_sec": 25628.513779533434, "train/loss_prose": 3.449650446573893, "train/loss_code": 1.662824312845866, "train/loss_math": 2.9674551486968994} +{"step": 262, "train/loss": 3.4430262446403503, "train/lm_loss": 3.4430262446403503, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6300000000000002e-05, "perf/tokens_per_sec": 26178.13025166548, "train/loss_math": 3.118165159225464, "train/loss_prose": 3.9844613075256348} +{"step": 263, "train/loss": 2.741605222225189, "train/lm_loss": 2.741605222225189, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.64e-05, "perf/tokens_per_sec": 25473.24426625229, "train/loss_prose": 3.6188714504241943, "train/loss_code": 1.8643389642238617} +{"step": 264, "train/loss": 2.9397487938404083, "train/lm_loss": 2.9397487938404083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6500000000000004e-05, "perf/tokens_per_sec": 25945.78103587583, "train/loss_math": 2.7534703413645425, "train/loss_code": 2.329102873802185, "train/loss_prose": 4.135135531425476} +{"step": 265, "train/loss": 3.0276829600334167, "train/lm_loss": 3.0276829600334167, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6600000000000003e-05, "perf/tokens_per_sec": 24772.95255179216, "train/loss_math": 3.2587581475575766, "train/loss_prose": 3.691850503285726, "train/loss_code": 1.684818685054779} +{"step": 266, "train/loss": 3.0253878831863403, "train/lm_loss": 3.0253878831863403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6700000000000002e-05, "perf/tokens_per_sec": 25855.696399438937, "train/loss_code": 2.1651642322540283, "train/loss_prose": 3.6725622415542603, "train/loss_math": 3.0173614025115967} +{"step": 267, "train/loss": 3.007272481918335, "train/lm_loss": 3.007272481918335, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6800000000000004e-05, "perf/tokens_per_sec": 25153.91028272742, "train/loss_code": 2.284202456474304, "train/loss_math": 2.9182090759277344, "train/loss_prose": 3.413339376449585} +{"step": 268, "train/loss": 2.1913702189922333, "train/lm_loss": 2.1913702189922333, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6900000000000003e-05, "perf/tokens_per_sec": 26090.23094738025, "train/loss_prose": 3.179847002029419, "train/loss_math": 3.1080600023269653, "train/loss_code": 1.6269989490509034} +{"step": 269, "train/loss": 2.488007128238678, "train/lm_loss": 2.488007128238678, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7000000000000002e-05, "perf/tokens_per_sec": 25790.332880973256, "train/loss_math": 3.130703926086426, "train/loss_code": 1.745185911655426, "train/loss_prose": 3.5312016010284424} +{"step": 270, "train/loss": 3.280120551586151, "train/lm_loss": 3.280120551586151, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7100000000000005e-05, "perf/tokens_per_sec": 26875.039787250684, "train/loss_math": 2.9076256155967712, "train/loss_prose": 3.652615547180176} +{"step": 271, "train/loss": 2.9783496260643005, "train/lm_loss": 2.9783496260643005, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7200000000000004e-05, "perf/tokens_per_sec": 26221.682022290126, "train/loss_prose": 3.6613985697428384, "train/loss_code": 2.3093879421552024, "train/loss_math": 2.9572185277938843} +{"step": 272, "train/loss": 2.215723752975464, "train/lm_loss": 2.215723752975464, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7300000000000003e-05, "perf/tokens_per_sec": 25229.2667361774, "train/loss_code": 1.6791767120361327, "train/loss_prose": 3.759387969970703, "train/loss_math": 2.7852590084075928} +{"step": 273, "train/loss": 3.037730634212494, "train/lm_loss": 3.037730634212494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7400000000000002e-05, "perf/tokens_per_sec": 25319.919447261065, "train/loss_code": 2.083868622779846, "train/loss_prose": 3.9085396925608316, "train/loss_math": 2.802829106648763} +{"step": 274, "train/loss": 2.7633424401283264, "train/lm_loss": 2.7633424401283264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7500000000000004e-05, "perf/tokens_per_sec": 25462.144681809692, "train/loss_prose": 3.730356812477112, "train/loss_math": 3.026020129521688, "train/loss_code": 1.8559888402620952} +{"step": 275, "train/loss": 3.3059178590774536, "train/lm_loss": 3.3059178590774536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7600000000000003e-05, "perf/tokens_per_sec": 25539.589912528467, "train/loss_math": 2.9388811588287354, "train/loss_prose": 3.921599292755127, "train/loss_code": 1.950232446193695} +{"step": 276, "train/loss": 2.8296622931957245, "train/lm_loss": 2.8296622931957245, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7700000000000002e-05, "perf/tokens_per_sec": 24937.35738059952, "train/loss_math": 2.971097946166992, "train/loss_code": 1.3146472573280334, "train/loss_prose": 3.6982367038726807} +{"step": 277, "train/loss": 2.4580198526382446, "train/lm_loss": 2.4580198526382446, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7800000000000005e-05, "perf/tokens_per_sec": 24997.481584951343, "train/loss_code": 1.3807689348856609, "train/loss_prose": 3.9635521173477173, "train/loss_math": 2.531582514444987} +{"step": 278, "train/loss": 2.680742472410202, "train/lm_loss": 2.680742472410202, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7900000000000004e-05, "perf/tokens_per_sec": 25977.166507647293, "train/loss_prose": 3.1207311153411865, "train/loss_code": 2.305587589740753, "train/loss_math": 2.7428064823150633} +{"step": 279, "train/loss": 3.1688653230667114, "train/lm_loss": 3.1688653230667114, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8000000000000003e-05, "perf/tokens_per_sec": 26418.699882514316, "train/loss_prose": 3.521484708786011, "train/loss_math": 3.0396947860717773, "train/loss_code": 1.6641093492507935} +{"step": 280, "train/loss": 2.6925094723701477, "train/lm_loss": 2.6925094723701477, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8100000000000005e-05, "perf/tokens_per_sec": 25743.378198279464, "train/loss_math": 3.008009338378906, "train/loss_code": 1.267115831375122, "train/loss_prose": 3.9657974243164062} +{"step": 281, "train/loss": 2.9108089208602905, "train/lm_loss": 2.9108089208602905, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8199999999999998e-05, "perf/tokens_per_sec": 25754.492730071972, "train/loss_math": 3.0557463645935057, "train/loss_prose": 3.3361393213272095, "train/loss_code": 1.335461974143982} +{"step": 282, "train/loss": 3.149877190589905, "train/lm_loss": 3.149877190589905, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.83e-05, "perf/tokens_per_sec": 25058.84670666194, "train/loss_code": 1.3895654678344727, "train/loss_prose": 3.718570649623871, "train/loss_math": 2.9783895015716553} +{"step": 283, "train/loss": 2.706253468990326, "train/lm_loss": 2.706253468990326, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.84e-05, "perf/tokens_per_sec": 25599.681095281307, "train/loss_code": 1.7227287689844768, "train/loss_math": 2.8190526962280273, "train/loss_prose": 3.6145784854888916} +{"step": 284, "train/loss": 2.723115563392639, "train/lm_loss": 2.723115563392639, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8499999999999998e-05, "perf/tokens_per_sec": 26665.55303350138, "train/loss_math": 2.9303871989250183, "train/loss_prose": 3.3034225702285767, "train/loss_code": 1.7282658219337463} +{"step": 285, "train/loss": 2.5048210620880127, "train/lm_loss": 2.5048210620880127, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.86e-05, "perf/tokens_per_sec": 25394.773757154344, "train/loss_prose": 3.638676643371582, "train/loss_math": 3.054720163345337, "train/loss_code": 1.8089328110218048} +{"step": 286, "train/loss": 2.048135668039322, "train/lm_loss": 2.048135668039322, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.87e-05, "perf/tokens_per_sec": 25076.806235677064, "train/loss_math": 2.7229764461517334, "train/loss_prose": 3.261380910873413, "train/loss_code": 1.4278693199157715} +{"step": 287, "train/loss": 2.7942010164260864, "train/lm_loss": 2.7942010164260864, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.88e-05, "perf/tokens_per_sec": 25103.078560960177, "train/loss_math": 2.846125078201294, "train/loss_code": 1.9862051010131836, "train/loss_prose": 4.150572299957275} +{"step": 288, "train/loss": 2.8473232984542847, "train/lm_loss": 2.8473232984542847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8899999999999998e-05, "perf/tokens_per_sec": 25880.85853999512, "train/loss_math": 2.8385231494903564, "train/loss_prose": 3.68861985206604, "train/loss_code": 1.5985788106918335} +{"step": 289, "train/loss": 2.5223991572856903, "train/lm_loss": 2.5223991572856903, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9e-05, "perf/tokens_per_sec": 24627.77682304994, "train/loss_prose": 3.62422776222229, "train/loss_math": 2.7931512594223022, "train/loss_code": 1.836108610033989} +{"step": 290, "train/loss": 3.0091888308525085, "train/lm_loss": 3.0091888308525085, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.91e-05, "perf/tokens_per_sec": 26217.920472719725, "train/loss_math": 2.9764267603556314, "train/loss_prose": 3.9345055421193442, "train/loss_code": 1.6703562140464783} +{"step": 291, "train/loss": 2.516485810279846, "train/lm_loss": 2.516485810279846, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9199999999999998e-05, "perf/tokens_per_sec": 24020.4653462301, "train/loss_math": 2.896894872188568, "train/loss_prose": 3.2222251892089844, "train/loss_code": 1.7740269899368286} +{"step": 292, "train/loss": 2.9911895394325256, "train/lm_loss": 2.9911895394325256, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.93e-05, "perf/tokens_per_sec": 24631.201839755464, "train/loss_prose": 3.655027389526367, "train/loss_code": 1.8847930034001668} +{"step": 293, "train/loss": 2.615256607532501, "train/lm_loss": 2.615256607532501, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.94e-05, "perf/tokens_per_sec": 25434.588615344073, "train/loss_prose": 3.506158471107483, "train/loss_code": 1.9412663578987122, "train/loss_math": 3.0723350048065186} +{"step": 294, "train/loss": 3.5482401847839355, "train/lm_loss": 3.5482401847839355, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.95e-05, "perf/tokens_per_sec": 24948.728642026585, "train/loss_prose": 3.9955214500427245, "train/loss_math": 2.802771011988322} +{"step": 295, "train/loss": 2.741461306810379, "train/lm_loss": 2.741461306810379, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.96e-05, "perf/tokens_per_sec": 24913.670406162626, "train/loss_code": 2.172498424847921, "train/loss_math": 2.9011058807373047, "train/loss_prose": 3.203994115193685} +{"step": 296, "train/loss": 2.1871786415576935, "train/lm_loss": 2.1871786415576935, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.97e-05, "perf/tokens_per_sec": 25579.02263567184, "train/loss_code": 1.026842750608921, "train/loss_math": 2.9089568853378296, "train/loss_prose": 3.786072254180908} +{"step": 297, "train/loss": 3.3641662001609802, "train/lm_loss": 3.3641662001609802, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.98e-05, "perf/tokens_per_sec": 24929.21555664547, "train/loss_math": 2.9891610741615295, "train/loss_prose": 3.7391715049743652} +{"step": 298, "train/loss": 2.92961847782135, "train/lm_loss": 2.92961847782135, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9900000000000002e-05, "perf/tokens_per_sec": 25400.368121851727, "train/loss_code": 1.3670398592948914, "train/loss_prose": 3.705805718898773, "train/loss_math": 2.9398224353790283} +{"step": 299, "train/loss": 2.8736003041267395, "train/lm_loss": 2.8736003041267395, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3e-05, "perf/tokens_per_sec": 24373.20683760199, "train/loss_prose": 3.8697763681411743, "train/loss_code": 1.6190468867619832, "train/loss_math": 2.652555465698242} +{"step": 300, "train/loss": 2.9186981916427612, "train/lm_loss": 2.9186981916427612, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.01e-05, "perf/tokens_per_sec": 25373.095428968085, "train/loss_prose": 3.460783863067627, "train/loss_code": 2.0152223904927573} +{"step": 301, "train/loss": 2.9807183146476746, "train/lm_loss": 2.9807183146476746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.02e-05, "perf/tokens_per_sec": 24571.62968355562, "train/loss_math": 2.838010390599569, "train/loss_code": 1.9727662801742554, "train/loss_prose": 3.7953945795694985} +{"step": 302, "train/loss": 2.5147098898887634, "train/lm_loss": 2.5147098898887634, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.03e-05, "perf/tokens_per_sec": 25834.15666024066, "train/loss_code": 2.084336686134338, "train/loss_prose": 3.610156774520874, "train/loss_math": 3.042919635772705} +{"step": 303, "train/loss": 3.2938541769981384, "train/lm_loss": 3.2938541769981384, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.04e-05, "perf/tokens_per_sec": 25748.62553150034, "train/loss_prose": 3.729706585407257, "train/loss_math": 3.1206379731496177, "train/loss_code": 2.0700924396514893} +{"step": 304, "train/loss": 2.778971254825592, "train/lm_loss": 2.778971254825592, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.05e-05, "perf/tokens_per_sec": 26083.813768046628, "train/loss_prose": 3.2573901812235513, "train/loss_math": 2.8397682309150696, "train/loss_code": 1.1005264520645142} +{"step": 305, "train/loss": 2.7169691026210785, "train/lm_loss": 2.7169691026210785, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.06e-05, "perf/tokens_per_sec": 25914.862217261118, "train/loss_code": 2.0342073837916055, "train/loss_prose": 3.7104976177215576, "train/loss_math": 2.7373785177866616} +{"step": 306, "train/loss": 2.712062567472458, "train/lm_loss": 2.712062567472458, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.07e-05, "perf/tokens_per_sec": 25397.589404482605, "train/loss_code": 2.0910678803920746, "train/loss_math": 2.95092511177063, "train/loss_prose": 3.7151896953582764} +{"step": 307, "train/loss": 2.633899211883545, "train/lm_loss": 2.633899211883545, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.08e-05, "perf/tokens_per_sec": 25834.0401167503, "train/loss_math": 2.8269099593162537, "train/loss_code": 1.1689203381538391, "train/loss_prose": 3.7128565311431885} +{"step": 308, "train/loss": 3.196778118610382, "train/lm_loss": 3.196778118610382, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.09e-05, "perf/tokens_per_sec": 25953.89475147219, "train/loss_math": 2.8752766450246177, "train/loss_prose": 3.3896791458129885} +{"step": 309, "train/loss": 2.450581341981888, "train/lm_loss": 2.450581341981888, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1e-05, "perf/tokens_per_sec": 26197.81050512752, "train/loss_code": 1.6053316593170166, "train/loss_math": 2.8928864002227783, "train/loss_prose": 3.6987757682800293} +{"step": 310, "train/loss": 2.3691230416297913, "train/lm_loss": 2.3691230416297913, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1100000000000004e-05, "perf/tokens_per_sec": 25846.866207597952, "train/loss_code": 2.168840479850769, "train/loss_math": 2.702927509943644} +{"step": 311, "train/loss": 3.127097189426422, "train/lm_loss": 3.127097189426422, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.12e-05, "perf/tokens_per_sec": 26285.27108460464, "train/loss_code": 1.808710515499115, "train/loss_prose": 3.9421017169952393, "train/loss_math": 2.8154749870300293} +{"step": 312, "train/loss": 3.1634591221809387, "train/lm_loss": 3.1634591221809387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.13e-05, "perf/tokens_per_sec": 25549.427191757495, "train/loss_math": 3.1397655487060545, "train/loss_code": 2.541823148727417, "train/loss_prose": 3.5335100889205933} +{"step": 313, "train/loss": 2.9554392993450165, "train/lm_loss": 2.9554392993450165, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1400000000000004e-05, "perf/tokens_per_sec": 26641.40394754798, "train/loss_math": 2.92050830523173, "train/loss_prose": 3.672281265258789, "train/loss_code": 1.9325730204582214} +{"step": 314, "train/loss": 2.5682528018951416, "train/lm_loss": 2.5682528018951416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.15e-05, "perf/tokens_per_sec": 26323.774447357413, "train/loss_math": 2.927433967590332, "train/loss_code": 1.9696173270543416} +{"step": 315, "train/loss": 2.7282306253910065, "train/lm_loss": 2.7282306253910065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.16e-05, "perf/tokens_per_sec": 25921.744078918582, "train/loss_code": 1.9652643998463948, "train/loss_math": 2.983003854751587, "train/loss_prose": 3.4905200004577637} +{"step": 316, "train/loss": 2.8973889350891113, "train/lm_loss": 2.8973889350891113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1700000000000005e-05, "perf/tokens_per_sec": 25198.58954881274, "train/loss_math": 2.783647656440735, "train/loss_prose": 3.7206965684890747, "train/loss_code": 1.364514708518982} +{"step": 317, "train/loss": 2.837181568145752, "train/lm_loss": 2.837181568145752, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.18e-05, "perf/tokens_per_sec": 25920.218805729648, "train/loss_code": 2.043952703475952, "train/loss_math": 2.840276300907135, "train/loss_prose": 3.6242207288742065} +{"step": 318, "train/loss": 3.2209725379943848, "train/lm_loss": 3.2209725379943848, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.19e-05, "perf/tokens_per_sec": 26720.422901349873, "train/loss_prose": 3.5752925276756287, "train/loss_code": 2.364210844039917, "train/loss_math": 3.0341334342956543} +{"step": 319, "train/loss": 2.4356516301631927, "train/lm_loss": 2.4356516301631927, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2000000000000005e-05, "perf/tokens_per_sec": 26294.7253787357, "train/loss_code": 1.5877106587092082, "train/loss_prose": 3.2349109649658203, "train/loss_math": 2.8717923164367676} +{"step": 320, "train/loss": 2.8497432470321655, "train/lm_loss": 2.8497432470321655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.21e-05, "perf/tokens_per_sec": 25781.85679405122, "train/loss_math": 2.6334709525108337, "train/loss_code": 1.3901127576828003, "train/loss_prose": 3.624649922053019} +{"step": 321, "train/loss": 3.0089081525802612, "train/lm_loss": 3.0089081525802612, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2200000000000003e-05, "perf/tokens_per_sec": 25783.752786257366, "train/loss_prose": 3.7399490674336753, "train/loss_math": 2.87601105372111, "train/loss_code": 2.1116923689842224} +{"step": 322, "train/loss": 2.826781690120697, "train/lm_loss": 2.826781690120697, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2300000000000006e-05, "perf/tokens_per_sec": 27336.847556929657, "train/loss_code": 1.1523916721343994, "train/loss_prose": 3.4431078910827635, "train/loss_math": 3.0939321517944336} +{"step": 323, "train/loss": 2.7655258774757385, "train/lm_loss": 2.7655258774757385, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.24e-05, "perf/tokens_per_sec": 26546.350905874082, "train/loss_math": 2.845073366165161, "train/loss_code": 1.0883939266204834, "train/loss_prose": 3.405223250389099} +{"step": 324, "train/loss": 2.6165924966335297, "train/lm_loss": 2.6165924966335297, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2500000000000004e-05, "perf/tokens_per_sec": 26592.33117146457, "train/loss_math": 2.9539576371510825, "train/loss_code": 1.934403379758199, "train/loss_prose": 3.1338285207748413} +{"step": 325, "train/loss": 3.019439458847046, "train/lm_loss": 3.019439458847046, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.26e-05, "perf/tokens_per_sec": 26775.60250863434, "train/loss_math": 2.6157871087392173, "train/loss_prose": 4.139086167017619, "train/loss_code": 1.9454479217529297} +{"step": 326, "train/loss": 2.5711286067962646, "train/lm_loss": 2.5711286067962646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.27e-05, "perf/tokens_per_sec": 26886.269562727022, "train/loss_prose": 3.7937939167022705, "train/loss_code": 1.5304074883460999, "train/loss_math": 3.0660171508789062} +{"step": 327, "train/loss": 2.5008739233016968, "train/lm_loss": 2.5008739233016968, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2800000000000004e-05, "perf/tokens_per_sec": 25779.728971654087, "train/loss_prose": 3.246335744857788, "train/loss_code": 1.8871456980705261, "train/loss_math": 2.9828683137893677} +{"step": 328, "train/loss": 2.6548828184604645, "train/lm_loss": 2.6548828184604645, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.29e-05, "perf/tokens_per_sec": 25607.808777672442, "train/loss_prose": 3.8330847024917603, "train/loss_math": 2.9732500314712524, "train/loss_code": 1.9065980911254883} +{"step": 329, "train/loss": 2.8200109004974365, "train/lm_loss": 2.8200109004974365, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3e-05, "perf/tokens_per_sec": 25557.256849411868, "train/loss_math": 2.761368465423584, "train/loss_prose": 3.617516875267029, "train/loss_code": 1.5182101726531982} +{"step": 330, "train/loss": 2.9249802827835083, "train/lm_loss": 2.9249802827835083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3100000000000005e-05, "perf/tokens_per_sec": 25037.154055452327, "train/loss_prose": 3.756253480911255, "train/loss_code": 1.6719266176223755, "train/loss_math": 2.6147887110710144} +{"step": 331, "train/loss": 2.847016990184784, "train/lm_loss": 2.847016990184784, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.32e-05, "perf/tokens_per_sec": 24840.256536160345, "train/loss_prose": 3.672670602798462, "train/loss_code": 1.9240940809249878, "train/loss_math": 2.8956512212753296} +{"step": 332, "train/loss": 2.471138834953308, "train/lm_loss": 2.471138834953308, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.33e-05, "perf/tokens_per_sec": 25246.840717380186, "train/loss_math": 2.952127695083618, "train/loss_code": 1.7721051931381226, "train/loss_prose": 3.9782280921936035} +{"step": 333, "train/loss": 2.789552092552185, "train/lm_loss": 2.789552092552185, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3400000000000005e-05, "perf/tokens_per_sec": 25213.789294478167, "train/loss_math": 3.0136288007100425, "train/loss_prose": 3.9008939266204834, "train/loss_code": 1.8245809078216553} +{"step": 334, "train/loss": 2.234455019235611, "train/lm_loss": 2.234455019235611, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.35e-05, "perf/tokens_per_sec": 26107.0811574734, "train/loss_math": 2.7377431988716125, "train/loss_code": 1.1890358527501423, "train/loss_prose": 3.357560396194458} +{"step": 335, "train/loss": 2.8625082969665527, "train/lm_loss": 2.8625082969665527, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3600000000000004e-05, "perf/tokens_per_sec": 26321.233620346255, "train/loss_code": 2.283631533384323, "train/loss_prose": 3.9348511695861816, "train/loss_math": 2.9479180574417114} +{"step": 336, "train/loss": 2.1658672392368317, "train/lm_loss": 2.1658672392368317, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3700000000000006e-05, "perf/tokens_per_sec": 26334.62889657371, "train/loss_code": 1.2225446820259094, "train/loss_prose": 4.113600254058838, "train/loss_math": 2.9870128631591797} +{"step": 337, "train/loss": 2.74156191945076, "train/lm_loss": 2.74156191945076, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.38e-05, "perf/tokens_per_sec": 26171.390680293767, "train/loss_code": 1.9444412887096405, "train/loss_prose": 3.861653486887614, "train/loss_math": 2.5697693824768066} +{"step": 338, "train/loss": 3.2315743565559387, "train/lm_loss": 3.2315743565559387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3900000000000004e-05, "perf/tokens_per_sec": 27316.679017037277, "train/loss_prose": 3.8822047114372253, "train/loss_math": 2.877244710922241, "train/loss_code": 1.6920416355133057} +{"step": 339, "train/loss": 2.570225238800049, "train/lm_loss": 2.570225238800049, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4000000000000007e-05, "perf/tokens_per_sec": 26687.341255712658, "train/loss_math": 2.837579071521759, "train/loss_prose": 3.72786545753479, "train/loss_code": 1.827873706817627} +{"step": 340, "train/loss": 2.615566849708557, "train/lm_loss": 2.615566849708557, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.41e-05, "perf/tokens_per_sec": 26466.317863205993, "train/loss_code": 2.177976667881012, "train/loss_math": 2.816571831703186, "train/loss_prose": 3.289742112159729} +{"step": 341, "train/loss": 2.6998819708824158, "train/lm_loss": 2.6998819708824158, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4200000000000005e-05, "perf/tokens_per_sec": 26516.604157180762, "train/loss_math": 2.8365781784057615, "train/loss_code": 2.4720547993977866} +{"step": 342, "train/loss": 2.94193834066391, "train/lm_loss": 2.94193834066391, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.430000000000001e-05, "perf/tokens_per_sec": 27091.51764746697, "train/loss_math": 2.8417670726776123, "train/loss_code": 1.2865133881568909, "train/loss_prose": 3.819736659526825} +{"step": 343, "train/loss": 2.7874273657798767, "train/lm_loss": 2.7874273657798767, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4399999999999996e-05, "perf/tokens_per_sec": 26428.331731930375, "train/loss_code": 1.9449487527211506, "train/loss_prose": 3.347778856754303, "train/loss_math": 3.0734570026397705} +{"step": 344, "train/loss": 2.848714828491211, "train/lm_loss": 2.848714828491211, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.45e-05, "perf/tokens_per_sec": 26221.84211217173, "train/loss_math": 2.738162120183309, "train/loss_code": 0.8482044339179993, "train/loss_prose": 3.431757092475891} +{"step": 345, "train/loss": 2.742781937122345, "train/lm_loss": 2.742781937122345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.46e-05, "perf/tokens_per_sec": 26133.76877371148, "train/loss_math": 2.6446661949157715, "train/loss_prose": 3.3816274404525757, "train/loss_code": 1.5632066428661346} +{"step": 346, "train/loss": 2.36110720038414, "train/lm_loss": 2.36110720038414, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4699999999999996e-05, "perf/tokens_per_sec": 27113.109215528766, "train/loss_math": 2.7332029938697815, "train/loss_code": 1.6482481559117634, "train/loss_prose": 3.011300563812256} +{"step": 347, "train/loss": 2.6354547441005707, "train/lm_loss": 2.6354547441005707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.48e-05, "perf/tokens_per_sec": 26279.601770750796, "train/loss_prose": 3.7200701236724854, "train/loss_code": 2.1234348714351654, "train/loss_math": 2.574879288673401} +{"step": 348, "train/loss": 2.8477815985679626, "train/lm_loss": 2.8477815985679626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.49e-05, "perf/tokens_per_sec": 26303.421283579988, "train/loss_prose": 3.745518445968628, "train/loss_math": 2.8976413011550903, "train/loss_code": 2.4820565780003867} +{"step": 349, "train/loss": 2.6239330172538757, "train/lm_loss": 2.6239330172538757, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5e-05, "perf/tokens_per_sec": 26280.92841222516, "train/loss_math": 2.746609330177307, "train/loss_code": 1.5462981462478638, "train/loss_prose": 3.619783798853556} +{"step": 350, "train/loss": 2.772268056869507, "train/lm_loss": 2.772268056869507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.51e-05, "perf/tokens_per_sec": 26543.76693220936, "train/loss_prose": 3.4661301374435425, "train/loss_code": 1.7450215021769206, "train/loss_math": 3.0785584449768066} +{"step": 351, "train/loss": 2.659717381000519, "train/lm_loss": 2.659717381000519, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.52e-05, "perf/tokens_per_sec": 26052.762833130633, "train/loss_math": 2.654318928718567, "train/loss_code": 1.5914048552513123, "train/loss_prose": 3.738827109336853} +{"step": 352, "train/loss": 2.942622423171997, "train/lm_loss": 2.942622423171997, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.53e-05, "perf/tokens_per_sec": 26361.864899784254, "train/loss_prose": 3.764049847920736, "train/loss_math": 2.8003013928731284, "train/loss_code": 1.9239623546600342} +{"step": 353, "train/loss": 2.7978990375995636, "train/lm_loss": 2.7978990375995636, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.54e-05, "perf/tokens_per_sec": 26144.785360026177, "train/loss_math": 2.895648181438446, "train/loss_code": 2.1475704511006675, "train/loss_prose": 4.357887268066406} +{"step": 354, "train/loss": 3.0482612252235413, "train/lm_loss": 3.0482612252235413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.55e-05, "perf/tokens_per_sec": 26314.37986735491, "train/loss_prose": 3.698622465133667, "train/loss_code": 1.7650929689407349, "train/loss_math": 2.881282150745392} +{"step": 355, "train/loss": 2.9489510655403137, "train/lm_loss": 2.9489510655403137, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.56e-05, "perf/tokens_per_sec": 27299.229456696703, "train/loss_prose": 3.6613175868988037, "train/loss_code": 1.3607923984527588, "train/loss_math": 2.81171578168869} +{"step": 356, "train/loss": 2.6846718192100525, "train/lm_loss": 2.6846718192100525, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.57e-05, "perf/tokens_per_sec": 25835.671821308424, "train/loss_math": 2.495404154062271, "train/loss_prose": 3.767119288444519, "train/loss_code": 1.9807597398757935} +{"step": 357, "train/loss": 2.4801703691482544, "train/lm_loss": 2.4801703691482544, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.58e-05, "perf/tokens_per_sec": 26498.85348243551, "train/loss_math": 2.852018197377523, "train/loss_code": 1.5708161294460297, "train/loss_prose": 3.28643000125885} +{"step": 358, "train/loss": 3.0110197961330414, "train/lm_loss": 3.0110197961330414, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.59e-05, "perf/tokens_per_sec": 26948.01124986079, "train/loss_prose": 3.450161600112915, "train/loss_math": 2.711664080619812, "train/loss_code": 1.4140228033065796} +{"step": 359, "train/loss": 2.53569296002388, "train/lm_loss": 2.53569296002388, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6e-05, "perf/tokens_per_sec": 26229.92919391974, "train/loss_code": 1.4204399188359578, "train/loss_prose": 3.5526276429494223, "train/loss_math": 2.6831705570220947} +{"step": 360, "train/loss": 3.07484233379364, "train/lm_loss": 3.07484233379364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.61e-05, "perf/tokens_per_sec": 27251.72534873908, "train/loss_code": 1.7458077669143677, "train/loss_prose": 3.7730754017829895, "train/loss_math": 3.007410407066345} +{"step": 361, "train/loss": 2.817875623703003, "train/lm_loss": 2.817875623703003, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.62e-05, "perf/tokens_per_sec": 26493.418515925474, "train/loss_math": 2.774831255276998, "train/loss_prose": 4.06678581237793, "train/loss_code": 1.8272305727005005} +{"step": 362, "train/loss": 2.2303550243377686, "train/lm_loss": 2.2303550243377686, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.63e-05, "perf/tokens_per_sec": 26539.871415043184, "train/loss_code": 1.716067612171173, "train/loss_prose": 2.967557430267334, "train/loss_math": 2.670337359110514} +{"step": 363, "train/loss": 2.6853795051574707, "train/lm_loss": 2.6853795051574707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6400000000000004e-05, "perf/tokens_per_sec": 26462.118963379413, "train/loss_prose": 3.391832733154297, "train/loss_code": 1.5079574982325237} +{"step": 364, "train/loss": 2.566787451505661, "train/lm_loss": 2.566787451505661, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.65e-05, "perf/tokens_per_sec": 26513.08485332041, "train/loss_math": 2.826633393764496, "train/loss_prose": 3.772851824760437, "train/loss_code": 0.841031402349472} +{"step": 365, "train/loss": 3.0402060747146606, "train/lm_loss": 3.0402060747146606, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.66e-05, "perf/tokens_per_sec": 26726.28402882354, "train/loss_code": 1.6041644215583801, "train/loss_math": 2.8444992899894714, "train/loss_prose": 4.867661237716675} +{"step": 366, "train/loss": 3.2088390588760376, "train/lm_loss": 3.2088390588760376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6700000000000004e-05, "perf/tokens_per_sec": 26871.676886740806, "train/loss_math": 2.881362497806549, "train/loss_prose": 3.924092690149943, "train/loss_code": 2.3729846477508545} +{"step": 367, "train/loss": 3.1369259357452393, "train/lm_loss": 3.1369259357452393, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.68e-05, "perf/tokens_per_sec": 27303.78502635042, "train/loss_prose": 3.4758580327033997, "train/loss_math": 3.2094104290008545, "train/loss_code": 2.6608547369639077} +{"step": 368, "train/loss": 3.3476243019104004, "train/lm_loss": 3.3476243019104004, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.69e-05, "perf/tokens_per_sec": 26947.081339082506, "train/loss_prose": 3.550032615661621, "train/loss_math": 2.9407835006713867, "train/loss_code": 2.54001522064209} +{"step": 369, "train/loss": 2.5376678109169006, "train/lm_loss": 2.5376678109169006, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7e-05, "perf/tokens_per_sec": 25964.17156433058, "train/loss_code": 2.092223620414734, "train/loss_prose": 3.5428913831710815, "train/loss_math": 2.754441261291504} +{"step": 370, "train/loss": 2.817989230155945, "train/lm_loss": 2.817989230155945, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.71e-05, "perf/tokens_per_sec": 26611.608797077966, "train/loss_prose": 3.6247671445210776, "train/loss_code": 1.6987374424934387, "train/loss_math": 2.7573792139689126} +{"step": 371, "train/loss": 2.3898452520370483, "train/lm_loss": 2.3898452520370483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.72e-05, "perf/tokens_per_sec": 27281.585558117524, "train/loss_math": 2.9203200340270996, "train/loss_code": 1.505720853805542} +{"step": 372, "train/loss": 2.097241848707199, "train/lm_loss": 2.097241848707199, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.73e-05, "perf/tokens_per_sec": 27301.05150212546, "train/loss_math": 2.95532763004303, "train/loss_code": 1.8112131754557292} +{"step": 373, "train/loss": 3.032144546508789, "train/lm_loss": 3.032144546508789, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.74e-05, "perf/tokens_per_sec": 26576.534480144794, "train/loss_prose": 3.5145193099975587, "train/loss_code": 0.9571823477745056, "train/loss_math": 2.863688588142395} +{"step": 374, "train/loss": 2.6505890488624573, "train/lm_loss": 2.6505890488624573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7500000000000003e-05, "perf/tokens_per_sec": 26948.349415147346, "train/loss_math": 3.0272208054860434, "train/loss_prose": 3.6865525245666504, "train/loss_code": 2.109124481678009} +{"step": 375, "train/loss": 2.5408937335014343, "train/lm_loss": 2.5408937335014343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.76e-05, "perf/tokens_per_sec": 26382.470701874274, "train/loss_prose": 3.630173444747925, "train/loss_code": 1.8873259782791139} +{"step": 376, "train/loss": 2.6645577549934387, "train/lm_loss": 2.6645577549934387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.77e-05, "perf/tokens_per_sec": 26605.839056677993, "train/loss_math": 2.728711986541748, "train/loss_code": 1.5315064191818237, "train/loss_prose": 4.609889030456543} +{"step": 377, "train/loss": 2.4175376892089844, "train/lm_loss": 2.4175376892089844, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7800000000000004e-05, "perf/tokens_per_sec": 27309.6010097317, "train/loss_math": 2.7064393043518065, "train/loss_code": 1.9360347986221313} +{"step": 378, "train/loss": 2.9654253721237183, "train/lm_loss": 2.9654253721237183, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.79e-05, "perf/tokens_per_sec": 27365.41256938969, "train/loss_prose": 3.6642584204673767, "train/loss_code": 1.7959426641464233, "train/loss_math": 2.7372416257858276} +{"step": 379, "train/loss": 2.753809690475464, "train/lm_loss": 2.753809690475464, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8e-05, "perf/tokens_per_sec": 26256.627914156372, "train/loss_math": 2.920197288195292, "train/loss_code": 2.254647135734558} +{"step": 380, "train/loss": 3.403421640396118, "train/lm_loss": 3.403421640396118, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8100000000000005e-05, "perf/tokens_per_sec": 26197.890404148995, "train/loss_prose": 3.8093850215276084, "train/loss_code": 1.5925157070159912, "train/loss_math": 2.7785465717315674} +{"step": 381, "train/loss": 2.0675790905952454, "train/lm_loss": 2.0675790905952454, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.82e-05, "perf/tokens_per_sec": 26439.109841641144, "train/loss_math": 2.798584540685018, "train/loss_code": 1.6289755702018738} +{"step": 382, "train/loss": 2.533802419900894, "train/lm_loss": 2.533802419900894, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.83e-05, "perf/tokens_per_sec": 26843.671429709826, "train/loss_code": 1.9902740716934204, "train/loss_prose": 3.7432055473327637, "train/loss_math": 2.8326377868652344} +{"step": 383, "train/loss": 2.379263550043106, "train/lm_loss": 2.379263550043106, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8400000000000005e-05, "perf/tokens_per_sec": 27049.161491651383, "train/loss_prose": 3.731032967567444, "train/loss_math": 2.621930480003357, "train/loss_code": 1.5820454061031342} +{"step": 384, "train/loss": 2.9849981665611267, "train/lm_loss": 2.9849981665611267, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.85e-05, "perf/tokens_per_sec": 27131.949747077135, "train/loss_prose": 3.895641883214315, "train/loss_code": 2.003733277320862, "train/loss_math": 2.7285308837890625} +{"step": 385, "train/loss": 2.573973596096039, "train/lm_loss": 2.573973596096039, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.86e-05, "perf/tokens_per_sec": 27007.44704424979, "train/loss_math": 2.925528860092163, "train/loss_code": 1.9880481163660686} +{"step": 386, "train/loss": 2.8292033672332764, "train/lm_loss": 2.8292033672332764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8700000000000006e-05, "perf/tokens_per_sec": 27141.937272893876, "train/loss_math": 2.859210252761841, "train/loss_prose": 3.3322978615760803, "train/loss_code": 1.7930073142051697} +{"step": 387, "train/loss": 2.3936954140663147, "train/lm_loss": 2.3936954140663147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.88e-05, "perf/tokens_per_sec": 26896.540189341813, "train/loss_math": 2.783118645350138, "train/loss_code": 1.543135166168213, "train/loss_prose": 3.085400700569153} +{"step": 388, "train/loss": 2.2160991728305817, "train/lm_loss": 2.2160991728305817, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8900000000000004e-05, "perf/tokens_per_sec": 26111.20781822327, "train/loss_code": 1.5432507246732712, "train/loss_prose": 3.0789644718170166, "train/loss_math": 2.825609048207601} +{"step": 389, "train/loss": 3.0713725090026855, "train/lm_loss": 3.0713725090026855, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9000000000000006e-05, "perf/tokens_per_sec": 26253.498227340522, "train/loss_math": 2.78289794921875, "train/loss_prose": 3.762923300266266, "train/loss_code": 1.976745843887329} +{"step": 390, "train/loss": 2.995162159204483, "train/lm_loss": 2.995162159204483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.91e-05, "perf/tokens_per_sec": 27356.52315369929, "train/loss_math": 3.144229769706726, "train/loss_code": 1.9685670534769695, "train/loss_prose": 3.92237917582194} +{"step": 391, "train/loss": 2.7644044756889343, "train/lm_loss": 2.7644044756889343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9200000000000004e-05, "perf/tokens_per_sec": 26680.461280423413, "train/loss_prose": 3.587113698323568, "train/loss_math": 2.4701643586158752, "train/loss_code": 1.4732370376586914} +{"step": 392, "train/loss": 2.4418786764144897, "train/lm_loss": 2.4418786764144897, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9300000000000007e-05, "perf/tokens_per_sec": 27182.56312953112, "train/loss_code": 1.4746017456054688, "train/loss_prose": 3.7313512563705444, "train/loss_math": 3.0869596004486084} +{"step": 393, "train/loss": 2.768480658531189, "train/lm_loss": 2.768480658531189, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.94e-05, "perf/tokens_per_sec": 26622.454435430758, "train/loss_prose": 3.591243346532186, "train/loss_code": 1.8172727425893147, "train/loss_math": 2.9611483812332153} +{"step": 394, "train/loss": 2.7715859413146973, "train/lm_loss": 2.7715859413146973, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9500000000000005e-05, "perf/tokens_per_sec": 27247.57606382135, "train/loss_prose": 4.03837513923645, "train/loss_code": 1.5372722546259563, "train/loss_math": 2.722872257232666} +{"step": 395, "train/loss": 2.6819096207618713, "train/lm_loss": 2.6819096207618713, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.960000000000001e-05, "perf/tokens_per_sec": 27320.284565670958, "train/loss_code": 1.6217626333236694, "train/loss_math": 2.8057663440704346, "train/loss_prose": 3.494342803955078} +{"step": 396, "train/loss": 3.060495913028717, "train/lm_loss": 3.060495913028717, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.97e-05, "perf/tokens_per_sec": 27313.76920802185, "train/loss_code": 1.8415269255638123, "train/loss_math": 2.7845250368118286, "train/loss_prose": 3.8079657554626465} +{"step": 397, "train/loss": 2.827485203742981, "train/lm_loss": 2.827485203742981, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9800000000000005e-05, "perf/tokens_per_sec": 27322.239779130294, "train/loss_code": 1.8145895600318909, "train/loss_prose": 3.604441285133362, "train/loss_math": 2.758347511291504} +{"step": 398, "train/loss": 2.9939109683036804, "train/lm_loss": 2.9939109683036804, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.99e-05, "perf/tokens_per_sec": 26749.87922602263, "train/loss_prose": 3.9241036772727966, "train/loss_code": 1.3716897368431091, "train/loss_math": 2.7557467222213745} +{"step": 399, "train/loss": 2.7305792570114136, "train/lm_loss": 2.7305792570114136, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4e-05, "perf/tokens_per_sec": 26598.383313799837, "train/loss_prose": 3.7080175081888833, "train/loss_code": 1.7043219010035198, "train/loss_math": 2.803808093070984} +{"step": 400, "train/loss": 2.4762965738773346, "train/lm_loss": 2.4762965738773346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0100000000000006e-05, "perf/tokens_per_sec": 26632.52445699776, "train/loss_prose": 3.7893989086151123, "train/loss_code": 1.5643379092216492, "train/loss_math": 2.9871116876602173} +{"step": 400, "eval/loss": 2.4290697799843888, "eval/lm_loss": 2.4290697799843888, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 11.3483207334852, "eval/loss_code": 1.737302694010408, "eval/ppl_code": 5.681996650907522, "eval/loss_prose": 3.5020799490443446, "eval/ppl_prose": 33.184402092872084, "eval/loss_math": 2.6286719504910234, "eval/ppl_math": 13.855357078100932} +{"step": 401, "train/loss": 2.463477313518524, "train/lm_loss": 2.463477313518524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.02e-05, "perf/tokens_per_sec": 22169.861642492127, "train/loss_code": 2.095621029535929, "train/loss_prose": 3.5670461654663086} +{"step": 402, "train/loss": 3.2614229321479797, "train/lm_loss": 3.2614229321479797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0300000000000004e-05, "perf/tokens_per_sec": 26242.510110622647, "train/loss_prose": 3.6113988876342775, "train/loss_code": 2.319213628768921, "train/loss_math": 2.857587695121765} +{"step": 403, "train/loss": 3.415133059024811, "train/lm_loss": 3.415133059024811, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0400000000000006e-05, "perf/tokens_per_sec": 24823.99707542767, "train/loss_code": 2.208089053630829, "train/loss_math": 3.180509567260742, "train/loss_prose": 3.9448752403259277} +{"step": 404, "train/loss": 2.79066926240921, "train/lm_loss": 2.79066926240921, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.05e-05, "perf/tokens_per_sec": 25336.125318916114, "train/loss_prose": 3.4472378889719644, "train/loss_code": 1.7704786658287048, "train/loss_math": 2.814227739969889} +{"step": 405, "train/loss": 2.3822978138923645, "train/lm_loss": 2.3822978138923645, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0600000000000004e-05, "perf/tokens_per_sec": 25700.169167640277, "train/loss_code": 1.7237544655799866, "train/loss_math": 2.8204545974731445, "train/loss_prose": 3.261228084564209} +{"step": 406, "train/loss": 2.3906845450401306, "train/lm_loss": 2.3906845450401306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.07e-05, "perf/tokens_per_sec": 25594.41802647359, "train/loss_code": 1.3036430180072784, "train/loss_prose": 3.7978745301564536, "train/loss_math": 2.5172812938690186} +{"step": 407, "train/loss": 2.581658363342285, "train/lm_loss": 2.581658363342285, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.08e-05, "perf/tokens_per_sec": 26709.58036100185, "train/loss_code": 2.0853649775187173, "train/loss_math": 2.9022355874379477, "train/loss_prose": 2.8452327251434326} +{"step": 408, "train/loss": 2.9428398609161377, "train/lm_loss": 2.9428398609161377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.09e-05, "perf/tokens_per_sec": 26343.674378664833, "train/loss_code": 1.7197535037994385, "train/loss_math": 2.787784194946289, "train/loss_prose": 3.9420218467712402} +{"step": 409, "train/loss": 2.8681691884994507, "train/lm_loss": 2.8681691884994507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1e-05, "perf/tokens_per_sec": 26001.10964742348, "train/loss_prose": 3.904460827509562, "train/loss_math": 2.6663010120391846, "train/loss_code": 2.141417443752289} +{"step": 410, "train/loss": 3.2125076055526733, "train/lm_loss": 3.2125076055526733, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.11e-05, "perf/tokens_per_sec": 26228.847958546503, "train/loss_prose": 4.495076020558675, "train/loss_code": 2.094991445541382, "train/loss_math": 2.674949804941813} +{"step": 411, "train/loss": 3.013374626636505, "train/lm_loss": 3.013374626636505, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.12e-05, "perf/tokens_per_sec": 26902.77373780714, "train/loss_prose": 3.5948005318641663, "train/loss_code": 2.1201120018959045, "train/loss_math": 2.7437855005264282} +{"step": 412, "train/loss": 2.7698925733566284, "train/lm_loss": 2.7698925733566284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.13e-05, "perf/tokens_per_sec": 26280.12437110021, "train/loss_math": 2.725778341293335, "train/loss_code": 1.6618794004122417, "train/loss_prose": 3.9073155721028647} +{"step": 413, "train/loss": 2.4863621592521667, "train/lm_loss": 2.4863621592521667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.14e-05, "perf/tokens_per_sec": 26031.643057695834, "train/loss_code": 1.836080551147461, "train/loss_prose": 3.475116491317749, "train/loss_math": 2.7981714010238647} +{"step": 414, "train/loss": 3.0151408910751343, "train/lm_loss": 3.0151408910751343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.15e-05, "perf/tokens_per_sec": 26396.252846683925, "train/loss_math": 2.580010175704956, "train/loss_prose": 3.6794078946113586, "train/loss_code": 1.6634647846221924} +{"step": 415, "train/loss": 2.928374171257019, "train/lm_loss": 2.928374171257019, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.16e-05, "perf/tokens_per_sec": 26526.63520503485, "train/loss_prose": 3.5616579055786133, "train/loss_code": 1.6241408586502075, "train/loss_math": 2.518740177154541} +{"step": 416, "train/loss": 2.24473437666893, "train/lm_loss": 2.24473437666893, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.17e-05, "perf/tokens_per_sec": 26155.771416913685, "train/loss_code": 1.3466938734054565, "train/loss_math": 2.7256694436073303, "train/loss_prose": 3.015115976333618} +{"step": 417, "train/loss": 2.176492601633072, "train/lm_loss": 2.176492601633072, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.18e-05, "perf/tokens_per_sec": 25929.49054351222, "train/loss_code": 2.0648735761642456, "train/loss_math": 2.9578261375427246} +{"step": 418, "train/loss": 2.733404040336609, "train/lm_loss": 2.733404040336609, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.19e-05, "perf/tokens_per_sec": 25800.63553546488, "train/loss_prose": 3.616478522618612, "train/loss_code": 1.9200860261917114, "train/loss_math": 2.6287689208984375} +{"step": 419, "train/loss": 2.362096756696701, "train/lm_loss": 2.362096756696701, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2e-05, "perf/tokens_per_sec": 26304.83088376024, "train/loss_prose": 3.1279380321502686, "train/loss_math": 2.748627185821533, "train/loss_code": 1.4650054772694905} +{"step": 420, "train/loss": 2.447284460067749, "train/lm_loss": 2.447284460067749, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.21e-05, "perf/tokens_per_sec": 24532.85712000914, "train/loss_code": 1.540895660718282, "train/loss_prose": 3.9703428745269775, "train/loss_math": 2.7463112473487854} +{"step": 421, "train/loss": 2.816608190536499, "train/lm_loss": 2.816608190536499, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.22e-05, "perf/tokens_per_sec": 25459.91837944459, "train/loss_math": 2.912724773089091, "train/loss_prose": 3.617199420928955, "train/loss_code": 1.4393174648284912} +{"step": 422, "train/loss": 2.72809237241745, "train/lm_loss": 2.72809237241745, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.23e-05, "perf/tokens_per_sec": 24997.44521254538, "train/loss_prose": 3.5558621088663735, "train/loss_code": 1.7184105515480042, "train/loss_math": 2.57344381014506} +{"step": 423, "train/loss": 2.6713158190250397, "train/lm_loss": 2.6713158190250397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.24e-05, "perf/tokens_per_sec": 25485.6767092074, "train/loss_code": 1.738497257232666, "train/loss_prose": 3.5861972173055015, "train/loss_math": 2.698221206665039} +{"step": 424, "train/loss": 2.583581805229187, "train/lm_loss": 2.583581805229187, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.25e-05, "perf/tokens_per_sec": 25669.525773565565, "train/loss_code": 1.7172006368637085, "train/loss_prose": 3.66168212890625, "train/loss_math": 2.731229623158773} +{"step": 425, "train/loss": 2.5333101749420166, "train/lm_loss": 2.5333101749420166, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.26e-05, "perf/tokens_per_sec": 25497.32659533891, "train/loss_math": 2.77647442817688, "train/loss_code": 1.2844347953796387, "train/loss_prose": 3.8152387142181396} +{"step": 426, "train/loss": 3.1767539381980896, "train/lm_loss": 3.1767539381980896, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.27e-05, "perf/tokens_per_sec": 24960.944373574323, "train/loss_prose": 3.496799111366272, "train/loss_code": 2.0927505493164062, "train/loss_math": 2.3404860496520996} +{"step": 427, "train/loss": 2.7464233338832855, "train/lm_loss": 2.7464233338832855, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2800000000000004e-05, "perf/tokens_per_sec": 25146.914698908047, "train/loss_prose": 3.9890012741088867, "train/loss_code": 1.9087591767311096, "train/loss_math": 2.3693461418151855} +{"step": 428, "train/loss": 2.9253767132759094, "train/lm_loss": 2.9253767132759094, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.29e-05, "perf/tokens_per_sec": 25772.14904156278, "train/loss_code": 1.3402522802352905, "train/loss_prose": 3.6482656002044678, "train/loss_math": 2.7794912457466125} +{"step": 429, "train/loss": 2.7975319623947144, "train/lm_loss": 2.7975319623947144, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3e-05, "perf/tokens_per_sec": 26223.963487661058, "train/loss_prose": 3.539330164591471, "train/loss_math": 2.9325106938680015, "train/loss_code": 1.4823659658432007} +{"step": 430, "train/loss": 2.7705305218696594, "train/lm_loss": 2.7705305218696594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3100000000000004e-05, "perf/tokens_per_sec": 25761.251040654504, "train/loss_math": 2.5290517807006836, "train/loss_code": 1.678809404373169, "train/loss_prose": 3.437130331993103} +{"step": 431, "train/loss": 2.7490957975387573, "train/lm_loss": 2.7490957975387573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.32e-05, "perf/tokens_per_sec": 26117.638485347863, "train/loss_prose": 3.317119002342224, "train/loss_code": 1.7569287419319153, "train/loss_math": 2.9611676931381226} +{"step": 432, "train/loss": 2.6043785214424133, "train/lm_loss": 2.6043785214424133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.33e-05, "perf/tokens_per_sec": 25559.576261251208, "train/loss_math": 2.7722545623779298, "train/loss_prose": 3.5859057903289795, "train/loss_code": 1.693924903869629} +{"step": 433, "train/loss": 3.2268356680870056, "train/lm_loss": 3.2268356680870056, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3400000000000005e-05, "perf/tokens_per_sec": 26398.848743279228, "train/loss_prose": 3.9301501274108888, "train/loss_math": 3.011903762817383, "train/loss_code": 1.576015055179596} +{"step": 434, "train/loss": 2.890672743320465, "train/lm_loss": 2.890672743320465, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.35e-05, "perf/tokens_per_sec": 25433.722566012708, "train/loss_code": 1.7491379181543987, "train/loss_math": 3.0800771713256836, "train/loss_prose": 3.905937592188517} +{"step": 435, "train/loss": 2.5402508080005646, "train/lm_loss": 2.5402508080005646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.36e-05, "perf/tokens_per_sec": 25434.814550956627, "train/loss_prose": 3.700403928756714, "train/loss_math": 2.6918492913246155, "train/loss_code": 1.0769011676311493} +{"step": 436, "train/loss": 2.8879499435424805, "train/lm_loss": 2.8879499435424805, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3700000000000005e-05, "perf/tokens_per_sec": 25263.027445444386, "train/loss_prose": 3.2320850491523743, "train/loss_code": 2.4467248916625977, "train/loss_math": 2.6409047842025757} +{"step": 437, "train/loss": 2.8401360511779785, "train/lm_loss": 2.8401360511779785, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.38e-05, "perf/tokens_per_sec": 25181.341017756058, "train/loss_math": 2.823522925376892, "train/loss_code": 1.9437376260757446, "train/loss_prose": 3.2966418862342834} +{"step": 438, "train/loss": 3.1001734137535095, "train/lm_loss": 3.1001734137535095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.39e-05, "perf/tokens_per_sec": 26366.598550900355, "train/loss_code": 1.945180892944336, "train/loss_math": 2.599991202354431, "train/loss_prose": 3.9277607202529907} +{"step": 439, "train/loss": 2.2552585005760193, "train/lm_loss": 2.2552585005760193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4000000000000006e-05, "perf/tokens_per_sec": 25571.902820075436, "train/loss_code": 1.664201831817627, "train/loss_math": 2.755832314491272, "train/loss_prose": 4.209394931793213} +{"step": 440, "train/loss": 2.642350494861603, "train/lm_loss": 2.642350494861603, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.41e-05, "perf/tokens_per_sec": 25431.802406121442, "train/loss_code": 1.8349335193634033, "train/loss_math": 2.9583508173624673, "train/loss_prose": 3.379475712776184} +{"step": 441, "train/loss": 2.7233628034591675, "train/lm_loss": 2.7233628034591675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4200000000000004e-05, "perf/tokens_per_sec": 24805.323199362392, "train/loss_math": 2.6558748483657837, "train/loss_code": 1.7744826078414917, "train/loss_prose": 3.717235008875529} +{"step": 442, "train/loss": 2.751699924468994, "train/lm_loss": 2.751699924468994, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.43e-05, "perf/tokens_per_sec": 24694.754673424475, "train/loss_prose": 3.6819803714752197, "train/loss_math": 2.7384398778279624, "train/loss_code": 1.3761691451072693} +{"step": 443, "train/loss": 2.7790108919143677, "train/lm_loss": 2.7790108919143677, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.44e-05, "perf/tokens_per_sec": 25664.92406381323, "train/loss_code": 2.1673460801442466, "train/loss_prose": 3.4346413612365723, "train/loss_math": 2.713062286376953} +{"step": 444, "train/loss": 2.9788392186164856, "train/lm_loss": 2.9788392186164856, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4500000000000004e-05, "perf/tokens_per_sec": 26986.447271567722, "train/loss_prose": 3.657421271006266, "train/loss_code": 1.8495397567749023, "train/loss_math": 2.752227485179901} +{"step": 445, "train/loss": 2.076539784669876, "train/lm_loss": 2.076539784669876, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.46e-05, "perf/tokens_per_sec": 23183.738850331498, "train/loss_math": 2.7123868465423584, "train/loss_code": 1.8645907640457153} +{"step": 446, "train/loss": 2.889038324356079, "train/lm_loss": 2.889038324356079, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.47e-05, "perf/tokens_per_sec": 24437.345304594342, "train/loss_math": 2.6863428751627603, "train/loss_prose": 3.4971251487731934} +{"step": 447, "train/loss": 2.904910445213318, "train/lm_loss": 2.904910445213318, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4800000000000005e-05, "perf/tokens_per_sec": 24956.049395341404, "train/loss_prose": 3.6131697297096252, "train/loss_math": 2.730809211730957, "train/loss_code": 1.6624930500984192} +{"step": 448, "train/loss": 2.9770877361297607, "train/lm_loss": 2.9770877361297607, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.49e-05, "perf/tokens_per_sec": 25942.098221337008, "train/loss_math": 2.8158413569132485, "train/loss_prose": 3.5781737168629966, "train/loss_code": 2.3173282146453857} +{"step": 449, "train/loss": 2.8078453540802, "train/lm_loss": 2.8078453540802, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5e-05, "perf/tokens_per_sec": 26162.263896714616, "train/loss_prose": 3.6206417083740234, "train/loss_math": 2.5411593914031982, "train/loss_code": 2.172839323679606} +{"step": 450, "train/loss": 2.066158950328827, "train/lm_loss": 2.066158950328827, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5100000000000005e-05, "perf/tokens_per_sec": 24924.76694004808, "train/loss_math": 2.6595351696014404, "train/loss_code": 1.376966953277588, "train/loss_prose": 3.492450714111328} +{"step": 451, "train/loss": 2.864343047142029, "train/lm_loss": 2.864343047142029, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.52e-05, "perf/tokens_per_sec": 26251.853434694578, "train/loss_prose": 3.8113142251968384, "train/loss_math": 2.4566702842712402, "train/loss_code": 1.3780729472637177} +{"step": 452, "train/loss": 2.373948574066162, "train/lm_loss": 2.373948574066162, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.53e-05, "perf/tokens_per_sec": 27128.43657219014, "train/loss_code": 1.7158802986145019, "train/loss_prose": 3.8371126651763916, "train/loss_math": 2.7379605770111084} +{"step": 453, "train/loss": 2.599282443523407, "train/lm_loss": 2.599282443523407, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5400000000000006e-05, "perf/tokens_per_sec": 25698.24700720843, "train/loss_math": 2.4943204522132874, "train/loss_code": 1.783394992351532, "train/loss_prose": 3.6250938177108765} +{"step": 454, "train/loss": 3.044694662094116, "train/lm_loss": 3.044694662094116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.55e-05, "perf/tokens_per_sec": 24802.064425224384, "train/loss_prose": 3.6850916147232056, "train/loss_code": 1.6319509148597717, "train/loss_math": 3.176644802093506} +{"step": 455, "train/loss": 2.7345087230205536, "train/lm_loss": 2.7345087230205536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5600000000000004e-05, "perf/tokens_per_sec": 24762.704580702815, "train/loss_code": 1.7613621950149536, "train/loss_prose": 3.5376463731129966, "train/loss_math": 2.9895217418670654} +{"step": 456, "train/loss": 3.332221746444702, "train/lm_loss": 3.332221746444702, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5700000000000006e-05, "perf/tokens_per_sec": 26479.69799967324, "train/loss_math": 2.7732423146565757, "train/loss_prose": 4.0170509815216064, "train/loss_code": 2.2698426246643066} +{"step": 457, "train/loss": 2.8840609788894653, "train/lm_loss": 2.8840609788894653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.58e-05, "perf/tokens_per_sec": 25868.621911702703, "train/loss_prose": 3.4093821048736572, "train/loss_code": 1.7997336983680725, "train/loss_math": 2.426110029220581} +{"step": 458, "train/loss": 2.5814335346221924, "train/lm_loss": 2.5814335346221924, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5900000000000004e-05, "perf/tokens_per_sec": 24878.42234267319, "train/loss_code": 1.6423945824305217, "train/loss_math": 2.8338067531585693, "train/loss_prose": 3.6114319562911987} +{"step": 459, "train/loss": 2.32875919342041, "train/lm_loss": 2.32875919342041, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.600000000000001e-05, "perf/tokens_per_sec": 25927.299154715318, "train/loss_code": 1.5976234674453735, "train/loss_math": 2.891878286997477, "train/loss_prose": 3.5639448165893555} +{"step": 460, "train/loss": 3.016094386577606, "train/lm_loss": 3.016094386577606, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.61e-05, "perf/tokens_per_sec": 25965.584281482377, "train/loss_code": 1.8568724393844604, "train/loss_prose": 3.691113233566284, "train/loss_math": 2.8252785205841064} +{"step": 461, "train/loss": 3.043841063976288, "train/lm_loss": 3.043841063976288, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6200000000000005e-05, "perf/tokens_per_sec": 25772.303689457876, "train/loss_prose": 3.881237030029297, "train/loss_code": 1.920705497264862, "train/loss_math": 2.955202023188273} +{"step": 462, "train/loss": 2.425405591726303, "train/lm_loss": 2.425405591726303, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.630000000000001e-05, "perf/tokens_per_sec": 26003.628402596143, "train/loss_code": 1.6306976477305095, "train/loss_math": 2.671114206314087, "train/loss_prose": 3.248904585838318} +{"step": 463, "train/loss": 2.625950813293457, "train/lm_loss": 2.625950813293457, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.64e-05, "perf/tokens_per_sec": 26344.926698286952, "train/loss_math": 2.886598587036133, "train/loss_code": 1.4214815497398376, "train/loss_prose": 3.731649875640869} +{"step": 464, "train/loss": 2.898664951324463, "train/lm_loss": 2.898664951324463, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6500000000000005e-05, "perf/tokens_per_sec": 25979.79848717181, "train/loss_prose": 3.6371185779571533, "train/loss_math": 2.6277509927749634, "train/loss_code": 1.7669602632522583} +{"step": 465, "train/loss": 3.1673397421836853, "train/lm_loss": 3.1673397421836853, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.660000000000001e-05, "perf/tokens_per_sec": 26269.67631169713, "train/loss_prose": 3.672967314720154, "train/loss_math": 2.7439656257629395, "train/loss_code": 2.6342944701512656} +{"step": 466, "train/loss": 2.431589663028717, "train/lm_loss": 2.431589663028717, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6700000000000003e-05, "perf/tokens_per_sec": 26066.400311949517, "train/loss_math": 2.8480710983276367, "train/loss_code": 1.5460295677185059, "train/loss_prose": 3.4223451614379883} +{"step": 467, "train/loss": 2.428854912519455, "train/lm_loss": 2.428854912519455, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6800000000000006e-05, "perf/tokens_per_sec": 25511.714516097894, "train/loss_code": 1.7863289912541707, "train/loss_prose": 3.3818559646606445, "train/loss_math": 2.6724990010261536} +{"step": 468, "train/loss": 2.757694721221924, "train/lm_loss": 2.757694721221924, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.69e-05, "perf/tokens_per_sec": 26470.314170200178, "train/loss_math": 2.7978310585021973, "train/loss_prose": 3.686886191368103, "train/loss_code": 1.7482306361198425} +{"step": 469, "train/loss": 2.6829760372638702, "train/lm_loss": 2.6829760372638702, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7e-05, "perf/tokens_per_sec": 25816.531698374056, "train/loss_prose": 3.8133944869041443, "train/loss_code": 1.1002355615297954, "train/loss_math": 2.9095232486724854} +{"step": 470, "train/loss": 2.819932669401169, "train/lm_loss": 2.819932669401169, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.71e-05, "perf/tokens_per_sec": 25733.737942273903, "train/loss_prose": 3.6211180686950684, "train/loss_code": 2.1553213596343994, "train/loss_math": 3.074822425842285} +{"step": 471, "train/loss": 2.027467668056488, "train/lm_loss": 2.027467668056488, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.72e-05, "perf/tokens_per_sec": 25758.85626208861, "train/loss_math": 2.596861982345581, "train/loss_code": 1.0784772038459778} +{"step": 472, "train/loss": 2.6808930933475494, "train/lm_loss": 2.6808930933475494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.73e-05, "perf/tokens_per_sec": 26935.04056592237, "train/loss_prose": 3.5468907356262207, "train/loss_code": 1.4555068016052246, "train/loss_math": 2.893062114715576} +{"step": 473, "train/loss": 2.9490965604782104, "train/lm_loss": 2.9490965604782104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.74e-05, "perf/tokens_per_sec": 25972.45384714233, "train/loss_code": 1.9073189496994019, "train/loss_prose": 3.6244126160939536, "train/loss_math": 2.703053832054138} +{"step": 474, "train/loss": 2.6563770473003387, "train/lm_loss": 2.6563770473003387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.75e-05, "perf/tokens_per_sec": 25798.89202676017, "train/loss_code": 1.7983818451563518, "train/loss_math": 2.4857152700424194, "train/loss_prose": 3.628146489461263} +{"step": 475, "train/loss": 2.4930935502052307, "train/lm_loss": 2.4930935502052307, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.76e-05, "perf/tokens_per_sec": 27543.494657996333, "train/loss_math": 2.616511881351471, "train/loss_code": 1.8931700388590496, "train/loss_prose": 3.7991907596588135} +{"step": 476, "train/loss": 2.545240640640259, "train/lm_loss": 2.545240640640259, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.77e-05, "perf/tokens_per_sec": 25843.017043283624, "train/loss_prose": 2.998857577641805, "train/loss_math": 2.5032559633255005, "train/loss_code": 2.119613250096639} +{"step": 477, "train/loss": 2.7087780237197876, "train/lm_loss": 2.7087780237197876, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.78e-05, "perf/tokens_per_sec": 26121.133015052455, "train/loss_prose": 3.1430073976516724, "train/loss_math": 2.607270097732544, "train/loss_code": 2.3478593826293945} +{"step": 478, "train/loss": 2.9838494062423706, "train/lm_loss": 2.9838494062423706, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.79e-05, "perf/tokens_per_sec": 26854.119409517498, "train/loss_prose": 3.4430274168650308, "train/loss_math": 2.708342742919922} +{"step": 479, "train/loss": 2.505813241004944, "train/lm_loss": 2.505813241004944, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8e-05, "perf/tokens_per_sec": 26034.917339265743, "train/loss_math": 2.721460521221161, "train/loss_prose": 3.6485095024108887, "train/loss_code": 1.8373850584030151} +{"step": 480, "train/loss": 2.6359171867370605, "train/lm_loss": 2.6359171867370605, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8100000000000004e-05, "perf/tokens_per_sec": 25770.757294019877, "train/loss_code": 1.6704365909099579, "train/loss_prose": 3.857174873352051, "train/loss_math": 2.8340680599212646} +{"step": 481, "train/loss": 2.3968382477760315, "train/lm_loss": 2.3968382477760315, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.82e-05, "perf/tokens_per_sec": 26070.078945291873, "train/loss_code": 1.813934048016866, "train/loss_prose": 3.2018537521362305, "train/loss_math": 2.6327625513076782} +{"step": 482, "train/loss": 2.136258840560913, "train/lm_loss": 2.136258840560913, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.83e-05, "perf/tokens_per_sec": 25977.48074582248, "train/loss_prose": 4.066087245941162, "train/loss_code": 1.5214516162872314, "train/loss_math": 2.7083630561828613} +{"step": 483, "train/loss": 2.786572754383087, "train/lm_loss": 2.786572754383087, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8400000000000004e-05, "perf/tokens_per_sec": 25360.248443757537, "train/loss_math": 2.694899876912435, "train/loss_code": 2.3463752269744873, "train/loss_prose": 3.5843781232833862} +{"step": 484, "train/loss": 2.6787370443344116, "train/lm_loss": 2.6787370443344116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.85e-05, "perf/tokens_per_sec": 25764.9986037604, "train/loss_code": 1.305809994538625, "train/loss_prose": 4.050610939661662, "train/loss_math": 2.6803165674209595} +{"step": 485, "train/loss": 2.561886966228485, "train/lm_loss": 2.561886966228485, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.86e-05, "perf/tokens_per_sec": 25759.04937288682, "train/loss_code": 1.5368462006251018, "train/loss_math": 2.690502405166626, "train/loss_prose": 3.5011843045552573} +{"step": 486, "train/loss": 3.3105584383010864, "train/lm_loss": 3.3105584383010864, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.87e-05, "perf/tokens_per_sec": 25744.651251727064, "train/loss_prose": 3.620286703109741, "train/loss_code": 1.4826232194900513, "train/loss_math": 3.2801239490509033} +{"step": 487, "train/loss": 2.279681980609894, "train/lm_loss": 2.279681980609894, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.88e-05, "perf/tokens_per_sec": 26218.80073864937, "train/loss_math": 2.7775720755259194, "train/loss_prose": 4.003277778625488, "train/loss_code": 1.4753652811050415} +{"step": 488, "train/loss": 2.3189145028591156, "train/lm_loss": 2.3189145028591156, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.89e-05, "perf/tokens_per_sec": 24788.286772938718, "train/loss_math": 2.7217479944229126, "train/loss_code": 1.2641396721204121, "train/loss_prose": 3.871904134750366} +{"step": 489, "train/loss": 2.9268540740013123, "train/lm_loss": 2.9268540740013123, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9e-05, "perf/tokens_per_sec": 26667.332864555927, "train/loss_math": 2.671211338043213, "train/loss_prose": 4.309479713439941, "train/loss_code": 1.4398155212402344} +{"step": 490, "train/loss": 2.911200761795044, "train/lm_loss": 2.911200761795044, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.91e-05, "perf/tokens_per_sec": 25813.738759368443, "train/loss_math": 2.729399561882019, "train/loss_prose": 3.5515056848526, "train/loss_code": 1.8123923540115356} +{"step": 491, "train/loss": 3.0982471108436584, "train/lm_loss": 3.0982471108436584, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.92e-05, "perf/tokens_per_sec": 25743.416773806846, "train/loss_prose": 3.7923248410224915, "train/loss_math": 2.826213240623474, "train/loss_code": 1.9821254014968872} +{"step": 492, "train/loss": 2.8083476424217224, "train/lm_loss": 2.8083476424217224, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.93e-05, "perf/tokens_per_sec": 26137.625511003596, "train/loss_math": 2.707715320587158, "train/loss_code": 1.9052741527557373, "train/loss_prose": 3.511465311050415} +{"step": 493, "train/loss": 2.273400753736496, "train/lm_loss": 2.273400753736496, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.94e-05, "perf/tokens_per_sec": 25428.151137318982, "train/loss_code": 1.699243426322937, "train/loss_prose": 4.558650493621826, "train/loss_math": 2.566169023513794} +{"step": 494, "train/loss": 2.2371857166290283, "train/lm_loss": 2.2371857166290283, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9500000000000004e-05, "perf/tokens_per_sec": 26626.456769649947, "train/loss_code": 1.5992182970046998, "train/loss_prose": 3.570805072784424, "train/loss_math": 2.7597837448120117} +{"step": 495, "train/loss": 2.960600972175598, "train/lm_loss": 2.960600972175598, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.96e-05, "perf/tokens_per_sec": 25950.248831248082, "train/loss_code": 1.8163853287696838, "train/loss_prose": 3.466877841949463, "train/loss_math": 2.7176473140716553} +{"step": 496, "train/loss": 2.7309644520282745, "train/lm_loss": 2.7309644520282745, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.97e-05, "perf/tokens_per_sec": 25931.369171623803, "train/loss_code": 1.9572830498218536, "train/loss_prose": 3.8052085240681968, "train/loss_math": 2.6029584407806396} +{"step": 497, "train/loss": 3.3413432240486145, "train/lm_loss": 3.3413432240486145, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9800000000000004e-05, "perf/tokens_per_sec": 25033.141018673, "train/loss_prose": 3.7480727195739747, "train/loss_code": 2.488677978515625, "train/loss_math": 3.0130274295806885} +{"step": 498, "train/loss": 2.6870627999305725, "train/lm_loss": 2.6870627999305725, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.99e-05, "perf/tokens_per_sec": 25181.414836920514, "train/loss_code": 1.6096487045288086, "train/loss_math": 2.7514765858650208, "train/loss_prose": 3.635649561882019} +{"step": 499, "train/loss": 3.1793956756591797, "train/lm_loss": 3.1793956756591797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5e-05, "perf/tokens_per_sec": 26458.940100384567, "train/loss_math": 2.56152081489563, "train/loss_prose": 3.7315456867218018, "train/loss_code": 1.6543951034545898} +{"step": 500, "train/loss": 3.045466721057892, "train/lm_loss": 3.045466721057892, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999999390765185e-05, "perf/tokens_per_sec": 26122.24510698342, "train/loss_math": 2.9199658632278442, "train/loss_code": 0.9921855926513672, "train/loss_prose": 3.8972280025482178} +{"step": 501, "train/loss": 2.535791337490082, "train/lm_loss": 2.535791337490082, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999997563061038e-05, "perf/tokens_per_sec": 25224.599287010555, "train/loss_code": 1.8125478506088257, "train/loss_prose": 4.2504719495773315, "train/loss_math": 2.7226476669311523} +{"step": 502, "train/loss": 2.6967084407806396, "train/lm_loss": 2.6967084407806396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999994516888449e-05, "perf/tokens_per_sec": 24212.822441708384, "train/loss_math": 2.928487221399943, "train/loss_prose": 3.257183869679769, "train/loss_code": 1.5083270072937012} +{"step": 503, "train/loss": 2.639864683151245, "train/lm_loss": 2.639864683151245, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9999902522489015e-05, "perf/tokens_per_sec": 26433.0079453488, "train/loss_code": 2.2733925580978394, "train/loss_math": 2.6491968631744385, "train/loss_prose": 4.077754974365234} +{"step": 504, "train/loss": 2.5244649052619934, "train/lm_loss": 2.5244649052619934, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999984769144476e-05, "perf/tokens_per_sec": 25705.668134005657, "train/loss_math": 2.733077883720398, "train/loss_prose": 4.027119159698486, "train/loss_code": 1.7454291184743245} +{"step": 505, "train/loss": 2.6343434751033783, "train/lm_loss": 2.6343434751033783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999978067577844e-05, "perf/tokens_per_sec": 25625.493807622373, "train/loss_code": 1.792616883913676, "train/loss_prose": 3.8360081911087036, "train/loss_math": 2.674960136413574} +{"step": 506, "train/loss": 2.5335566699504852, "train/lm_loss": 2.5335566699504852, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999970147552272e-05, "perf/tokens_per_sec": 25857.875700636367, "train/loss_prose": 3.4399259090423584, "train/loss_math": 2.6646607875823975, "train/loss_code": 1.7526119947433472} +{"step": 507, "train/loss": 2.4539681673049927, "train/lm_loss": 2.4539681673049927, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999961009071621e-05, "perf/tokens_per_sec": 26388.711419271895, "train/loss_prose": 3.661652445793152, "train/loss_code": 1.6626774470011394, "train/loss_math": 2.440136273701986} +{"step": 508, "train/loss": 2.856027752161026, "train/lm_loss": 2.856027752161026, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999950652140343e-05, "perf/tokens_per_sec": 25248.436192836034, "train/loss_code": 1.2618749141693115, "train/loss_math": 2.8387069702148438, "train/loss_prose": 3.661764442920685} +{"step": 509, "train/loss": 2.5781729221343994, "train/lm_loss": 2.5781729221343994, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999939076763487e-05, "perf/tokens_per_sec": 25915.917720105957, "train/loss_code": 1.646891991297404, "train/loss_math": 2.7394978205362954, "train/loss_prose": 3.733107328414917} +{"step": 510, "train/loss": 3.0625322461128235, "train/lm_loss": 3.0625322461128235, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999926282946695e-05, "perf/tokens_per_sec": 26602.955124591972, "train/loss_prose": 3.789615345001221, "train/loss_math": 2.5494918823242188, "train/loss_code": 1.5013445615768433} +{"step": 511, "train/loss": 2.7001784443855286, "train/lm_loss": 2.7001784443855286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999912270696202e-05, "perf/tokens_per_sec": 25667.953337158157, "train/loss_code": 1.705165982246399, "train/loss_prose": 3.4922958612442017, "train/loss_math": 2.801625967025757} +{"step": 512, "train/loss": 2.8905855119228363, "train/lm_loss": 2.8905855119228363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999897040018837e-05, "perf/tokens_per_sec": 25562.923039838588, "train/loss_prose": 3.4605570435523987, "train/loss_math": 2.6602584520975747, "train/loss_code": 1.301680564880371} +{"step": 513, "train/loss": 2.8127007484436035, "train/lm_loss": 2.8127007484436035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999880590922025e-05, "perf/tokens_per_sec": 26591.75492059561, "train/loss_code": 1.7137103080749512, "train/loss_prose": 3.6224777698516846, "train/loss_math": 2.735583782196045} +{"step": 514, "train/loss": 3.3471667766571045, "train/lm_loss": 3.3471667766571045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999862923413781e-05, "perf/tokens_per_sec": 25854.762525659316, "train/loss_math": 2.7484458287556968, "train/loss_prose": 3.706399345397949} +{"step": 515, "train/loss": 2.5456592440605164, "train/lm_loss": 2.5456592440605164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9998440375027166e-05, "perf/tokens_per_sec": 25573.006702917697, "train/loss_prose": 3.506974935531616, "train/loss_math": 2.7997933626174927, "train/loss_code": 1.0760750621557236} +{"step": 516, "train/loss": 2.7298139929771423, "train/lm_loss": 2.7298139929771423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9998239331980366e-05, "perf/tokens_per_sec": 26577.76792079208, "train/loss_code": 1.5973283449808757, "train/loss_prose": 3.6358442902565002, "train/loss_math": 2.50314998626709} +{"step": 517, "train/loss": 2.38555371761322, "train/lm_loss": 2.38555371761322, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9998026105095405e-05, "perf/tokens_per_sec": 26511.202818418482, "train/loss_code": 1.8051156997680664, "train/loss_math": 3.0322600603103638, "train/loss_prose": 3.994330644607544} +{"step": 518, "train/loss": 3.3208773732185364, "train/lm_loss": 3.3208773732185364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999780069447619e-05, "perf/tokens_per_sec": 25894.8239862054, "train/loss_prose": 3.518237908681234, "train/loss_math": 2.728795289993286} +{"step": 519, "train/loss": 2.5615309476852417, "train/lm_loss": 2.5615309476852417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999756310023261e-05, "perf/tokens_per_sec": 25556.116309478297, "train/loss_code": 1.5633035898208618, "train/loss_math": 2.6965672969818115, "train/loss_prose": 3.856317400932312} +{"step": 520, "train/loss": 3.2250892519950867, "train/lm_loss": 3.2250892519950867, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999731332248044e-05, "perf/tokens_per_sec": 25480.98950050651, "train/loss_prose": 3.6934197902679444, "train/loss_math": 2.9101357460021973, "train/loss_code": 1.5133442878723145} +{"step": 521, "train/loss": 2.9679335951805115, "train/lm_loss": 2.9679335951805115, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9997051361341425e-05, "perf/tokens_per_sec": 25982.784661873375, "train/loss_prose": 3.8811769485473633, "train/loss_math": 2.982593218485514, "train/loss_code": 1.5760793089866638} +{"step": 522, "train/loss": 2.719471752643585, "train/lm_loss": 2.719471752643585, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999677721694325e-05, "perf/tokens_per_sec": 25613.268286864826, "train/loss_code": 1.7657036781311035, "train/loss_prose": 3.5588926474253335, "train/loss_math": 2.890992045402527} +{"step": 523, "train/loss": 3.5074459314346313, "train/lm_loss": 3.5074459314346313, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9996490889419514e-05, "perf/tokens_per_sec": 25838.003185407884, "train/loss_code": 1.964925487836202, "train/loss_prose": 4.803957104682922, "train/loss_math": 2.948962688446045} +{"step": 524, "train/loss": 2.706032782793045, "train/lm_loss": 2.706032782793045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9996192378909786e-05, "perf/tokens_per_sec": 25432.81892524056, "train/loss_prose": 3.3203534483909607, "train/loss_code": 1.877504785855611, "train/loss_math": 2.734332799911499} +{"step": 525, "train/loss": 2.8214258551597595, "train/lm_loss": 2.8214258551597595, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999588168555954e-05, "perf/tokens_per_sec": 25764.80540375167, "train/loss_code": 0.8289985060691833, "train/loss_prose": 3.4903258681297302, "train/loss_math": 2.5937010447184243} +{"step": 526, "train/loss": 2.378318816423416, "train/lm_loss": 2.378318816423416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999555880952023e-05, "perf/tokens_per_sec": 25863.792799031682, "train/loss_code": 1.6729809522628785, "train/loss_math": 2.6235361099243164, "train/loss_prose": 4.019055247306824} +{"step": 527, "train/loss": 3.207729160785675, "train/lm_loss": 3.207729160785675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999522375094919e-05, "perf/tokens_per_sec": 25575.595491634274, "train/loss_math": 2.7005022366841636, "train/loss_prose": 3.9045265316963196, "train/loss_code": 1.9422208070755005} +{"step": 528, "train/loss": 2.869835138320923, "train/lm_loss": 2.869835138320923, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999487651000975e-05, "perf/tokens_per_sec": 26162.104533479527, "train/loss_math": 2.7169607877731323, "train/loss_prose": 3.602041721343994, "train/loss_code": 1.2847129106521606} +{"step": 529, "train/loss": 2.5300233960151672, "train/lm_loss": 2.5300233960151672, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999451708687114e-05, "perf/tokens_per_sec": 25792.53989992178, "train/loss_code": 1.9401098092397053, "train/loss_prose": 3.7725608348846436, "train/loss_math": 2.6618241667747498} +{"step": 530, "train/loss": 2.4232825338840485, "train/lm_loss": 2.4232825338840485, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999414548170853e-05, "perf/tokens_per_sec": 25670.829891353886, "train/loss_math": 2.6121856570243835, "train/loss_code": 1.7175906499226887, "train/loss_prose": 3.7847461700439453} +{"step": 531, "train/loss": 2.5205713510513306, "train/lm_loss": 2.5205713510513306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999376169470306e-05, "perf/tokens_per_sec": 26367.853051050886, "train/loss_prose": 3.531672716140747, "train/loss_code": 1.138416051864624, "train/loss_math": 2.8712130069732664} +{"step": 532, "train/loss": 2.8416281044483185, "train/lm_loss": 2.8416281044483185, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999336572604175e-05, "perf/tokens_per_sec": 26534.911411643323, "train/loss_code": 1.656696081161499, "train/loss_prose": 3.7021470864613852, "train/loss_math": 2.7710638840993247} +{"step": 533, "train/loss": 2.760167956352234, "train/lm_loss": 2.760167956352234, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999295757591762e-05, "perf/tokens_per_sec": 25557.903200564124, "train/loss_math": 2.721946060657501, "train/loss_prose": 3.332940459251404, "train/loss_code": 2.26383900642395} +{"step": 534, "train/loss": 2.6962991058826447, "train/lm_loss": 2.6962991058826447, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999253724452958e-05, "perf/tokens_per_sec": 26351.63469423128, "train/loss_prose": 3.5268588860829673, "train/loss_code": 1.5582837462425232, "train/loss_math": 2.6244161128997803} +{"step": 535, "train/loss": 2.88182532787323, "train/lm_loss": 2.88182532787323, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.99921047320825e-05, "perf/tokens_per_sec": 26727.36508489659, "train/loss_prose": 4.226302941640218, "train/loss_math": 2.6678220431009927, "train/loss_code": 1.1861140131950378} +{"step": 536, "train/loss": 2.8039462566375732, "train/lm_loss": 2.8039462566375732, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999166003878718e-05, "perf/tokens_per_sec": 26148.685149473295, "train/loss_math": 2.608709394931793, "train/loss_prose": 3.391707976659139, "train/loss_code": 1.8216084241867065} +{"step": 537, "train/loss": 2.7088142037391663, "train/lm_loss": 2.7088142037391663, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9991203164860365e-05, "perf/tokens_per_sec": 23977.988630606364, "train/loss_prose": 3.6054232120513916, "train/loss_math": 2.759997844696045, "train/loss_code": 1.4962740341822307} +{"step": 538, "train/loss": 2.794206142425537, "train/lm_loss": 2.794206142425537, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999073411052472e-05, "perf/tokens_per_sec": 26893.129808083653, "train/loss_code": 1.6340872049331665, "train/loss_math": 2.8226876258850098, "train/loss_prose": 3.657175123691559} +{"step": 539, "train/loss": 2.560528427362442, "train/lm_loss": 2.560528427362442, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.999025287600886e-05, "perf/tokens_per_sec": 26336.76856120126, "train/loss_code": 1.7126150925954182, "train/loss_math": 2.745445171991984, "train/loss_prose": 3.555023193359375} +{"step": 540, "train/loss": 2.811495006084442, "train/lm_loss": 2.811495006084442, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998975946154734e-05, "perf/tokens_per_sec": 26579.823909646475, "train/loss_math": 2.5440067648887634, "train/loss_prose": 3.6952720483144126, "train/loss_code": 1.230116844177246} +{"step": 541, "train/loss": 2.8505311012268066, "train/lm_loss": 2.8505311012268066, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998925386738063e-05, "perf/tokens_per_sec": 25951.660033686054, "train/loss_math": 2.698078441619873, "train/loss_prose": 3.8987298011779785, "train/loss_code": 1.516395926475525} +{"step": 542, "train/loss": 3.030476748943329, "train/lm_loss": 3.030476748943329, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998873609375516e-05, "perf/tokens_per_sec": 26795.18865796882, "train/loss_prose": 3.2440276940663657, "train/loss_math": 2.9023461818695067} +{"step": 543, "train/loss": 2.5853776335716248, "train/lm_loss": 2.5853776335716248, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998820614092328e-05, "perf/tokens_per_sec": 26247.842609525993, "train/loss_math": 2.5933690865834556, "train/loss_code": 1.9340943495432537, "train/loss_prose": 3.5503151416778564} +{"step": 544, "train/loss": 2.9680020809173584, "train/lm_loss": 2.9680020809173584, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998766400914329e-05, "perf/tokens_per_sec": 25740.755333606026, "train/loss_prose": 3.621795733769735, "train/loss_code": 1.9962309002876282, "train/loss_math": 2.9620561599731445} +{"step": 545, "train/loss": 1.9942999184131622, "train/lm_loss": 1.9942999184131622, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998710969867942e-05, "perf/tokens_per_sec": 26485.82147372056, "train/loss_math": 2.5082136392593384, "train/loss_code": 1.8229952851931255} +{"step": 546, "train/loss": 2.3068628013134003, "train/lm_loss": 2.3068628013134003, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9986543209801825e-05, "perf/tokens_per_sec": 26346.340750646395, "train/loss_math": 2.594114065170288, "train/loss_prose": 3.636783003807068, "train/loss_code": 1.7174444913864135} +{"step": 547, "train/loss": 3.104507803916931, "train/lm_loss": 3.104507803916931, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9985964542786614e-05, "perf/tokens_per_sec": 26823.260509239888, "train/loss_code": 1.4575940370559692, "train/loss_prose": 3.6499733448028566, "train/loss_math": 2.564301013946533} +{"step": 548, "train/loss": 2.323732703924179, "train/lm_loss": 2.323732703924179, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998537369791581e-05, "perf/tokens_per_sec": 26150.118017385852, "train/loss_code": 1.8067975759506225, "train/loss_prose": 3.726299524307251, "train/loss_math": 2.914786696434021} +{"step": 549, "train/loss": 2.9179541170597076, "train/lm_loss": 2.9179541170597076, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.99847706754774e-05, "perf/tokens_per_sec": 26147.809585269446, "train/loss_math": 2.622873067855835, "train/loss_prose": 3.7571627497673035, "train/loss_code": 1.5346183776855469} +{"step": 550, "train/loss": 2.624822199344635, "train/lm_loss": 2.624822199344635, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998415547576527e-05, "perf/tokens_per_sec": 26389.23836821716, "train/loss_prose": 3.343461195627848, "train/loss_code": 1.5244940519332886, "train/loss_math": 2.639735380808512} +{"step": 551, "train/loss": 2.481338620185852, "train/lm_loss": 2.481338620185852, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998352809907928e-05, "perf/tokens_per_sec": 26218.80073864937, "train/loss_code": 1.704530656337738, "train/loss_prose": 3.258146584033966} +{"step": 552, "train/loss": 2.832243800163269, "train/lm_loss": 2.832243800163269, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.99828885457252e-05, "perf/tokens_per_sec": 26139.176478822235, "train/loss_math": 2.731566607952118, "train/loss_code": 2.048133969306946, "train/loss_prose": 3.8177074193954468} +{"step": 553, "train/loss": 2.4200980961322784, "train/lm_loss": 2.4200980961322784, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998223681601473e-05, "perf/tokens_per_sec": 26147.212643293296, "train/loss_code": 1.8626242876052856, "train/loss_prose": 3.308255672454834, "train/loss_math": 2.8673434257507324} +{"step": 554, "train/loss": 2.8996260166168213, "train/lm_loss": 2.8996260166168213, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998157291026553e-05, "perf/tokens_per_sec": 26364.37311856999, "train/loss_math": 2.6539515256881714, "train/loss_prose": 3.450327475865682, "train/loss_code": 2.230219841003418} +{"step": 555, "train/loss": 2.8467928171157837, "train/lm_loss": 2.8467928171157837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998089682880117e-05, "perf/tokens_per_sec": 26543.80794373535, "train/loss_prose": 3.7828786373138428, "train/loss_code": 1.7140161395072937, "train/loss_math": 2.5007786750793457} +{"step": 556, "train/loss": 2.2049162685871124, "train/lm_loss": 2.2049162685871124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.998020857195117e-05, "perf/tokens_per_sec": 26380.769200294213, "train/loss_code": 1.7486735820770263, "train/loss_prose": 3.4654080867767334, "train/loss_math": 2.715277075767517} +{"step": 557, "train/loss": 2.7779197692871094, "train/lm_loss": 2.7779197692871094, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.997950814005098e-05, "perf/tokens_per_sec": 25376.918326469597, "train/loss_prose": 3.376349608103434, "train/loss_math": 2.58173406124115, "train/loss_code": 2.310280720392863} +{"step": 558, "train/loss": 2.8927518129348755, "train/lm_loss": 2.8927518129348755, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9978795533441966e-05, "perf/tokens_per_sec": 27095.491332688798, "train/loss_code": 1.936177134513855, "train/loss_prose": 3.453967273235321, "train/loss_math": 2.726895332336426} +{"step": 559, "train/loss": 2.6529273092746735, "train/lm_loss": 2.6529273092746735, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.997807075247146e-05, "perf/tokens_per_sec": 26531.674062542952, "train/loss_math": 2.499410390853882, "train/loss_code": 1.373202621936798, "train/loss_prose": 3.6595939795176187} +{"step": 560, "train/loss": 3.0383613109588623, "train/lm_loss": 3.0383613109588623, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.997733379749271e-05, "perf/tokens_per_sec": 27472.36221577962, "train/loss_code": 1.5513558983802795, "train/loss_prose": 3.9360673427581787, "train/loss_math": 2.729954957962036} +{"step": 561, "train/loss": 2.7043310403823853, "train/lm_loss": 2.7043310403823853, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.997658466886489e-05, "perf/tokens_per_sec": 26407.370116405436, "train/loss_math": 2.859802782535553, "train/loss_prose": 3.8014622926712036, "train/loss_code": 1.2962559461593628} +{"step": 562, "train/loss": 2.619416832923889, "train/lm_loss": 2.619416832923889, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9975823366953124e-05, "perf/tokens_per_sec": 26251.091283593403, "train/loss_math": 2.814277458190918, "train/loss_code": 1.5334483981132507, "train/loss_prose": 3.8170506954193115} +{"step": 563, "train/loss": 2.5141160786151886, "train/lm_loss": 2.5141160786151886, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9975049892128455e-05, "perf/tokens_per_sec": 27130.707178795565, "train/loss_code": 1.8371387720108032, "train/loss_math": 2.4954681396484375, "train/loss_prose": 3.88671875} +{"step": 564, "train/loss": 2.6428951025009155, "train/lm_loss": 2.6428951025009155, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.997426424476787e-05, "perf/tokens_per_sec": 26178.010584042262, "train/loss_prose": 3.125900665918986, "train/loss_code": 1.2434189319610596, "train/loss_math": 2.630509912967682} +{"step": 565, "train/loss": 3.0211360454559326, "train/lm_loss": 3.0211360454559326, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9973466425254286e-05, "perf/tokens_per_sec": 26596.81855041591, "train/loss_prose": 3.84036523103714, "train/loss_code": 1.7898322343826294, "train/loss_math": 2.6139814853668213} +{"step": 566, "train/loss": 2.384660691022873, "train/lm_loss": 2.384660691022873, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.997265643397654e-05, "perf/tokens_per_sec": 26442.609532956547, "train/loss_code": 1.6439179480075836, "train/loss_math": 2.820186456044515, "train/loss_prose": 4.041053771972656} +{"step": 567, "train/loss": 2.628164827823639, "train/lm_loss": 2.628164827823639, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.997183427132943e-05, "perf/tokens_per_sec": 25994.89355933024, "train/loss_code": 1.7219755252202351, "train/loss_math": 2.6963306665420532, "train/loss_prose": 3.488909880320231} +{"step": 568, "train/loss": 2.4301657676696777, "train/lm_loss": 2.4301657676696777, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.997099993771365e-05, "perf/tokens_per_sec": 26469.213026403166, "train/loss_math": 2.569500287373861, "train/loss_code": 1.1779346466064453, "train/loss_prose": 4.099510908126831} +{"step": 569, "train/loss": 2.9675405621528625, "train/lm_loss": 2.9675405621528625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.997015343353585e-05, "perf/tokens_per_sec": 26002.605099455424, "train/loss_math": 2.8412633736928306, "train/loss_code": 1.9996235370635986, "train/loss_prose": 3.739095767339071} +{"step": 570, "train/loss": 2.7248973548412323, "train/lm_loss": 2.7248973548412323, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.996929475920862e-05, "perf/tokens_per_sec": 26341.93748725442, "train/loss_code": 1.8971927165985107, "train/loss_prose": 3.2354788780212402, "train/loss_math": 2.5314388275146484} +{"step": 571, "train/loss": 2.2632672488689423, "train/lm_loss": 2.2632672488689423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.996842391515044e-05, "perf/tokens_per_sec": 26827.826170603163, "train/loss_code": 1.6129591166973114, "train/loss_prose": 3.385993719100952, "train/loss_math": 2.441157102584839} +{"step": 572, "train/loss": 2.4862419962882996, "train/lm_loss": 2.4862419962882996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.996754090178577e-05, "perf/tokens_per_sec": 27220.765869526294, "train/loss_math": 2.630866765975952, "train/loss_code": 1.781521737575531, "train/loss_prose": 3.172558069229126} +{"step": 573, "train/loss": 2.152130365371704, "train/lm_loss": 2.152130365371704, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.996664571954497e-05, "perf/tokens_per_sec": 26070.830621774316, "train/loss_code": 1.6098089814186096, "train/loss_math": 2.694451868534088} +{"step": 574, "train/loss": 2.5557838082313538, "train/lm_loss": 2.5557838082313538, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.996573836886435e-05, "perf/tokens_per_sec": 26480.310219658775, "train/loss_math": 2.5597413380940757, "train/loss_code": 0.9593305587768555, "train/loss_prose": 3.6161282857259116} +{"step": 575, "train/loss": 2.364116072654724, "train/lm_loss": 2.364116072654724, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9964818850186135e-05, "perf/tokens_per_sec": 26348.23985976127, "train/loss_code": 1.8270509243011475, "train/loss_prose": 3.314692974090576, "train/loss_math": 3.148287534713745} +{"step": 576, "train/loss": 2.457448333501816, "train/lm_loss": 2.457448333501816, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.996388716395848e-05, "perf/tokens_per_sec": 26012.0540352301, "train/loss_prose": 3.379510998725891, "train/loss_math": 2.56244820356369, "train/loss_code": 1.3253862261772156} +{"step": 577, "train/loss": 2.7517952919006348, "train/lm_loss": 2.7517952919006348, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.99629433106355e-05, "perf/tokens_per_sec": 27160.17381378856, "train/loss_code": 1.8789115846157074, "train/loss_prose": 3.2614963054656982, "train/loss_math": 2.6052767038345337} +{"step": 578, "train/loss": 2.5846296846866608, "train/lm_loss": 2.5846296846866608, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.996198729067719e-05, "perf/tokens_per_sec": 26145.97904957577, "train/loss_code": 1.923531174659729, "train/loss_math": 2.8049957752227783} +{"step": 579, "train/loss": 2.555351972579956, "train/lm_loss": 2.555351972579956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.996101910454953e-05, "perf/tokens_per_sec": 26013.82647525109, "train/loss_math": 2.653391218185425, "train/loss_prose": 2.9899749755859375, "train/loss_code": 2.092942476272583} +{"step": 580, "train/loss": 2.4575773775577545, "train/lm_loss": 2.4575773775577545, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.996003875272438e-05, "perf/tokens_per_sec": 26515.089923571755, "train/loss_code": 1.740291953086853, "train/loss_prose": 4.015154838562012, "train/loss_math": 2.6061471104621887} +{"step": 581, "train/loss": 2.3412235379219055, "train/lm_loss": 2.3412235379219055, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9959046235679565e-05, "perf/tokens_per_sec": 26486.474811448163, "train/loss_prose": 2.791403889656067, "train/loss_code": 1.5849913756052654, "train/loss_math": 2.797335465749105} +{"step": 582, "train/loss": 2.7093880772590637, "train/lm_loss": 2.7093880772590637, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.995804155389881e-05, "perf/tokens_per_sec": 26363.725790610883, "train/loss_math": 2.596993160247803, "train/loss_prose": 3.5360172986984253, "train/loss_code": 1.6181045770645142} +{"step": 583, "train/loss": 3.079336106777191, "train/lm_loss": 3.079336106777191, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9957024707871806e-05, "perf/tokens_per_sec": 26055.80532312933, "train/loss_math": 2.7568947315216064, "train/loss_prose": 3.6167381604512534} +{"step": 584, "train/loss": 2.174875110387802, "train/lm_loss": 2.174875110387802, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.995599569809414e-05, "perf/tokens_per_sec": 26216.240129159458, "train/loss_code": 1.4727183878421783, "train/loss_prose": 3.6843624114990234, "train/loss_math": 2.6079216798146567} +{"step": 585, "train/loss": 2.6089836955070496, "train/lm_loss": 2.6089836955070496, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9954954525067334e-05, "perf/tokens_per_sec": 26184.474201695146, "train/loss_prose": 3.089618364969889, "train/loss_code": 1.820276141166687, "train/loss_math": 2.654153744379679} +{"step": 586, "train/loss": 2.171540766954422, "train/lm_loss": 2.171540766954422, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9953901189298845e-05, "perf/tokens_per_sec": 26048.18068295878, "train/loss_code": 1.621828943490982, "train/loss_prose": 3.3552918434143066, "train/loss_math": 2.509906053543091} +{"step": 587, "train/loss": 2.9049158096313477, "train/lm_loss": 2.9049158096313477, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.995283569130207e-05, "perf/tokens_per_sec": 26189.982764481967, "train/loss_code": 2.263213574886322, "train/loss_prose": 3.640751600265503, "train/loss_math": 3.264216899871826} +{"step": 588, "train/loss": 2.995196580886841, "train/lm_loss": 2.995196580886841, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9951758031596304e-05, "perf/tokens_per_sec": 26005.517789241687, "train/loss_code": 1.805249035358429, "train/loss_prose": 3.5898452758789063, "train/loss_math": 2.401848316192627} +{"step": 589, "train/loss": 3.346616566181183, "train/lm_loss": 3.346616566181183, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.995066821070679e-05, "perf/tokens_per_sec": 26323.774447357413, "train/loss_math": 2.6721916993459067, "train/loss_prose": 4.27953314781189, "train/loss_code": 1.6382242441177368} +{"step": 590, "train/loss": 2.6058140993118286, "train/lm_loss": 2.6058140993118286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9949566229164704e-05, "perf/tokens_per_sec": 26130.58881577947, "train/loss_code": 1.7955394089221954, "train/loss_math": 2.679586887359619, "train/loss_prose": 3.661588986714681} +{"step": 591, "train/loss": 2.773711860179901, "train/lm_loss": 2.773711860179901, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9948452087507116e-05, "perf/tokens_per_sec": 26161.18823255311, "train/loss_prose": 3.454776346683502, "train/loss_code": 1.2429839968681335, "train/loss_math": 2.942310333251953} +{"step": 592, "train/loss": 2.4954515397548676, "train/lm_loss": 2.4954515397548676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.994732578627706e-05, "perf/tokens_per_sec": 26070.593245570773, "train/loss_code": 1.7105848491191864, "train/loss_prose": 3.63904345035553, "train/loss_math": 2.9215930700302124} +{"step": 593, "train/loss": 2.4702289700508118, "train/lm_loss": 2.4702289700508118, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.994618732602349e-05, "perf/tokens_per_sec": 26130.35035013111, "train/loss_math": 2.5736693382263183, "train/loss_code": 1.6009684205055237, "train/loss_prose": 3.6915481090545654} +{"step": 594, "train/loss": 2.103965014219284, "train/lm_loss": 2.103965014219284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.994503670730125e-05, "perf/tokens_per_sec": 25536.93248863613, "train/loss_math": 2.536661386489868, "train/loss_code": 1.1116279661655426, "train/loss_prose": 3.6559430360794067} +{"step": 595, "train/loss": 2.4428374469280243, "train/lm_loss": 2.4428374469280243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.994387393067117e-05, "perf/tokens_per_sec": 26104.661319070376, "train/loss_math": 2.784673055013021, "train/loss_code": 1.8772405683994293, "train/loss_prose": 3.679717540740967} +{"step": 596, "train/loss": 2.8934661746025085, "train/lm_loss": 2.8934661746025085, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.994269899669994e-05, "perf/tokens_per_sec": 26496.973467351254, "train/loss_math": 2.7006365060806274, "train/loss_prose": 3.6058905919392905, "train/loss_code": 1.5275121927261353} +{"step": 597, "train/loss": 3.3571730852127075, "train/lm_loss": 3.3571730852127075, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.994151190596025e-05, "perf/tokens_per_sec": 26001.660583422377, "train/loss_math": 2.7669392426808677, "train/loss_prose": 3.7113131046295167} +{"step": 598, "train/loss": 2.808172583580017, "train/lm_loss": 2.808172583580017, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.994031265903063e-05, "perf/tokens_per_sec": 26356.728586529123, "train/loss_prose": 3.646654963493347, "train/loss_math": 2.695460796356201, "train/loss_code": 1.7277664343516033} +{"step": 599, "train/loss": 2.6927810609340668, "train/lm_loss": 2.6927810609340668, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.993910125649561e-05, "perf/tokens_per_sec": 26257.99237014575, "train/loss_prose": 3.538697083791097, "train/loss_math": 2.600450277328491, "train/loss_code": 1.9084189335505168} +{"step": 600, "train/loss": 2.6566343307495117, "train/lm_loss": 2.6566343307495117, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.993787769894559e-05, "perf/tokens_per_sec": 26088.131152320617, "train/loss_code": 1.6757959922154744, "train/loss_math": 2.6267049312591553, "train/loss_prose": 3.657425800959269} +{"step": 600, "eval/loss": 2.37237510053997, "eval/lm_loss": 2.37237510053997, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 10.722829862746694, "eval/loss_code": 1.7043228916925928, "eval/ppl_code": 5.497661894115097, "eval/loss_prose": 3.5021602783286783, "eval/ppl_prose": 33.187067879212165, "eval/loss_math": 2.492123819401174, "eval/ppl_math": 12.086919313789133} +{"step": 601, "train/loss": 2.8384892344474792, "train/lm_loss": 2.8384892344474792, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.993664198697694e-05, "perf/tokens_per_sec": 26425.770681630103, "train/loss_prose": 3.4712015787760415, "train/loss_code": 2.1005112528800964, "train/loss_math": 2.697762409845988} +{"step": 602, "train/loss": 2.5611168146133423, "train/lm_loss": 2.5611168146133423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9935394121191915e-05, "perf/tokens_per_sec": 26113.073179480867, "train/loss_math": 2.7066477366856168, "train/loss_code": 1.5424007177352905} +{"step": 603, "train/loss": 2.9629388451576233, "train/lm_loss": 2.9629388451576233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.993413410219871e-05, "perf/tokens_per_sec": 26014.45673435827, "train/loss_prose": 3.6338586807250977, "train/loss_code": 2.1720076402028403, "train/loss_math": 2.652052879333496} +{"step": 604, "train/loss": 2.4441215693950653, "train/lm_loss": 2.4441215693950653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9932861930611454e-05, "perf/tokens_per_sec": 26134.64339783376, "train/loss_code": 1.6743878722190857, "train/loss_math": 2.5078357219696046, "train/loss_prose": 3.6650187969207764} +{"step": 605, "train/loss": 2.757684648036957, "train/lm_loss": 2.757684648036957, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9931577607050175e-05, "perf/tokens_per_sec": 26084.605841599343, "train/loss_math": 2.74129319190979, "train/loss_prose": 3.5945072174072266, "train/loss_code": 1.6473848422368367} +{"step": 606, "train/loss": 2.733545631170273, "train/lm_loss": 2.733545631170273, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9930281132140846e-05, "perf/tokens_per_sec": 26395.806728370022, "train/loss_code": 1.609984318415324, "train/loss_math": 2.543905258178711, "train/loss_prose": 3.6236268281936646} +{"step": 607, "train/loss": 2.395003706216812, "train/lm_loss": 2.395003706216812, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.992897250651535e-05, "perf/tokens_per_sec": 25974.29943545534, "train/loss_code": 1.6422450244426727, "train/loss_math": 2.4833285808563232, "train/loss_prose": 3.812196373939514} +{"step": 608, "train/loss": 2.7030282020568848, "train/lm_loss": 2.7030282020568848, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9927651730811495e-05, "perf/tokens_per_sec": 26366.841348064907, "train/loss_prose": 3.528085947036743, "train/loss_math": 2.7322280406951904, "train/loss_code": 1.819570004940033} +{"step": 609, "train/loss": 2.8212637305259705, "train/lm_loss": 2.8212637305259705, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.992631880567301e-05, "perf/tokens_per_sec": 26006.73816404352, "train/loss_code": 1.7729613184928894, "train/loss_math": 2.5978950659434, "train/loss_prose": 3.7435006300608316} +{"step": 610, "train/loss": 2.0600289404392242, "train/lm_loss": 2.0600289404392242, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.992497373174955e-05, "perf/tokens_per_sec": 26037.67942189243, "train/loss_code": 1.6029842853546143, "train/loss_prose": 3.6834473609924316, "train/loss_math": 2.3909316062927246} +{"step": 611, "train/loss": 2.841959536075592, "train/lm_loss": 2.841959536075592, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9923616509696683e-05, "perf/tokens_per_sec": 26240.94685633201, "train/loss_math": 2.779318904876709, "train/loss_prose": 3.573670744895935, "train/loss_code": 1.6917412281036377} +{"step": 612, "train/loss": 2.4564678370952606, "train/lm_loss": 2.4564678370952606, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.99222471401759e-05, "perf/tokens_per_sec": 26139.295791663055, "train/loss_math": 2.717006266117096, "train/loss_code": 1.6881190538406372, "train/loss_prose": 3.719359874725342} +{"step": 613, "train/loss": 2.6860194206237793, "train/lm_loss": 2.6860194206237793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9920865623854615e-05, "perf/tokens_per_sec": 25395.449455575363, "train/loss_math": 2.6436736583709717, "train/loss_prose": 3.5392491817474365, "train/loss_code": 1.1912891864776611} +{"step": 614, "train/loss": 2.071913242340088, "train/lm_loss": 2.071913242340088, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.991947196140618e-05, "perf/tokens_per_sec": 26429.754752161472, "train/loss_code": 1.2267450243234634, "train/loss_math": 2.589665412902832, "train/loss_prose": 3.244497299194336} +{"step": 615, "train/loss": 2.420325458049774, "train/lm_loss": 2.420325458049774, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9918066153509834e-05, "perf/tokens_per_sec": 26371.091022401783, "train/loss_code": 1.6548528671264648, "train/loss_math": 2.8058040142059326, "train/loss_prose": 3.1748290061950684} +{"step": 616, "train/loss": 2.888494312763214, "train/lm_loss": 2.888494312763214, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.991664820085074e-05, "perf/tokens_per_sec": 26168.121842627555, "train/loss_code": 1.719585696856181, "train/loss_prose": 3.889307379722595, "train/loss_math": 2.3919677734375} +{"step": 617, "train/loss": 2.078054815530777, "train/lm_loss": 2.078054815530777, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.991521810412002e-05, "perf/tokens_per_sec": 25953.77712515164, "train/loss_math": 2.5674416422843933, "train/loss_code": 1.5886681824922562} +{"step": 618, "train/loss": 2.303607940673828, "train/lm_loss": 2.303607940673828, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9913775864014665e-05, "perf/tokens_per_sec": 25898.649557548804, "train/loss_prose": 3.3546940088272095, "train/loss_code": 1.2099618514378865, "train/loss_math": 2.696530024210612} +{"step": 619, "train/loss": 2.8693443536758423, "train/lm_loss": 2.8693443536758423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.991232148123761e-05, "perf/tokens_per_sec": 25002.79309230729, "train/loss_math": 2.855513652165731, "train/loss_code": 2.0500833988189697, "train/loss_prose": 3.4293492635091147} +{"step": 620, "train/loss": 3.1635772585868835, "train/lm_loss": 3.1635772585868835, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9910854956497696e-05, "perf/tokens_per_sec": 26004.612423881437, "train/loss_math": 2.719054841995239, "train/loss_prose": 3.904447635014852} +{"step": 621, "train/loss": 3.109932541847229, "train/lm_loss": 3.109932541847229, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.990937629050971e-05, "perf/tokens_per_sec": 26070.474559089984, "train/loss_prose": 3.5251389026641844, "train/loss_code": 1.954653263092041, "train/loss_math": 2.6495563983917236} +{"step": 622, "train/loss": 2.951603889465332, "train/lm_loss": 2.951603889465332, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.990788548399432e-05, "perf/tokens_per_sec": 25965.348817947965, "train/loss_math": 2.611456871032715, "train/loss_prose": 3.7501602172851562, "train/loss_code": 1.6946377754211426} +{"step": 623, "train/loss": 3.02937114238739, "train/lm_loss": 3.02937114238739, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.990638253767812e-05, "perf/tokens_per_sec": 24940.79663659618, "train/loss_prose": 3.838037073612213, "train/loss_math": 2.4915716648101807, "train/loss_code": 1.4081058502197266} +{"step": 624, "train/loss": 2.969350039958954, "train/lm_loss": 2.969350039958954, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.990486745229364e-05, "perf/tokens_per_sec": 25283.364484991016, "train/loss_math": 2.4923564195632935, "train/loss_prose": 3.688361883163452, "train/loss_code": 2.0083200335502625} +{"step": 625, "train/loss": 3.02388858795166, "train/lm_loss": 3.02388858795166, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.990334022857932e-05, "perf/tokens_per_sec": 25301.5356057761, "train/loss_math": 2.6600857377052307, "train/loss_prose": 3.3876911997795105} +{"step": 626, "train/loss": 2.9920076727867126, "train/lm_loss": 2.9920076727867126, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.990180086727949e-05, "perf/tokens_per_sec": 26509.484672062215, "train/loss_prose": 3.5880818963050842, "train/loss_math": 2.710301081339518, "train/loss_code": 1.4528297185897827} +{"step": 627, "train/loss": 2.798568904399872, "train/lm_loss": 2.798568904399872, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9900249369144434e-05, "perf/tokens_per_sec": 25460.78621372403, "train/loss_prose": 3.6175971825917563, "train/loss_math": 2.7639506657918296, "train/loss_code": 1.6219537854194641} +{"step": 628, "train/loss": 3.075594425201416, "train/lm_loss": 3.075594425201416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.989868573493032e-05, "perf/tokens_per_sec": 26396.57730583848, "train/loss_math": 2.8790162801742554, "train/loss_prose": 3.94429620107015, "train/loss_code": 1.2558022737503052} +{"step": 629, "train/loss": 2.6838988065719604, "train/lm_loss": 2.6838988065719604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.989710996539926e-05, "perf/tokens_per_sec": 25756.461928720924, "train/loss_code": 1.7324410378932953, "train/loss_prose": 3.635356366634369} +{"step": 630, "train/loss": 2.736118733882904, "train/lm_loss": 2.736118733882904, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.989552206131925e-05, "perf/tokens_per_sec": 25686.98892370996, "train/loss_math": 2.5706337690353394, "train/loss_code": 1.595743179321289, "train/loss_prose": 3.336890379587809} +{"step": 631, "train/loss": 1.7739014327526093, "train/lm_loss": 1.7739014327526093, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9893922023464236e-05, "perf/tokens_per_sec": 25497.55364693485, "train/loss_code": 1.6724901454789298, "train/loss_math": 2.4837801456451416} +{"step": 632, "train/loss": 2.8015055060386658, "train/lm_loss": 2.8015055060386658, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.989230985261403e-05, "perf/tokens_per_sec": 26974.498360007095, "train/loss_math": 2.665287812550863, "train/loss_prose": 3.864102602005005, "train/loss_code": 2.5562143325805664} +{"step": 633, "train/loss": 2.418853849172592, "train/lm_loss": 2.418853849172592, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.989068554955439e-05, "perf/tokens_per_sec": 25584.774685474647, "train/loss_code": 1.5492322742938995, "train/loss_prose": 4.653988838195801, "train/loss_math": 2.833304484685262} +{"step": 634, "train/loss": 2.1839448511600494, "train/lm_loss": 2.1839448511600494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9889049115077005e-05, "perf/tokens_per_sec": 24460.134779501168, "train/loss_math": 2.4843307018280028, "train/loss_code": 1.6833017667134602} +{"step": 635, "train/loss": 2.856775164604187, "train/lm_loss": 2.856775164604187, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.988740054997943e-05, "perf/tokens_per_sec": 26038.58709124942, "train/loss_code": 2.2946082750956216, "train/loss_math": 2.8334975242614746, "train/loss_prose": 3.734942078590393} +{"step": 636, "train/loss": 2.411760240793228, "train/lm_loss": 2.411760240793228, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.988573985506516e-05, "perf/tokens_per_sec": 25543.76698197649, "train/loss_prose": 3.4261120557785034, "train/loss_math": 2.6688860257466636, "train/loss_code": 1.4783997535705566} +{"step": 637, "train/loss": 2.349571853876114, "train/lm_loss": 2.349571853876114, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.98840670311436e-05, "perf/tokens_per_sec": 26108.82689320831, "train/loss_math": 2.73148512840271, "train/loss_code": 1.7721912264823914, "train/loss_prose": 3.513354539871216} +{"step": 638, "train/loss": 2.6848074197769165, "train/lm_loss": 2.6848074197769165, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9882382079030064e-05, "perf/tokens_per_sec": 25642.552608679427, "train/loss_math": 2.4835116863250732, "train/loss_prose": 3.468569278717041, "train/loss_code": 2.303636848926544} +{"step": 639, "train/loss": 2.725620210170746, "train/lm_loss": 2.725620210170746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.988068499954578e-05, "perf/tokens_per_sec": 25596.362821521372, "train/loss_code": 2.100024700164795, "train/loss_math": 2.564555048942566, "train/loss_prose": 3.6733455657958984} +{"step": 640, "train/loss": 2.7581984996795654, "train/lm_loss": 2.7581984996795654, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.987897579351788e-05, "perf/tokens_per_sec": 26066.321212958002, "train/loss_prose": 3.3641724586486816, "train/loss_code": 2.1121314764022827, "train/loss_math": 2.5829355716705322} +{"step": 641, "train/loss": 2.431885063648224, "train/lm_loss": 2.431885063648224, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.987725446177941e-05, "perf/tokens_per_sec": 25780.773495122176, "train/loss_code": 1.4375649094581604, "train/loss_prose": 3.117910146713257, "train/loss_math": 2.6924081802368165} +{"step": 642, "train/loss": 2.648668020963669, "train/lm_loss": 2.648668020963669, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.987552100516933e-05, "perf/tokens_per_sec": 27068.63824490529, "train/loss_code": 1.9761316776275635, "train/loss_math": 2.7036617596944175, "train/loss_prose": 3.5749822854995728} +{"step": 643, "train/loss": 2.673859417438507, "train/lm_loss": 2.673859417438507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.987377542453251e-05, "perf/tokens_per_sec": 26355.5155764124, "train/loss_math": 2.688342730204264, "train/loss_prose": 3.4407015641530356, "train/loss_code": 1.5018714368343353} +{"step": 644, "train/loss": 2.7061354517936707, "train/lm_loss": 2.7061354517936707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.987201772071971e-05, "perf/tokens_per_sec": 25786.113492784185, "train/loss_code": 2.0159666538238525, "train/loss_math": 2.553957462310791, "train/loss_prose": 3.677086353302002} +{"step": 645, "train/loss": 1.8778253495693207, "train/lm_loss": 1.8778253495693207, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.987024789458762e-05, "perf/tokens_per_sec": 26516.113035438946, "train/loss_code": 1.4991257190704346, "train/loss_prose": 4.528722763061523} +{"step": 646, "train/loss": 3.0052163302898407, "train/lm_loss": 3.0052163302898407, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.986846594699883e-05, "perf/tokens_per_sec": 26417.237430957917, "train/loss_prose": 3.7508280873298645, "train/loss_code": 1.8957263231277466, "train/loss_math": 2.6234830617904663} +{"step": 647, "train/loss": 2.8991525173187256, "train/lm_loss": 2.8991525173187256, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.986667187882186e-05, "perf/tokens_per_sec": 25793.85654723029, "train/loss_code": 1.6370236277580261, "train/loss_prose": 3.4348697662353516, "train/loss_math": 2.7448246479034424} +{"step": 648, "train/loss": 2.4839888215065002, "train/lm_loss": 2.4839888215065002, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9864865690931086e-05, "perf/tokens_per_sec": 26967.088574532074, "train/loss_code": 1.8793805539608002, "train/loss_math": 2.394684314727783, "train/loss_prose": 3.7825101613998413} +{"step": 649, "train/loss": 2.1821784377098083, "train/lm_loss": 2.1821784377098083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9863047384206835e-05, "perf/tokens_per_sec": 25585.87967992089, "train/loss_code": 1.5716682523488998, "train/loss_math": 2.351684014002482, "train/loss_prose": 4.115703105926514} +{"step": 650, "train/loss": 2.067381650209427, "train/lm_loss": 2.067381650209427, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9861216959535335e-05, "perf/tokens_per_sec": 26025.60949169386, "train/loss_prose": 4.266674995422363, "train/loss_code": 1.3271725535392762, "train/loss_math": 2.818257451057434} +{"step": 651, "train/loss": 2.282953679561615, "train/lm_loss": 2.282953679561615, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.98593744178087e-05, "perf/tokens_per_sec": 25582.10785069942, "train/loss_math": 2.4135955174764, "train/loss_prose": 3.5662271976470947, "train/loss_code": 1.2967963218688965} +{"step": 652, "train/loss": 2.475156009197235, "train/lm_loss": 2.475156009197235, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9857519759924974e-05, "perf/tokens_per_sec": 25429.882536139423, "train/loss_prose": 3.373016834259033, "train/loss_code": 1.9607349932193756, "train/loss_math": 1.8392572402954102} +{"step": 653, "train/loss": 2.92531156539917, "train/lm_loss": 2.92531156539917, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.985565298678809e-05, "perf/tokens_per_sec": 26229.688911688638, "train/loss_prose": 4.025416135787964, "train/loss_math": 2.670049858093262, "train/loss_code": 2.001410961151123} +{"step": 654, "train/loss": 2.8657463788986206, "train/lm_loss": 2.8657463788986206, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.985377409930789e-05, "perf/tokens_per_sec": 25009.45385516825, "train/loss_math": 2.6204419136047363, "train/loss_prose": 3.534736752510071, "train/loss_code": 0.9256982207298279} +{"step": 655, "train/loss": 2.633489727973938, "train/lm_loss": 2.633489727973938, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.985188309840012e-05, "perf/tokens_per_sec": 24558.24720822922, "train/loss_math": 2.7889021039009094, "train/loss_prose": 4.085367202758789, "train/loss_code": 1.9423139095306396} +{"step": 656, "train/loss": 2.6354002952575684, "train/lm_loss": 2.6354002952575684, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9849979984986426e-05, "perf/tokens_per_sec": 25433.609607404524, "train/loss_code": 1.9536594152450562, "train/loss_prose": 3.68220853805542, "train/loss_math": 2.6192692120869956} +{"step": 657, "train/loss": 2.6901947259902954, "train/lm_loss": 2.6901947259902954, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.984806475999437e-05, "perf/tokens_per_sec": 27077.981688336047, "train/loss_prose": 3.126902222633362, "train/loss_math": 2.7316381454467775, "train/loss_code": 1.6095627546310425} +{"step": 658, "train/loss": 2.460145741701126, "train/lm_loss": 2.460145741701126, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.984613742435742e-05, "perf/tokens_per_sec": 25319.06118917963, "train/loss_math": 2.519049072265625, "train/loss_code": 1.5706377625465393, "train/loss_prose": 3.944643974304199} +{"step": 659, "train/loss": 2.455163300037384, "train/lm_loss": 2.455163300037384, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.984419797901491e-05, "perf/tokens_per_sec": 25983.885004552507, "train/loss_math": 2.576291561126709, "train/loss_code": 1.6295861601829529, "train/loss_prose": 3.985189199447632} +{"step": 660, "train/loss": 2.394431322813034, "train/lm_loss": 2.394431322813034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.984224642491212e-05, "perf/tokens_per_sec": 26452.788317571376, "train/loss_prose": 3.087028682231903, "train/loss_code": 1.0939478278160095, "train/loss_math": 2.3097198009490967} +{"step": 661, "train/loss": 2.545448422431946, "train/lm_loss": 2.545448422431946, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.984028276300021e-05, "perf/tokens_per_sec": 25606.472888679556, "train/loss_code": 1.4601191679636638, "train/loss_prose": 3.5017894903818765, "train/loss_math": 2.738930344581604} +{"step": 662, "train/loss": 1.9036200642585754, "train/lm_loss": 1.9036200642585754, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.983830699423625e-05, "perf/tokens_per_sec": 25638.381432775594, "train/loss_math": 2.5217838287353516, "train/loss_code": 1.2854563891887665} +{"step": 663, "train/loss": 2.455796957015991, "train/lm_loss": 2.455796957015991, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.983631911958319e-05, "perf/tokens_per_sec": 25915.995908922232, "train/loss_math": 2.536093533039093, "train/loss_code": 1.8702277541160583, "train/loss_prose": 2.8807730674743652} +{"step": 664, "train/loss": 2.3184691667556763, "train/lm_loss": 2.3184691667556763, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.983431914000991e-05, "perf/tokens_per_sec": 25749.08863419379, "train/loss_prose": 3.434624671936035, "train/loss_code": 1.5565255284309387, "train/loss_math": 2.7262006998062134} +{"step": 665, "train/loss": 2.64976105093956, "train/lm_loss": 2.64976105093956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.983230705649118e-05, "perf/tokens_per_sec": 25455.089773584292, "train/loss_prose": 3.575785239537557, "train/loss_math": 2.8889737129211426, "train/loss_code": 1.8954396843910217} +{"step": 666, "train/loss": 2.46875461935997, "train/lm_loss": 2.46875461935997, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9830282870007646e-05, "perf/tokens_per_sec": 26424.551308317492, "train/loss_math": 2.464564621448517, "train/loss_code": 1.7975937922795613, "train/loss_prose": 4.498998165130615} +{"step": 667, "train/loss": 2.4992920458316803, "train/lm_loss": 2.4992920458316803, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.982824658154589e-05, "perf/tokens_per_sec": 26598.012695306454, "train/loss_prose": 3.456606149673462, "train/loss_code": 1.1510806481043498, "train/loss_math": 2.7146711349487305} +{"step": 668, "train/loss": 3.3879387974739075, "train/lm_loss": 3.3879387974739075, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.982619819209836e-05, "perf/tokens_per_sec": 25381.41732804624, "train/loss_math": 2.554387927055359, "train/loss_prose": 3.665789087613424} +{"step": 669, "train/loss": 2.706434190273285, "train/lm_loss": 2.706434190273285, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.982413770266342e-05, "perf/tokens_per_sec": 25993.045043354854, "train/loss_math": 2.385991334915161, "train/loss_code": 1.9418460726737976, "train/loss_prose": 3.8326990604400635} +{"step": 670, "train/loss": 3.035508394241333, "train/lm_loss": 3.035508394241333, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.982206511424534e-05, "perf/tokens_per_sec": 25520.468795864406, "train/loss_prose": 3.7925455570220947, "train/loss_math": 2.7571909427642822, "train/loss_code": 2.1188979546229043} +{"step": 671, "train/loss": 2.8464449644088745, "train/lm_loss": 2.8464449644088745, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.981998042785427e-05, "perf/tokens_per_sec": 26827.574809371297, "train/loss_math": 2.6867125829060874, "train/loss_prose": 3.4346395333607993, "train/loss_code": 2.203751504421234} +{"step": 672, "train/loss": 2.18911612033844, "train/lm_loss": 2.18911612033844, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.981788364450625e-05, "perf/tokens_per_sec": 26213.76001562462, "train/loss_code": 1.6116580963134766, "train/loss_math": 2.446821093559265, "train/loss_prose": 2.8906688690185547} +{"step": 673, "train/loss": 2.5650708079338074, "train/lm_loss": 2.5650708079338074, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9815774765223226e-05, "perf/tokens_per_sec": 25700.78431970942, "train/loss_math": 2.6130669116973877, "train/loss_code": 2.229098081588745} +{"step": 674, "train/loss": 2.69290554523468, "train/lm_loss": 2.69290554523468, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9813653791033057e-05, "perf/tokens_per_sec": 26631.740042474692, "train/loss_prose": 3.625385363896688, "train/loss_code": 1.5907751321792603, "train/loss_math": 2.4951792558034263} +{"step": 675, "train/loss": 3.1871290802955627, "train/lm_loss": 3.1871290802955627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9811520722969465e-05, "perf/tokens_per_sec": 26004.533699437976, "train/loss_math": 2.746403535207113, "train/loss_prose": 3.832427740097046, "train/loss_code": 1.9281114339828491} +{"step": 676, "train/loss": 2.449439823627472, "train/lm_loss": 2.449439823627472, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.980937556207208e-05, "perf/tokens_per_sec": 26193.217130921345, "train/loss_prose": 3.6167949438095093, "train/loss_code": 1.5379473368326824, "train/loss_math": 2.5826953252156577} +{"step": 677, "train/loss": 2.33100426197052, "train/lm_loss": 2.33100426197052, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9807218309386444e-05, "perf/tokens_per_sec": 25485.41206279131, "train/loss_code": 1.5984804928302765, "train/loss_math": 2.495486259460449, "train/loss_prose": 3.631569504737854} +{"step": 678, "train/loss": 2.583278477191925, "train/lm_loss": 2.583278477191925, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.980504896596396e-05, "perf/tokens_per_sec": 26012.329732258713, "train/loss_math": 2.5711022218068442, "train/loss_code": 1.6004378000895183, "train/loss_prose": 4.075804829597473} +{"step": 679, "train/loss": 2.7883611917495728, "train/lm_loss": 2.7883611917495728, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.980286753286195e-05, "perf/tokens_per_sec": 25944.488013796894, "train/loss_math": 3.0812830924987793, "train/loss_code": 1.617889642715454, "train/loss_prose": 3.592984139919281} +{"step": 680, "train/loss": 1.8019006550312042, "train/lm_loss": 1.8019006550312042, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9800674011143614e-05, "perf/tokens_per_sec": 26082.467353398868, "train/loss_code": 1.3885969817638397, "train/loss_math": 2.456862211227417, "train/loss_prose": 3.6267611980438232} +{"step": 681, "train/loss": 2.9059877395629883, "train/lm_loss": 2.9059877395629883, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.979846840187804e-05, "perf/tokens_per_sec": 25926.555731955403, "train/loss_prose": 3.5659082730611167, "train/loss_code": 2.281404674053192, "train/loss_math": 2.6624557971954346} +{"step": 682, "train/loss": 2.6704857647418976, "train/lm_loss": 2.6704857647418976, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9796250706140224e-05, "perf/tokens_per_sec": 25922.2525432066, "train/loss_prose": 3.962817351023356, "train/loss_code": 1.429149905840556, "train/loss_math": 2.5939918756484985} +{"step": 683, "train/loss": 2.0271985828876495, "train/lm_loss": 2.0271985828876495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9794020925011044e-05, "perf/tokens_per_sec": 26600.071817644126, "train/loss_code": 1.4800259828567506, "train/loss_math": 2.5743783712387085, "train/loss_prose": 3.6687021255493164} +{"step": 684, "train/loss": 2.8239076137542725, "train/lm_loss": 2.8239076137542725, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.979177905957726e-05, "perf/tokens_per_sec": 25848.110627144935, "train/loss_code": 1.7058165868123372, "train/loss_math": 2.57199490070343, "train/loss_prose": 4.109940608342488} +{"step": 685, "train/loss": 2.3394337594509125, "train/lm_loss": 2.3394337594509125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9789525110931545e-05, "perf/tokens_per_sec": 26086.34857275611, "train/loss_math": 2.49106502532959, "train/loss_prose": 3.1704261302948, "train/loss_code": 1.20517897605896} +{"step": 686, "train/loss": 2.7013968527317047, "train/lm_loss": 2.7013968527317047, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.978725908017243e-05, "perf/tokens_per_sec": 25348.01284230406, "train/loss_prose": 3.731083869934082, "train/loss_math": 2.516561190287272, "train/loss_code": 1.4341195821762085} +{"step": 687, "train/loss": 2.4209239780902863, "train/lm_loss": 2.4209239780902863, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.978498096840436e-05, "perf/tokens_per_sec": 26249.847868902554, "train/loss_code": 1.3849025219678879, "train/loss_prose": 3.456945538520813} +{"step": 688, "train/loss": 2.4583787322044373, "train/lm_loss": 2.4583787322044373, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.978269077673767e-05, "perf/tokens_per_sec": 26308.375688915636, "train/loss_math": 2.862023949623108, "train/loss_code": 1.6654661893844604, "train/loss_prose": 3.222536563873291} +{"step": 689, "train/loss": 2.6637792587280273, "train/lm_loss": 2.6637792587280273, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.978038850628854e-05, "perf/tokens_per_sec": 25984.906834778296, "train/loss_code": 1.8912673592567444, "train/loss_prose": 4.031837701797485, "train/loss_math": 2.8407446146011353} +{"step": 690, "train/loss": 2.556469202041626, "train/lm_loss": 2.556469202041626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.97780741581791e-05, "perf/tokens_per_sec": 25987.61900468779, "train/loss_math": 2.4538235664367676, "train/loss_code": 1.637254277865092, "train/loss_prose": 3.271542012691498} +{"step": 691, "train/loss": 2.6096556782722473, "train/lm_loss": 2.6096556782722473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.977574773353732e-05, "perf/tokens_per_sec": 26596.07741096516, "train/loss_code": 1.3985950350761414, "train/loss_prose": 3.32097061475118, "train/loss_math": 2.705714464187622} +{"step": 692, "train/loss": 3.375230610370636, "train/lm_loss": 3.375230610370636, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.977340923349707e-05, "perf/tokens_per_sec": 25687.296180667694, "train/loss_prose": 3.964278221130371, "train/loss_math": 2.6877158880233765, "train/loss_code": 1.8050203323364258} +{"step": 693, "train/loss": 2.083853930234909, "train/lm_loss": 2.083853930234909, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.977105865919812e-05, "perf/tokens_per_sec": 26211.560307644893, "train/loss_code": 1.6249224185943603, "train/loss_prose": 3.2970149517059326, "train/loss_math": 2.624602198600769} +{"step": 694, "train/loss": 2.1603469252586365, "train/lm_loss": 2.1603469252586365, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.976869601178609e-05, "perf/tokens_per_sec": 26237.21984758472, "train/loss_prose": 3.363853335380554, "train/loss_code": 1.0432559251785278, "train/loss_math": 2.47510035832723} +{"step": 695, "train/loss": 2.6639519035816193, "train/lm_loss": 2.6639519035816193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.976632129241252e-05, "perf/tokens_per_sec": 26190.5018065181, "train/loss_prose": 3.4478246569633484, "train/loss_code": 1.8800792396068573} +{"step": 696, "train/loss": 3.0948997139930725, "train/lm_loss": 3.0948997139930725, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.976393450223482e-05, "perf/tokens_per_sec": 26362.512136361824, "train/loss_prose": 3.5031973838806154, "train/loss_code": 2.7167882919311523, "train/loss_math": 2.2632108330726624} +{"step": 697, "train/loss": 2.21385857462883, "train/lm_loss": 2.21385857462883, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.976153564241628e-05, "perf/tokens_per_sec": 25695.787061369723, "train/loss_prose": 3.6659064292907715, "train/loss_code": 1.6914156198501586, "train/loss_math": 2.7939422130584717} +{"step": 698, "train/loss": 3.1549258828163147, "train/lm_loss": 3.1549258828163147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.975912471412607e-05, "perf/tokens_per_sec": 26979.708817940544, "train/loss_prose": 4.286638577779134, "train/loss_math": 2.7023860613505044, "train/loss_code": 2.1361671686172485} +{"step": 699, "train/loss": 2.8016862273216248, "train/lm_loss": 2.8016862273216248, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.975670171853926e-05, "perf/tokens_per_sec": 27052.441169153055, "train/loss_code": 2.4230019251505532, "train/loss_prose": 3.931938648223877, "train/loss_math": 2.8031359910964966} +{"step": 700, "train/loss": 2.735502064228058, "train/lm_loss": 2.735502064228058, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.975426665683678e-05, "perf/tokens_per_sec": 27450.89668924965, "train/loss_prose": 3.7960476875305176, "train/loss_code": 1.6922885179519653, "train/loss_math": 2.7095038890838623} +{"step": 701, "train/loss": 2.315721958875656, "train/lm_loss": 2.315721958875656, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.975181953020544e-05, "perf/tokens_per_sec": 26687.92165683856, "train/loss_math": 2.448947270711263, "train/loss_code": 1.4501699606577556, "train/loss_prose": 3.4142119884490967} +{"step": 702, "train/loss": 2.667835086584091, "train/lm_loss": 2.667835086584091, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.974936033983795e-05, "perf/tokens_per_sec": 26475.12765177083, "train/loss_math": 2.5061197757720945, "train/loss_prose": 3.762399435043335, "train/loss_code": 1.2872833013534546} +{"step": 703, "train/loss": 2.666661858558655, "train/lm_loss": 2.666661858558655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9746889086932895e-05, "perf/tokens_per_sec": 25920.961865116427, "train/loss_math": 2.7500593066215515, "train/loss_prose": 3.704716682434082, "train/loss_code": 1.4618121981620789} +{"step": 704, "train/loss": 2.889211416244507, "train/lm_loss": 2.889211416244507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9744405772694725e-05, "perf/tokens_per_sec": 24446.803867690272, "train/loss_prose": 3.836288630962372, "train/loss_math": 2.561846375465393, "train/loss_code": 1.3224219977855682} +{"step": 705, "train/loss": 2.825834274291992, "train/lm_loss": 2.825834274291992, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.974191039833378e-05, "perf/tokens_per_sec": 25628.055003774127, "train/loss_math": 2.5052599906921387, "train/loss_prose": 3.7875566482543945} +{"step": 706, "train/loss": 2.896843731403351, "train/lm_loss": 2.896843731403351, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9739402965066276e-05, "perf/tokens_per_sec": 25498.537583913534, "train/loss_prose": 3.902226289113363, "train/loss_code": 1.6664283871650696, "train/loss_math": 2.711737950642904} +{"step": 707, "train/loss": 2.7600908875465393, "train/lm_loss": 2.7600908875465393, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.973688347411431e-05, "perf/tokens_per_sec": 25475.05736231798, "train/loss_math": 2.538904905319214, "train/loss_code": 1.8979892333348591, "train/loss_prose": 3.769650141398112} +{"step": 708, "train/loss": 2.9472344517707825, "train/lm_loss": 2.9472344517707825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.973435192670584e-05, "perf/tokens_per_sec": 25613.039169761476, "train/loss_code": 1.8394973278045654, "train/loss_math": 2.541407585144043, "train/loss_prose": 4.091552257537842} +{"step": 709, "train/loss": 2.190660923719406, "train/lm_loss": 2.190660923719406, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9731808324074717e-05, "perf/tokens_per_sec": 26579.700541345053, "train/loss_code": 1.858810591697693, "train/loss_math": 2.5285998582839966, "train/loss_prose": 3.174034595489502} +{"step": 710, "train/loss": 2.765025556087494, "train/lm_loss": 2.765025556087494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9729252667460655e-05, "perf/tokens_per_sec": 26751.836958925185, "train/loss_math": 2.6538219451904297, "train/loss_prose": 3.533814032872518, "train/loss_code": 1.7786478996276855} +{"step": 711, "train/loss": 2.685956835746765, "train/lm_loss": 2.685956835746765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9726684958109266e-05, "perf/tokens_per_sec": 26218.32058626931, "train/loss_math": 2.5873464345932007, "train/loss_code": 1.8828751742839813, "train/loss_prose": 4.390730500221252} +{"step": 712, "train/loss": 2.819898694753647, "train/lm_loss": 2.819898694753647, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.972410519727201e-05, "perf/tokens_per_sec": 27122.354932975068, "train/loss_code": 1.6750198900699615, "train/loss_prose": 3.964777112007141} +{"step": 713, "train/loss": 2.4020888209342957, "train/lm_loss": 2.4020888209342957, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.972151338620623e-05, "perf/tokens_per_sec": 26395.2795170764, "train/loss_code": 1.7840688526630402, "train/loss_prose": 3.233715057373047, "train/loss_math": 2.8065025806427} +{"step": 714, "train/loss": 2.9900174140930176, "train/lm_loss": 2.9900174140930176, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.971890952617515e-05, "perf/tokens_per_sec": 26844.426432742326, "train/loss_prose": 3.6676209568977356, "train/loss_code": 2.23492032289505, "train/loss_math": 2.389907121658325} +{"step": 715, "train/loss": 3.311574310064316, "train/lm_loss": 3.311574310064316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.971629361844785e-05, "perf/tokens_per_sec": 26853.867555341585, "train/loss_prose": 4.058828401565552, "train/loss_math": 2.7845675945281982, "train/loss_code": 1.7069419622421265} +{"step": 716, "train/loss": 2.4183146953582764, "train/lm_loss": 2.4183146953582764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.97136656642993e-05, "perf/tokens_per_sec": 26190.5018065181, "train/loss_code": 1.8088624000549316, "train/loss_math": 2.8636229038238525, "train/loss_prose": 3.7192912101745605} +{"step": 717, "train/loss": 2.7390549778938293, "train/lm_loss": 2.7390549778938293, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.971102566501034e-05, "perf/tokens_per_sec": 26596.24210505131, "train/loss_code": 1.8770535389582317, "train/loss_prose": 3.5954834620157876, "train/loss_math": 2.7474145889282227} +{"step": 718, "train/loss": 2.4474576115608215, "train/lm_loss": 2.4474576115608215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9708373621867656e-05, "perf/tokens_per_sec": 25992.76975495803, "train/loss_prose": 3.17860209941864, "train/loss_code": 2.082027792930603, "train/loss_math": 2.8123180866241455} +{"step": 719, "train/loss": 2.3608517050743103, "train/lm_loss": 2.3608517050743103, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9705709536163824e-05, "perf/tokens_per_sec": 27195.902511920063, "train/loss_math": 2.5384654998779297, "train/loss_code": 1.7592091858386993, "train/loss_prose": 3.386522650718689} +{"step": 720, "train/loss": 2.7708733677864075, "train/lm_loss": 2.7708733677864075, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.97030334091973e-05, "perf/tokens_per_sec": 27173.96401874037, "train/loss_math": 2.815208673477173, "train/loss_prose": 3.1442790826161704, "train/loss_code": 1.473314881324768} +{"step": 721, "train/loss": 3.200053572654724, "train/lm_loss": 3.200053572654724, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.970034524227238e-05, "perf/tokens_per_sec": 26228.607696124458, "train/loss_prose": 3.5605380535125732, "train/loss_math": 2.8395691514015198} +{"step": 722, "train/loss": 3.2020516991615295, "train/lm_loss": 3.2020516991615295, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.969764503669926e-05, "perf/tokens_per_sec": 26860.669276663873, "train/loss_prose": 3.859340476989746, "train/loss_math": 2.4435662031173706, "train/loss_code": 1.4325789213180542} +{"step": 723, "train/loss": 2.7900572419166565, "train/lm_loss": 2.7900572419166565, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.969493279379398e-05, "perf/tokens_per_sec": 26121.291879530967, "train/loss_prose": 3.4954593181610107, "train/loss_math": 2.6493533849716187, "train/loss_code": 1.5199567675590515} +{"step": 724, "train/loss": 2.7221860885620117, "train/lm_loss": 2.7221860885620117, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9692208514878444e-05, "perf/tokens_per_sec": 26296.576802507836, "train/loss_code": 1.9610718886057537, "train/loss_prose": 3.626527468363444, "train/loss_math": 2.5073450803756714} +{"step": 725, "train/loss": 2.653352677822113, "train/lm_loss": 2.653352677822113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.968947220128045e-05, "perf/tokens_per_sec": 26291.103781785238, "train/loss_prose": 3.3666649659474692, "train/loss_code": 1.794040560722351, "train/loss_math": 2.5129154523213706} +{"step": 726, "train/loss": 2.7376124262809753, "train/lm_loss": 2.7376124262809753, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.968672385433364e-05, "perf/tokens_per_sec": 26494.317379047432, "train/loss_math": 2.8239105939865112, "train/loss_prose": 3.3180274963378906, "train/loss_code": 1.490484356880188} +{"step": 727, "train/loss": 2.1229849755764008, "train/lm_loss": 2.1229849755764008, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.968396347537751e-05, "perf/tokens_per_sec": 26199.76817146186, "train/loss_code": 1.5581646680831909, "train/loss_math": 2.2850561141967773, "train/loss_prose": 3.4540001153945923} +{"step": 728, "train/loss": 3.0009315609931946, "train/lm_loss": 3.0009315609931946, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9681191065757455e-05, "perf/tokens_per_sec": 26310.83344411126, "train/loss_math": 2.8016364574432373, "train/loss_prose": 3.734254519144694, "train/loss_code": 1.5981426239013672} +{"step": 729, "train/loss": 2.431777775287628, "train/lm_loss": 2.431777775287628, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.96784066268247e-05, "perf/tokens_per_sec": 26348.23985976127, "train/loss_prose": 3.408090829849243, "train/loss_code": 1.4495129187901814, "train/loss_math": 2.44070565700531} +{"step": 730, "train/loss": 2.612707495689392, "train/lm_loss": 2.612707495689392, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.967561015993635e-05, "perf/tokens_per_sec": 26037.126958482742, "train/loss_prose": 3.555750767389933, "train/loss_code": 1.9048343896865845, "train/loss_math": 2.615070104598999} +{"step": 731, "train/loss": 2.7437058687210083, "train/lm_loss": 2.7437058687210083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.967280166645538e-05, "perf/tokens_per_sec": 26169.955480271085, "train/loss_math": 2.449172616004944, "train/loss_prose": 3.4133182168006897, "train/loss_code": 1.6990138292312622} +{"step": 732, "train/loss": 2.123906195163727, "train/lm_loss": 2.123906195163727, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.96699811477506e-05, "perf/tokens_per_sec": 26115.81217145967, "train/loss_code": 1.7844719588756561, "train/loss_math": 2.463340163230896} +{"step": 733, "train/loss": 2.394141435623169, "train/lm_loss": 2.394141435623169, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.96671486051967e-05, "perf/tokens_per_sec": 26300.683063639564, "train/loss_math": 2.4311776161193848, "train/loss_code": 1.8454139232635498, "train/loss_prose": 3.4545600414276123} +{"step": 734, "train/loss": 2.9460968375205994, "train/lm_loss": 2.9460968375205994, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.966430404017424e-05, "perf/tokens_per_sec": 26318.209465742406, "train/loss_prose": 3.337412178516388, "train/loss_math": 2.6595816612243652, "train/loss_code": 2.2403810024261475} +{"step": 735, "train/loss": 3.1283326148986816, "train/lm_loss": 3.1283326148986816, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.966144745406961e-05, "perf/tokens_per_sec": 26164.654578240225, "train/loss_prose": 3.7595086097717285, "train/loss_math": 2.2712652683258057, "train/loss_code": 1.6865874528884888} +{"step": 736, "train/loss": 2.2125852704048157, "train/lm_loss": 2.2125852704048157, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9658578848275076e-05, "perf/tokens_per_sec": 26284.024429947494, "train/loss_math": 2.4799168705940247, "train/loss_code": 1.451059897740682, "train/loss_prose": 3.427835464477539} +{"step": 737, "train/loss": 2.522402375936508, "train/lm_loss": 2.522402375936508, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.965569822418877e-05, "perf/tokens_per_sec": 25988.562500850912, "train/loss_prose": 3.7439721822738647, "train/loss_code": 1.7834425568580627, "train/loss_math": 2.7787522077560425} +{"step": 738, "train/loss": 2.9215153455734253, "train/lm_loss": 2.9215153455734253, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.965280558321468e-05, "perf/tokens_per_sec": 25658.829339108357, "train/loss_code": 1.585038959980011, "train/loss_math": 2.6693379878997803, "train/loss_prose": 3.715842545032501} +{"step": 739, "train/loss": 3.0204936861991882, "train/lm_loss": 3.0204936861991882, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.964990092676263e-05, "perf/tokens_per_sec": 26320.669058730164, "train/loss_code": 2.5159438848495483, "train/loss_prose": 3.3630345463752747, "train/loss_math": 2.839961290359497} +{"step": 740, "train/loss": 1.8420672118663788, "train/lm_loss": 1.8420672118663788, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.964698425624831e-05, "perf/tokens_per_sec": 26052.01219813631, "train/loss_code": 1.337082815170288, "train/loss_math": 2.189379334449768, "train/loss_prose": 3.6723647117614746} +{"step": 741, "train/loss": 2.2773708403110504, "train/lm_loss": 2.2773708403110504, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.964405557309328e-05, "perf/tokens_per_sec": 26404.366719178663, "train/loss_prose": 4.845446586608887, "train/loss_code": 1.5865338087081908, "train/loss_math": 2.720425605773926} +{"step": 742, "train/loss": 3.0343021750450134, "train/lm_loss": 3.0343021750450134, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9641114878724956e-05, "perf/tokens_per_sec": 25948.328274528758, "train/loss_prose": 3.6810967445373537, "train/loss_math": 2.286614775657654, "train/loss_code": 1.2957042455673218} +{"step": 743, "train/loss": 3.341035783290863, "train/lm_loss": 3.341035783290863, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.963816217457657e-05, "perf/tokens_per_sec": 26786.28990532736, "train/loss_prose": 3.557201067606608, "train/loss_math": 2.6925400495529175} +{"step": 744, "train/loss": 2.9322275519371033, "train/lm_loss": 2.9322275519371033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.963519746208726e-05, "perf/tokens_per_sec": 25972.964302776316, "train/loss_prose": 3.2441991329193116, "train/loss_code": 2.2946810126304626, "train/loss_math": 2.6474623680114746} +{"step": 745, "train/loss": 2.27509543299675, "train/lm_loss": 2.27509543299675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9632220742701965e-05, "perf/tokens_per_sec": 26430.120696418984, "train/loss_code": 1.7629223465919495, "train/loss_math": 2.4458197355270386} +{"step": 746, "train/loss": 2.229537218809128, "train/lm_loss": 2.229537218809128, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9629232017871524e-05, "perf/tokens_per_sec": 26004.651786281913, "train/loss_math": 2.5187892436981203, "train/loss_code": 1.747450351715088} +{"step": 747, "train/loss": 2.4622097611427307, "train/lm_loss": 2.4622097611427307, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9626231289052596e-05, "perf/tokens_per_sec": 26338.545150711896, "train/loss_code": 1.4672792355219524, "train/loss_math": 2.6572277545928955, "train/loss_prose": 3.6620789766311646} +{"step": 748, "train/loss": 3.1419742703437805, "train/lm_loss": 3.1419742703437805, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9623218557707694e-05, "perf/tokens_per_sec": 26042.928993822716, "train/loss_math": 2.6031976540883384, "train/loss_prose": 3.870997130870819, "train/loss_code": 1.8422130346298218} +{"step": 749, "train/loss": 2.6332347989082336, "train/lm_loss": 2.6332347989082336, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.962019382530521e-05, "perf/tokens_per_sec": 26008.43113163273, "train/loss_code": 1.8481205304463704, "train/loss_math": 2.669911026954651, "train/loss_prose": 3.393898328145345} +{"step": 750, "train/loss": 2.5649631023406982, "train/lm_loss": 2.5649631023406982, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9617157093319326e-05, "perf/tokens_per_sec": 25159.914859840046, "train/loss_math": 2.5707562446594237, "train/loss_prose": 3.1533055305480957, "train/loss_code": 1.3593131303787231} +{"step": 751, "train/loss": 3.1048152446746826, "train/lm_loss": 3.1048152446746826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9614108363230135e-05, "perf/tokens_per_sec": 26058.84852382628, "train/loss_prose": 3.605593538284302, "train/loss_math": 2.2852829694747925, "train/loss_code": 2.239988327026367} +{"step": 752, "train/loss": 2.358566164970398, "train/lm_loss": 2.358566164970398, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.961104763652355e-05, "perf/tokens_per_sec": 26106.56541659132, "train/loss_math": 2.371464192867279, "train/loss_prose": 3.67067813873291, "train/loss_code": 1.9039982159932454} +{"step": 753, "train/loss": 2.38075253367424, "train/lm_loss": 2.38075253367424, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9607974914691316e-05, "perf/tokens_per_sec": 26151.6306594432, "train/loss_code": 1.847835946083069, "train/loss_prose": 3.5121623277664185, "train/loss_math": 2.782515287399292} +{"step": 754, "train/loss": 2.126065045595169, "train/lm_loss": 2.126065045595169, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.960489019923105e-05, "perf/tokens_per_sec": 26091.617789819637, "train/loss_code": 1.7198168754577636, "train/loss_math": 2.3891735076904297, "train/loss_prose": 3.6310887336730957} +{"step": 755, "train/loss": 2.601623058319092, "train/lm_loss": 2.601623058319092, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.960179349164621e-05, "perf/tokens_per_sec": 25953.855542580208, "train/loss_code": 2.1187138160069785, "train/loss_math": 2.5015791257222495, "train/loss_prose": 3.4760526418685913} +{"step": 756, "train/loss": 2.4220938086509705, "train/lm_loss": 2.4220938086509705, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9598684793446085e-05, "perf/tokens_per_sec": 25973.71038941208, "train/loss_math": 2.6845056215922036, "train/loss_code": 1.7974028587341309, "train/loss_prose": 4.133622646331787} +{"step": 757, "train/loss": 2.970193326473236, "train/lm_loss": 2.970193326473236, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.959556410614582e-05, "perf/tokens_per_sec": 26362.67395047217, "train/loss_prose": 3.6954383850097656, "train/loss_math": 2.5874083042144775, "train/loss_code": 1.2175685167312622} +{"step": 758, "train/loss": 2.7484253644943237, "train/lm_loss": 2.7484253644943237, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.959243143126639e-05, "perf/tokens_per_sec": 26067.70551462109, "train/loss_prose": 3.6568753719329834, "train/loss_code": 1.2343419790267944} +{"step": 759, "train/loss": 2.521138846874237, "train/lm_loss": 2.521138846874237, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9589286770334654e-05, "perf/tokens_per_sec": 26035.193521157922, "train/loss_math": 2.739974101384481, "train/loss_prose": 3.6654900312423706, "train/loss_code": 1.5394028425216675} +{"step": 760, "train/loss": 2.7353197932243347, "train/lm_loss": 2.7353197932243347, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.958613012488324e-05, "perf/tokens_per_sec": 26036.258849061294, "train/loss_prose": 3.8401613235473633, "train/loss_code": 1.6829386949539185, "train/loss_math": 2.6566288471221924} +{"step": 761, "train/loss": 2.768673539161682, "train/lm_loss": 2.768673539161682, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.958296149645069e-05, "perf/tokens_per_sec": 26762.547078607404, "train/loss_prose": 3.367628733317057, "train/loss_math": 2.468742311000824, "train/loss_code": 2.1715328693389893} +{"step": 762, "train/loss": 2.3796769976615906, "train/lm_loss": 2.3796769976615906, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.957978088658134e-05, "perf/tokens_per_sec": 26064.857968200024, "train/loss_code": 1.4009053707122803, "train/loss_prose": 3.4576937357584634, "train/loss_math": 2.230809211730957} +{"step": 763, "train/loss": 2.167403221130371, "train/lm_loss": 2.167403221130371, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9576588296825386e-05, "perf/tokens_per_sec": 26160.271995809453, "train/loss_code": 1.7963817119598389, "train/loss_math": 2.3900161266326903} +{"step": 764, "train/loss": 2.1210371255874634, "train/lm_loss": 2.1210371255874634, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.957338372873886e-05, "perf/tokens_per_sec": 26348.724782560017, "train/loss_code": 1.6208119988441467, "train/loss_prose": 3.0513131618499756, "train/loss_math": 2.4779117902119956} +{"step": 765, "train/loss": 2.6031978726387024, "train/lm_loss": 2.6031978726387024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.957016718388362e-05, "perf/tokens_per_sec": 25985.064046561718, "train/loss_prose": 3.4197939236958823, "train/loss_math": 2.369286000728607, "train/loss_code": 1.089058518409729} +{"step": 766, "train/loss": 3.001668095588684, "train/lm_loss": 3.001668095588684, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9566938663827377e-05, "perf/tokens_per_sec": 25939.904701077314, "train/loss_code": 2.0667677521705627, "train/loss_math": 2.658560832341512, "train/loss_prose": 3.9680422147115073} +{"step": 767, "train/loss": 2.3648908138275146, "train/lm_loss": 2.3648908138275146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9563698170143666e-05, "perf/tokens_per_sec": 26439.760876858723, "train/loss_math": 2.1958367824554443, "train/loss_prose": 3.795963764190674, "train/loss_code": 1.5798960129419963} +{"step": 768, "train/loss": 2.3730970919132233, "train/lm_loss": 2.3730970919132233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.956044570441188e-05, "perf/tokens_per_sec": 26224.72406434418, "train/loss_math": 2.5500393509864807, "train/loss_prose": 3.6069905757904053, "train/loss_code": 1.7258762915929158} +{"step": 769, "train/loss": 2.613235682249069, "train/lm_loss": 2.613235682249069, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9557181268217227e-05, "perf/tokens_per_sec": 25652.392774272157, "train/loss_math": 2.638128900527954, "train/loss_code": 1.1801962852478027, "train/loss_prose": 3.267521858215332} +{"step": 770, "train/loss": 1.9436203837394714, "train/lm_loss": 1.9436203837394714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.955390486315073e-05, "perf/tokens_per_sec": 25108.361650850813, "train/loss_code": 1.3946897238492966, "train/loss_math": 2.2656351725260415, "train/loss_prose": 3.173297882080078} +{"step": 771, "train/loss": 2.899860143661499, "train/lm_loss": 2.899860143661499, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.95506164908093e-05, "perf/tokens_per_sec": 25163.74749935186, "train/loss_math": 2.476510524749756, "train/loss_code": 2.499671697616577, "train/loss_prose": 4.146748185157776} +{"step": 772, "train/loss": 2.105105698108673, "train/lm_loss": 2.105105698108673, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.954731615279563e-05, "perf/tokens_per_sec": 25609.335678637657, "train/loss_code": 1.695469903945923, "train/loss_prose": 3.4337098598480225, "train/loss_math": 2.464893102645874} +{"step": 773, "train/loss": 2.3772318959236145, "train/lm_loss": 2.3772318959236145, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9544003850718266e-05, "perf/tokens_per_sec": 25726.454699830785, "train/loss_math": 2.4072441260019937, "train/loss_code": 1.732690652211507, "train/loss_prose": 3.2990245819091797} +{"step": 774, "train/loss": 2.4694093465805054, "train/lm_loss": 2.4694093465805054, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9540679586191605e-05, "perf/tokens_per_sec": 26074.70837842309, "train/loss_code": 1.242449124654134, "train/loss_math": 2.3833495378494263, "train/loss_prose": 3.7537430127461753} +{"step": 775, "train/loss": 3.1181195974349976, "train/lm_loss": 3.1181195974349976, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.953734336083583e-05, "perf/tokens_per_sec": 25337.58262282848, "train/loss_prose": 3.6489577770233153, "train/loss_math": 2.74739933013916, "train/loss_code": 1.205370306968689} +{"step": 776, "train/loss": 2.3291175365448, "train/lm_loss": 2.3291175365448, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.953399517627698e-05, "perf/tokens_per_sec": 26056.71425668031, "train/loss_math": 2.5118919372558595, "train/loss_code": 1.2018473148345947, "train/loss_prose": 3.669786214828491} +{"step": 777, "train/loss": 2.930113911628723, "train/lm_loss": 2.930113911628723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.953063503414692e-05, "perf/tokens_per_sec": 25999.338939830293, "train/loss_prose": 3.4344681104024253, "train/loss_math": 2.6275014877319336} +{"step": 778, "train/loss": 1.7927693724632263, "train/lm_loss": 1.7927693724632263, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.952726293608335e-05, "perf/tokens_per_sec": 25957.071065199885, "train/loss_code": 1.5547825694084167, "train/loss_math": 2.5067297220230103} +{"step": 779, "train/loss": 2.580385744571686, "train/lm_loss": 2.580385744571686, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.952387888372979e-05, "perf/tokens_per_sec": 26056.753776968868, "train/loss_prose": 3.6575376987457275, "train/loss_code": 1.6539007822672527, "train/loss_math": 2.3543848991394043} +{"step": 780, "train/loss": 3.1583677530288696, "train/lm_loss": 3.1583677530288696, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.952048287873558e-05, "perf/tokens_per_sec": 25898.220100367675, "train/loss_math": 2.583548148473104, "train/loss_prose": 4.001228153705597, "train/loss_code": 1.5113844871520996} +{"step": 781, "train/loss": 2.763316512107849, "train/lm_loss": 2.763316512107849, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.951707492275589e-05, "perf/tokens_per_sec": 26151.949132701604, "train/loss_prose": 3.748466908931732, "train/loss_math": 2.462885856628418, "train/loss_code": 1.5499261220296223} +{"step": 782, "train/loss": 2.4835785031318665, "train/lm_loss": 2.4835785031318665, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.951365501745172e-05, "perf/tokens_per_sec": 26002.44767535137, "train/loss_math": 2.364041248957316, "train/loss_code": 2.013800541559855, "train/loss_prose": 3.3675509691238403} +{"step": 783, "train/loss": 2.9732537865638733, "train/lm_loss": 2.9732537865638733, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.95102231644899e-05, "perf/tokens_per_sec": 25701.207253839504, "train/loss_math": 2.6332563757896423, "train/loss_prose": 3.9027485847473145, "train/loss_code": 1.544758915901184} +{"step": 784, "train/loss": 2.540846526622772, "train/lm_loss": 2.540846526622772, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9506779365543046e-05, "perf/tokens_per_sec": 25999.53567320847, "train/loss_code": 1.6301225225130718, "train/loss_prose": 3.810802459716797, "train/loss_math": 2.604933023452759} +{"step": 785, "train/loss": 2.172319322824478, "train/lm_loss": 2.172319322824478, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9503323622289655e-05, "perf/tokens_per_sec": 25532.074629128187, "train/loss_math": 2.452608346939087, "train/loss_code": 1.2532182335853577, "train/loss_prose": 3.808466672897339} +{"step": 786, "train/loss": 2.5488969683647156, "train/lm_loss": 2.5488969683647156, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.949985593641399e-05, "perf/tokens_per_sec": 25874.310114552678, "train/loss_code": 2.096537629763285, "train/loss_math": 2.6366836428642273, "train/loss_prose": 3.5548274517059326} +{"step": 787, "train/loss": 2.148439347743988, "train/lm_loss": 2.148439347743988, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.949637630960617e-05, "perf/tokens_per_sec": 26408.872071252234, "train/loss_code": 1.9192494451999664, "train/loss_math": 2.377629041671753} +{"step": 788, "train/loss": 2.1359708309173584, "train/lm_loss": 2.1359708309173584, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.949288474356213e-05, "perf/tokens_per_sec": 26213.12006249695, "train/loss_code": 1.3120939612388611, "train/loss_prose": 3.509098768234253} +{"step": 789, "train/loss": 2.3845039308071136, "train/lm_loss": 2.3845039308071136, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.94893812399836e-05, "perf/tokens_per_sec": 26088.408464371132, "train/loss_math": 2.341839869817098, "train/loss_code": 1.5226518114407857, "train/loss_prose": 3.7412779331207275} +{"step": 790, "train/loss": 2.5450958013534546, "train/lm_loss": 2.5450958013534546, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.948586580057816e-05, "perf/tokens_per_sec": 25949.621679427593, "train/loss_prose": 3.7835997343063354, "train/loss_math": 2.5199901262919107, "train/loss_code": 1.7445322672526042} +{"step": 791, "train/loss": 2.790909707546234, "train/lm_loss": 2.790909707546234, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.948233842705919e-05, "perf/tokens_per_sec": 26131.26449208679, "train/loss_prose": 3.9762535095214844, "train/loss_code": 1.4720370769500732, "train/loss_math": 2.484813849131266} +{"step": 792, "train/loss": 2.739089995622635, "train/lm_loss": 2.739089995622635, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.947879912114588e-05, "perf/tokens_per_sec": 26098.3956261308, "train/loss_prose": 3.711666226387024, "train/loss_math": 2.6210838317871095, "train/loss_code": 1.3839689493179321} +{"step": 793, "train/loss": 2.1147588044404984, "train/lm_loss": 2.1147588044404984, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.947524788456325e-05, "perf/tokens_per_sec": 26220.88160220269, "train/loss_code": 1.3331353515386581, "train/loss_prose": 3.262939691543579, "train/loss_math": 2.529824376106262} +{"step": 794, "train/loss": 2.2762087881565094, "train/lm_loss": 2.2762087881565094, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.947168471904213e-05, "perf/tokens_per_sec": 25055.594067116836, "train/loss_code": 1.3401177525520325, "train/loss_math": 2.877446413040161, "train/loss_prose": 4.216860294342041} +{"step": 795, "train/loss": 2.763876438140869, "train/lm_loss": 2.763876438140869, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.946810962631916e-05, "perf/tokens_per_sec": 26425.32356501556, "train/loss_prose": 3.84100612004598, "train/loss_code": 1.8302982250849407, "train/loss_math": 2.5485488176345825} +{"step": 796, "train/loss": 3.217751383781433, "train/lm_loss": 3.217751383781433, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9464522608136805e-05, "perf/tokens_per_sec": 25876.570505907373, "train/loss_prose": 3.657330791155497, "train/loss_math": 2.484245538711548, "train/loss_code": 1.3137803077697754} +{"step": 797, "train/loss": 2.3070545196533203, "train/lm_loss": 2.3070545196533203, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.946092366624333e-05, "perf/tokens_per_sec": 25775.126339589126, "train/loss_code": 1.1896999279658, "train/loss_math": 2.8408342003822327, "train/loss_prose": 3.523998737335205} +{"step": 798, "train/loss": 2.3240577578544617, "train/lm_loss": 2.3240577578544617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.945731280239281e-05, "perf/tokens_per_sec": 26308.77856813612, "train/loss_math": 2.4746145009994507, "train/loss_code": 1.8723869323730469} +{"step": 799, "train/loss": 3.1820207238197327, "train/lm_loss": 3.1820207238197327, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9453690018345144e-05, "perf/tokens_per_sec": 26457.5546811594, "train/loss_code": 1.6603544354438782, "train/loss_prose": 3.9338807582855226, "train/loss_math": 2.4660539627075195} +{"step": 800, "train/loss": 3.0564048886299133, "train/lm_loss": 3.0564048886299133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9450055315866026e-05, "perf/tokens_per_sec": 25661.703848538033, "train/loss_prose": 3.628874361515045, "train/loss_math": 2.4839354157447815} +{"step": 800, "eval/loss": 2.3249791079124975, "eval/lm_loss": 2.3249791079124975, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 10.226466431327639, "eval/loss_code": 1.6715229572770802, "eval/ppl_code": 5.320264165431536, "eval/loss_prose": 3.4951191178539345, "eval/ppl_prose": 32.95421315436823, "eval/loss_math": 2.391172904329202, "eval/ppl_math": 10.926301937142014} +{"step": 801, "train/loss": 2.360531806945801, "train/lm_loss": 2.360531806945801, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9446408696726974e-05, "perf/tokens_per_sec": 25466.598454797186, "train/loss_code": 1.402424693107605, "train/loss_math": 3.0877187252044678, "train/loss_prose": 4.3922059535980225} +{"step": 802, "train/loss": 2.1809651851654053, "train/lm_loss": 2.1809651851654053, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9442750162705295e-05, "perf/tokens_per_sec": 22837.249704230107, "train/loss_math": 2.421706716219584, "train/loss_code": 1.4587404131889343} +{"step": 803, "train/loss": 2.186338037252426, "train/lm_loss": 2.186338037252426, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9439079715584135e-05, "perf/tokens_per_sec": 25839.441069682794, "train/loss_code": 1.5266656875610352, "train/loss_math": 2.6555685997009277, "train/loss_prose": 3.6009035110473633} +{"step": 804, "train/loss": 2.785844385623932, "train/lm_loss": 2.785844385623932, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.94353973571524e-05, "perf/tokens_per_sec": 25561.211509476914, "train/loss_code": 1.9609630505243938, "train/loss_prose": 3.83205509185791, "train/loss_math": 2.453850269317627} +{"step": 805, "train/loss": 2.8716049194335938, "train/lm_loss": 2.8716049194335938, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.943170308920484e-05, "perf/tokens_per_sec": 25940.805566293002, "train/loss_prose": 3.729515790939331, "train/loss_code": 1.2389366626739502, "train/loss_math": 2.6363390684127808} +{"step": 806, "train/loss": 2.140012949705124, "train/lm_loss": 2.140012949705124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9427996913542e-05, "perf/tokens_per_sec": 25810.287211491224, "train/loss_math": 2.364531636238098, "train/loss_code": 1.428327699502309, "train/loss_prose": 3.3769943714141846} +{"step": 807, "train/loss": 2.4926605224609375, "train/lm_loss": 2.4926605224609375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.942427883197021e-05, "perf/tokens_per_sec": 25729.19024919165, "train/loss_math": 2.6208813985188804, "train/loss_prose": 3.2251535654067993, "train/loss_code": 1.8761108716328938} +{"step": 808, "train/loss": 2.8497126400470734, "train/lm_loss": 2.8497126400470734, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.942054884630162e-05, "perf/tokens_per_sec": 26557.594995138283, "train/loss_code": 1.745370328426361, "train/loss_math": 2.5646554231643677, "train/loss_prose": 3.5444124937057495} +{"step": 809, "train/loss": 2.21111199259758, "train/lm_loss": 2.21111199259758, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.94168069583542e-05, "perf/tokens_per_sec": 25043.541084548106, "train/loss_code": 1.4887042045593262, "train/loss_math": 2.561314105987549, "train/loss_prose": 2.977527141571045} +{"step": 810, "train/loss": 2.7100911736488342, "train/lm_loss": 2.7100911736488342, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.941305316995169e-05, "perf/tokens_per_sec": 26162.303737826747, "train/loss_prose": 3.7283626794815063, "train/loss_math": 2.4989898204803467, "train/loss_code": 1.729054570198059} +{"step": 811, "train/loss": 2.4101003110408783, "train/lm_loss": 2.4101003110408783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.940928748292363e-05, "perf/tokens_per_sec": 24765.131904882717, "train/loss_code": 1.640300691127777, "train/loss_prose": 3.6678980588912964, "train/loss_math": 2.6919018030166626} +{"step": 812, "train/loss": 2.5059421360492706, "train/lm_loss": 2.5059421360492706, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.940550989910537e-05, "perf/tokens_per_sec": 25891.818972909838, "train/loss_math": 2.6777557134628296, "train/loss_code": 1.2381442189216614, "train/loss_prose": 3.4301129579544067} +{"step": 813, "train/loss": 2.030479282140732, "train/lm_loss": 2.030479282140732, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.940172042033808e-05, "perf/tokens_per_sec": 25594.875598720544, "train/loss_prose": 3.3914912939071655, "train/loss_math": 2.3019015789031982, "train/loss_code": 1.4317901372909545} +{"step": 814, "train/loss": 2.2528297305107117, "train/lm_loss": 2.2528297305107117, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.939791904846869e-05, "perf/tokens_per_sec": 25655.840532355276, "train/loss_code": 1.6830103695392609, "train/loss_math": 2.5832464694976807, "train/loss_prose": 3.5408565998077393} +{"step": 815, "train/loss": 2.537413239479065, "train/lm_loss": 2.537413239479065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9394105785349944e-05, "perf/tokens_per_sec": 25596.362821521372, "train/loss_code": 2.2646217346191406, "train/loss_math": 2.62834362188975} +{"step": 816, "train/loss": 2.6902549266815186, "train/lm_loss": 2.6902549266815186, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.939028063284038e-05, "perf/tokens_per_sec": 25432.705974658882, "train/loss_prose": 3.7841315269470215, "train/loss_math": 2.5820322831471763, "train/loss_code": 2.0692266623179116} +{"step": 817, "train/loss": 2.8036895990371704, "train/lm_loss": 2.8036895990371704, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.938644359280433e-05, "perf/tokens_per_sec": 26376.799883314783, "train/loss_code": 2.0252652565638223, "train/loss_math": 2.421688675880432, "train/loss_prose": 3.8367807865142822} +{"step": 818, "train/loss": 2.250092029571533, "train/lm_loss": 2.250092029571533, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.938259466711193e-05, "perf/tokens_per_sec": 25890.921506808852, "train/loss_prose": 3.6094911098480225, "train/loss_code": 1.351409673690796, "train/loss_math": 2.68805730342865} +{"step": 819, "train/loss": 3.1345253586769104, "train/lm_loss": 3.1345253586769104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.937873385763908e-05, "perf/tokens_per_sec": 25259.61016979057, "train/loss_prose": 3.601111125946045, "train/loss_math": 2.3237781524658203, "train/loss_code": 2.4230895042419434} +{"step": 820, "train/loss": 2.6294771432876587, "train/lm_loss": 2.6294771432876587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.937486116626752e-05, "perf/tokens_per_sec": 25394.92390925163, "train/loss_math": 2.453894793987274, "train/loss_code": 2.1355960369110107, "train/loss_prose": 3.474523186683655} +{"step": 821, "train/loss": 2.7712427973747253, "train/lm_loss": 2.7712427973747253, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9370976594884723e-05, "perf/tokens_per_sec": 26234.81588760785, "train/loss_code": 1.443364441394806, "train/loss_math": 2.6076236565907798, "train/loss_prose": 3.8201138178507485} +{"step": 822, "train/loss": 2.3655009269714355, "train/lm_loss": 2.3655009269714355, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9367080145384006e-05, "perf/tokens_per_sec": 25532.719707159173, "train/loss_code": 1.6354737877845764, "train/loss_math": 2.635560154914856, "train/loss_prose": 3.4753451347351074} +{"step": 823, "train/loss": 2.556482881307602, "train/lm_loss": 2.556482881307602, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9363171819664434e-05, "perf/tokens_per_sec": 26778.02312466683, "train/loss_prose": 3.447343428929647, "train/loss_math": 2.2847185134887695, "train/loss_code": 1.8467984596888225} +{"step": 824, "train/loss": 2.41327440738678, "train/lm_loss": 2.41327440738678, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9359251619630886e-05, "perf/tokens_per_sec": 26700.157410904252, "train/loss_math": 2.359145164489746, "train/loss_prose": 3.9448012510935464, "train/loss_code": 1.2781616747379303} +{"step": 825, "train/loss": 2.5248076915740967, "train/lm_loss": 2.5248076915740967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9355319547194014e-05, "perf/tokens_per_sec": 25950.99361340882, "train/loss_math": 2.576603968938192, "train/loss_code": 2.3694183826446533} +{"step": 826, "train/loss": 2.646537810564041, "train/lm_loss": 2.646537810564041, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.935137560427027e-05, "perf/tokens_per_sec": 25610.17555178572, "train/loss_prose": 3.6554417610168457, "train/loss_code": 1.7415941556294758, "train/loss_math": 2.4905972480773926} +{"step": 827, "train/loss": 2.8578534722328186, "train/lm_loss": 2.8578534722328186, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9347419792781876e-05, "perf/tokens_per_sec": 26075.46032189323, "train/loss_prose": 3.473110616207123, "train/loss_code": 1.8915827870368958, "train/loss_math": 2.593609571456909} +{"step": 828, "train/loss": 3.09783136844635, "train/lm_loss": 3.09783136844635, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.934345211465686e-05, "perf/tokens_per_sec": 27070.6429153319, "train/loss_code": 1.4979163706302643, "train/loss_prose": 3.8140005588531496, "train/loss_math": 2.7168149948120117} +{"step": 829, "train/loss": 2.2339936196804047, "train/lm_loss": 2.2339936196804047, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.933947257182901e-05, "perf/tokens_per_sec": 25906.18689691883, "train/loss_code": 1.395075798034668, "train/loss_prose": 3.632189989089966} +{"step": 830, "train/loss": 2.918090283870697, "train/lm_loss": 2.918090283870697, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9335481166237904e-05, "perf/tokens_per_sec": 25888.775644474183, "train/loss_code": 2.0656003952026367, "train/loss_prose": 3.5429691672325134, "train/loss_math": 2.520822525024414} +{"step": 831, "train/loss": 2.7462942004203796, "train/lm_loss": 2.7462942004203796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.93314778998289e-05, "perf/tokens_per_sec": 25833.57395330373, "train/loss_math": 2.714031934738159, "train/loss_code": 1.933403770128886, "train/loss_prose": 3.5806926091512046} +{"step": 832, "train/loss": 2.47369721531868, "train/lm_loss": 2.47369721531868, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9327462774553166e-05, "perf/tokens_per_sec": 26638.388386938717, "train/loss_prose": 3.581430673599243, "train/loss_code": 1.2887683510780334, "train/loss_math": 2.512294828891754} +{"step": 833, "train/loss": 2.8489798307418823, "train/lm_loss": 2.8489798307418823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.93234357923676e-05, "perf/tokens_per_sec": 26191.340302498564, "train/loss_code": 2.212187886238098, "train/loss_prose": 3.6128745873769126, "train/loss_math": 2.509613037109375} +{"step": 834, "train/loss": 3.0635985136032104, "train/lm_loss": 3.0635985136032104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.931939695523492e-05, "perf/tokens_per_sec": 25166.733344857188, "train/loss_prose": 3.7913053035736084, "train/loss_code": 2.260856866836548, "train/loss_math": 2.410926342010498} +{"step": 835, "train/loss": 2.09614634513855, "train/lm_loss": 2.09614634513855, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9315346265123594e-05, "perf/tokens_per_sec": 26032.195288409523, "train/loss_prose": 3.1880396604537964, "train/loss_math": 2.283034920692444, "train/loss_code": 1.4567553997039795} +{"step": 836, "train/loss": 2.3895211815834045, "train/lm_loss": 2.3895211815834045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9311283724007887e-05, "perf/tokens_per_sec": 25539.70381448341, "train/loss_code": 1.6605509996414185, "train/loss_math": 2.229675054550171, "train/loss_prose": 4.2918701171875} +{"step": 837, "train/loss": 2.4706843495368958, "train/lm_loss": 2.4706843495368958, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.930720933386782e-05, "perf/tokens_per_sec": 25224.377070602364, "train/loss_code": 2.110560357570648, "train/loss_math": 2.270049571990967, "train/loss_prose": 3.3915669918060303} +{"step": 838, "train/loss": 2.6971570253372192, "train/lm_loss": 2.6971570253372192, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.930312309668922e-05, "perf/tokens_per_sec": 24994.75394819732, "train/loss_code": 1.6300985018412273, "train/loss_prose": 3.6183822751045227, "train/loss_math": 2.2134320735931396} +{"step": 839, "train/loss": 2.717723846435547, "train/lm_loss": 2.717723846435547, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.929902501446366e-05, "perf/tokens_per_sec": 25346.890901311617, "train/loss_math": 2.6398504972457886, "train/loss_prose": 3.773173173268636, "train/loss_code": 1.714189926783244} +{"step": 840, "train/loss": 2.9900208711624146, "train/lm_loss": 2.9900208711624146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.929491508918849e-05, "perf/tokens_per_sec": 25261.801577183625, "train/loss_prose": 3.5460195541381836, "train/loss_math": 2.434022009372711} +{"step": 841, "train/loss": 2.2525097727775574, "train/lm_loss": 2.2525097727775574, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.929079332286685e-05, "perf/tokens_per_sec": 24378.636863390613, "train/loss_code": 1.8933369517326355, "train/loss_prose": 3.3300278186798096} +{"step": 842, "train/loss": 2.6842209696769714, "train/lm_loss": 2.6842209696769714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9286659717507635e-05, "perf/tokens_per_sec": 25037.372985372407, "train/loss_code": 1.6929733157157898, "train/loss_math": 2.328009049097697, "train/loss_prose": 3.7012644608815513} +{"step": 843, "train/loss": 2.3204126954078674, "train/lm_loss": 2.3204126954078674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.92825142751255e-05, "perf/tokens_per_sec": 24029.43584105764, "train/loss_code": 1.7537426054477692, "train/loss_math": 2.455483317375183, "train/loss_prose": 3.3186821937561035} +{"step": 844, "train/loss": 3.065555155277252, "train/lm_loss": 3.065555155277252, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9278356997740904e-05, "perf/tokens_per_sec": 24942.96928710186, "train/loss_prose": 3.6245116233825683, "train/loss_math": 2.381905674934387, "train/loss_code": 1.6380720138549805} +{"step": 845, "train/loss": 2.770490348339081, "train/lm_loss": 2.770490348339081, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.927418788738004e-05, "perf/tokens_per_sec": 25691.752232717005, "train/loss_math": 2.75492787361145, "train/loss_prose": 3.4718029499053955, "train/loss_code": 1.7418649196624756} +{"step": 846, "train/loss": 2.0554173588752747, "train/lm_loss": 2.0554173588752747, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.927000694607489e-05, "perf/tokens_per_sec": 26205.882768204305, "train/loss_code": 1.8651129206021626, "train/loss_math": 2.626330256462097} +{"step": 847, "train/loss": 2.408218652009964, "train/lm_loss": 2.408218652009964, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9265814175863186e-05, "perf/tokens_per_sec": 25140.47483957826, "train/loss_math": 2.478876531124115, "train/loss_code": 1.40678071975708, "train/loss_prose": 3.2683404684066772} +{"step": 848, "train/loss": 2.24233078956604, "train/lm_loss": 2.24233078956604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9261609578788435e-05, "perf/tokens_per_sec": 25876.29767909833, "train/loss_prose": 3.4440016746520996, "train/loss_code": 1.6382757663726806, "train/loss_math": 2.859264612197876} +{"step": 849, "train/loss": 2.3918184638023376, "train/lm_loss": 2.3918184638023376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.925739315689991e-05, "perf/tokens_per_sec": 26125.423034343483, "train/loss_code": 1.7024869918823242, "train/loss_prose": 3.680168867111206, "train/loss_math": 2.4821311235427856} +{"step": 850, "train/loss": 2.903804898262024, "train/lm_loss": 2.903804898262024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.925316491225265e-05, "perf/tokens_per_sec": 24986.320225838495, "train/loss_prose": 3.174357056617737, "train/loss_math": 2.4088966846466064, "train/loss_code": 1.7754004001617432} +{"step": 851, "train/loss": 2.670728027820587, "train/lm_loss": 2.670728027820587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.924892484690743e-05, "perf/tokens_per_sec": 25876.68743344384, "train/loss_math": 2.598968426386515, "train/loss_code": 1.7756099104881287, "train/loss_prose": 3.3392330010732016} +{"step": 852, "train/loss": 3.05882066488266, "train/lm_loss": 3.05882066488266, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.924467296293083e-05, "perf/tokens_per_sec": 25931.604019278286, "train/loss_prose": 3.5768469333648683, "train/loss_code": 2.1401328444480896, "train/loss_math": 2.3060643672943115} +{"step": 853, "train/loss": 2.1408780217170715, "train/lm_loss": 2.1408780217170715, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.924040926239515e-05, "perf/tokens_per_sec": 26516.235814168853, "train/loss_code": 1.518868088722229, "train/loss_prose": 3.0494256019592285, "train/loss_math": 2.380248636007309} +{"step": 854, "train/loss": 2.89089173078537, "train/lm_loss": 2.89089173078537, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9236133747378475e-05, "perf/tokens_per_sec": 26135.239766028495, "train/loss_math": 2.654991626739502, "train/loss_prose": 3.9143691062927246, "train/loss_code": 2.0246808926264444} +{"step": 855, "train/loss": 2.8037301898002625, "train/lm_loss": 2.8037301898002625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.923184641996463e-05, "perf/tokens_per_sec": 25078.197252179398, "train/loss_math": 2.639839291572571, "train/loss_prose": 4.317799925804138, "train/loss_code": 1.6174426078796387} +{"step": 856, "train/loss": 2.3777697682380676, "train/lm_loss": 2.3777697682380676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9227547282243214e-05, "perf/tokens_per_sec": 25992.809081514857, "train/loss_code": 1.9317433834075928, "train/loss_math": 2.4805232683817544, "train/loss_prose": 3.8536150455474854} +{"step": 857, "train/loss": 2.4625352323055267, "train/lm_loss": 2.4625352323055267, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.922323633630958e-05, "perf/tokens_per_sec": 26225.404617721364, "train/loss_math": 2.611512780189514, "train/loss_code": 1.5535812775293987, "train/loss_prose": 3.2721705436706543} +{"step": 858, "train/loss": 2.772856295108795, "train/lm_loss": 2.772856295108795, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9218913584264814e-05, "perf/tokens_per_sec": 26049.089084620762, "train/loss_math": 2.6262574990590415, "train/loss_prose": 3.71457306543986, "train/loss_code": 1.5801790356636047} +{"step": 859, "train/loss": 3.0272827744483948, "train/lm_loss": 3.0272827744483948, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9214579028215776e-05, "perf/tokens_per_sec": 26633.308917730665, "train/loss_code": 2.1041391491889954, "train/loss_math": 2.5089768171310425, "train/loss_prose": 3.7480072379112244} +{"step": 860, "train/loss": 3.045286178588867, "train/lm_loss": 3.045286178588867, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9210232670275094e-05, "perf/tokens_per_sec": 25645.99772199706, "train/loss_prose": 3.516527271270752, "train/loss_math": 2.3955276012420654, "train/loss_code": 1.9885984659194946} +{"step": 861, "train/loss": 2.5326773524284363, "train/lm_loss": 2.5326773524284363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9205874512561115e-05, "perf/tokens_per_sec": 26271.162643896114, "train/loss_code": 1.7382172346115112, "train/loss_math": 2.439495881398519, "train/loss_prose": 3.8862268924713135} +{"step": 862, "train/loss": 2.9662147164344788, "train/lm_loss": 2.9662147164344788, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9201504557197955e-05, "perf/tokens_per_sec": 25870.41381532781, "train/loss_prose": 3.5641611099243162, "train/loss_code": 1.8132949471473694, "train/loss_math": 2.282322645187378} +{"step": 863, "train/loss": 2.361512452363968, "train/lm_loss": 2.361512452363968, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.919712280631547e-05, "perf/tokens_per_sec": 25795.057136937063, "train/loss_prose": 3.9199661016464233, "train/loss_code": 1.579238772392273, "train/loss_math": 2.367606043815613} +{"step": 864, "train/loss": 2.9432188272476196, "train/lm_loss": 2.9432188272476196, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.919272926204929e-05, "perf/tokens_per_sec": 26623.320818107717, "train/loss_math": 2.548583507537842, "train/loss_prose": 3.7842032313346863, "train/loss_code": 1.6558856964111328} +{"step": 865, "train/loss": 2.591347247362137, "train/lm_loss": 2.591347247362137, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9188323926540746e-05, "perf/tokens_per_sec": 26245.156042236995, "train/loss_prose": 3.5185306072235107, "train/loss_math": 2.4603216648101807, "train/loss_code": 1.9262150526046753} +{"step": 866, "train/loss": 2.258774757385254, "train/lm_loss": 2.258774757385254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.918390680193698e-05, "perf/tokens_per_sec": 25949.504091835963, "train/loss_prose": 3.854201316833496, "train/loss_math": 2.5705875158309937, "train/loss_code": 1.8149643898010255} +{"step": 867, "train/loss": 2.9033822119235992, "train/lm_loss": 2.9033822119235992, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9179477890390825e-05, "perf/tokens_per_sec": 25823.633139278827, "train/loss_prose": 3.831131160259247, "train/loss_code": 1.5931426286697388, "train/loss_math": 2.358124256134033} +{"step": 868, "train/loss": 2.8045862317085266, "train/lm_loss": 2.8045862317085266, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.917503719406088e-05, "perf/tokens_per_sec": 26670.354982333472, "train/loss_code": 1.4460790157318115, "train/loss_math": 2.8001049518585206, "train/loss_prose": 3.4950430393218994} +{"step": 869, "train/loss": 2.687959611415863, "train/lm_loss": 2.687959611415863, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.917058471511149e-05, "perf/tokens_per_sec": 26120.060730363813, "train/loss_math": 2.505463441212972, "train/loss_prose": 3.8529045581817627, "train/loss_code": 2.0938259760538735} +{"step": 870, "train/loss": 2.3451726138591766, "train/lm_loss": 2.3451726138591766, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9166120455712736e-05, "perf/tokens_per_sec": 25825.884684592595, "train/loss_math": 2.51241135597229, "train/loss_code": 1.8444570302963257, "train/loss_prose": 3.8463194370269775} +{"step": 871, "train/loss": 2.3602600693702698, "train/lm_loss": 2.3602600693702698, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.916164441804044e-05, "perf/tokens_per_sec": 26288.166996928943, "train/loss_math": 2.766649842262268, "train/loss_prose": 3.7512660026550293, "train/loss_code": 1.3547382752100627} +{"step": 872, "train/loss": 2.619412213563919, "train/lm_loss": 2.619412213563919, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9157156604276175e-05, "perf/tokens_per_sec": 25900.09419987246, "train/loss_code": 1.8311265707015991, "train/loss_math": 2.5165061950683594, "train/loss_prose": 3.613509774208069} +{"step": 873, "train/loss": 2.7017646431922913, "train/lm_loss": 2.7017646431922913, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.915265701660726e-05, "perf/tokens_per_sec": 26060.825008874097, "train/loss_code": 1.7935175498326619, "train/loss_math": 2.346780776977539, "train/loss_prose": 3.846668004989624} +{"step": 874, "train/loss": 2.588840365409851, "train/lm_loss": 2.588840365409851, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.914814565722671e-05, "perf/tokens_per_sec": 26438.662273526817, "train/loss_math": 2.70748108625412, "train/loss_prose": 3.4674108028411865, "train/loss_code": 1.472988337278366} +{"step": 875, "train/loss": 2.5202711820602417, "train/lm_loss": 2.5202711820602417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.914362252833332e-05, "perf/tokens_per_sec": 25409.497107022424, "train/loss_prose": 3.560405731201172, "train/loss_code": 1.8760361075401306, "train/loss_math": 2.3223214149475098} +{"step": 876, "train/loss": 2.377710998058319, "train/lm_loss": 2.377710998058319, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.913908763213162e-05, "perf/tokens_per_sec": 25415.58674355507, "train/loss_math": 2.4587988376617433, "train/loss_code": 1.6002236008644104, "train/loss_prose": 3.527244806289673} +{"step": 877, "train/loss": 2.521284282207489, "train/lm_loss": 2.521284282207489, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.913454097083185e-05, "perf/tokens_per_sec": 25559.11995025009, "train/loss_code": 1.4864739179611206, "train/loss_math": 2.507659355799357, "train/loss_prose": 3.224782943725586} +{"step": 878, "train/loss": 2.8000934720039368, "train/lm_loss": 2.8000934720039368, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.912998254665e-05, "perf/tokens_per_sec": 26012.762982254255, "train/loss_code": 1.9295550187428792, "train/loss_prose": 3.5665148496627808, "train/loss_math": 2.346022844314575} +{"step": 879, "train/loss": 2.528222382068634, "train/lm_loss": 2.528222382068634, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.912541236180779e-05, "perf/tokens_per_sec": 26461.10001386215, "train/loss_code": 1.4999857544898987, "train/loss_math": 2.768408179283142, "train/loss_prose": 4.344510078430176} +{"step": 880, "train/loss": 2.508644759654999, "train/lm_loss": 2.508644759654999, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.912083041853267e-05, "perf/tokens_per_sec": 26431.991236451195, "train/loss_math": 2.653719345728556, "train/loss_code": 1.761134386062622, "train/loss_prose": 3.4122984409332275} +{"step": 881, "train/loss": 2.494201183319092, "train/lm_loss": 2.494201183319092, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.911623671905784e-05, "perf/tokens_per_sec": 25907.593311024684, "train/loss_prose": 3.2787962555885315, "train/loss_code": 1.411514163017273, "train/loss_math": 2.6038811206817627} +{"step": 882, "train/loss": 2.5567841827869415, "train/lm_loss": 2.5567841827869415, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9111631265622184e-05, "perf/tokens_per_sec": 26682.947609077863, "train/loss_code": 1.468929573893547, "train/loss_prose": 3.644638955593109} +{"step": 883, "train/loss": 2.837165594100952, "train/lm_loss": 2.837165594100952, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.910701406047037e-05, "perf/tokens_per_sec": 25800.906769110163, "train/loss_math": 2.5755873322486877, "train/loss_prose": 3.546388785044352, "train/loss_code": 1.7558084726333618} +{"step": 884, "train/loss": 2.117469072341919, "train/lm_loss": 2.117469072341919, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.910238510585276e-05, "perf/tokens_per_sec": 26237.38012724768, "train/loss_code": 1.4920732736587525, "train/loss_prose": 3.727251410484314, "train/loss_math": 2.02488374710083} +{"step": 885, "train/loss": 2.335432380437851, "train/lm_loss": 2.335432380437851, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9097744404025435e-05, "perf/tokens_per_sec": 25140.76916120214, "train/loss_prose": 3.7639834880828857, "train/loss_math": 2.540701985359192, "train/loss_code": 1.5185221135616302} +{"step": 886, "train/loss": 2.778057038784027, "train/lm_loss": 2.778057038784027, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.909309195725025e-05, "perf/tokens_per_sec": 25680.19274342183, "train/loss_math": 2.6649444103240967, "train/loss_code": 1.4405893882115681, "train/loss_prose": 4.190932671229045} +{"step": 887, "train/loss": 2.224974125623703, "train/lm_loss": 2.224974125623703, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.908842776779472e-05, "perf/tokens_per_sec": 25702.70685974264, "train/loss_code": 1.5488589107990265, "train/loss_prose": 3.2464334964752197, "train/loss_math": 2.555745244026184} +{"step": 888, "train/loss": 2.7234357595443726, "train/lm_loss": 2.7234357595443726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9083751837932126e-05, "perf/tokens_per_sec": 26031.80083550898, "train/loss_math": 2.8078232606252036, "train/loss_prose": 3.2498584985733032, "train/loss_code": 2.288099447886149} +{"step": 889, "train/loss": 2.5991818010807037, "train/lm_loss": 2.5991818010807037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.907906416994146e-05, "perf/tokens_per_sec": 26145.97904957577, "train/loss_code": 1.715793291727702, "train/loss_prose": 3.551706393559774, "train/loss_math": 2.4954776763916016} +{"step": 890, "train/loss": 2.687839686870575, "train/lm_loss": 2.687839686870575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.907436476610743e-05, "perf/tokens_per_sec": 26131.940203337857, "train/loss_math": 2.495106887817383, "train/loss_code": 2.000082492828369, "train/loss_prose": 3.513550639152527} +{"step": 891, "train/loss": 2.4532052874565125, "train/lm_loss": 2.4532052874565125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.906965362872047e-05, "perf/tokens_per_sec": 26818.403346862317, "train/loss_prose": 3.3099666436513266, "train/loss_math": 2.3859774271647134, "train/loss_code": 1.2689048051834106} +{"step": 892, "train/loss": 2.7139195799827576, "train/lm_loss": 2.7139195799827576, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.906493076007674e-05, "perf/tokens_per_sec": 26178.64882400896, "train/loss_math": 2.440447727839152, "train/loss_prose": 3.6465868949890137, "train/loss_code": 1.725126564502716} +{"step": 893, "train/loss": 2.9731063842773438, "train/lm_loss": 2.9731063842773438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.90601961624781e-05, "perf/tokens_per_sec": 25366.53921876961, "train/loss_prose": 3.673206865787506, "train/loss_math": 2.679264465967814, "train/loss_code": 1.0542298555374146} +{"step": 894, "train/loss": 2.8259094059467316, "train/lm_loss": 2.8259094059467316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.905544983823214e-05, "perf/tokens_per_sec": 26659.55302855139, "train/loss_math": 2.623538374900818, "train/loss_code": 1.6947338581085205, "train/loss_prose": 3.492682933807373} +{"step": 895, "train/loss": 2.1666440814733505, "train/lm_loss": 2.1666440814733505, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.905069178965215e-05, "perf/tokens_per_sec": 25759.435603169724, "train/loss_math": 2.5540215969085693, "train/loss_prose": 3.0183868408203125, "train/loss_code": 1.3662264347076416} +{"step": 896, "train/loss": 2.675863415002823, "train/lm_loss": 2.675863415002823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9045922019057155e-05, "perf/tokens_per_sec": 25903.296414069908, "train/loss_math": 2.675351142883301, "train/loss_prose": 3.512419581413269, "train/loss_code": 1.8403319120407104} +{"step": 897, "train/loss": 2.093231976032257, "train/lm_loss": 2.093231976032257, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.904114052877188e-05, "perf/tokens_per_sec": 25528.318645770334, "train/loss_math": 2.4111107190450034, "train/loss_prose": 3.1891989707946777, "train/loss_code": 1.5808310210704803} +{"step": 898, "train/loss": 2.6367759108543396, "train/lm_loss": 2.6367759108543396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.903634732112678e-05, "perf/tokens_per_sec": 26450.711209305548, "train/loss_math": 2.5031336148579917, "train/loss_prose": 3.54742169380188, "train/loss_code": 1.471270203590393} +{"step": 899, "train/loss": 3.0066897869110107, "train/lm_loss": 3.0066897869110107, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9031542398457974e-05, "perf/tokens_per_sec": 25692.290136657703, "train/loss_prose": 3.77755184173584, "train/loss_math": 2.243892192840576, "train/loss_code": 1.4609341621398926} +{"step": 900, "train/loss": 2.81984943151474, "train/lm_loss": 2.81984943151474, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.902672576310735e-05, "perf/tokens_per_sec": 26844.006981369992, "train/loss_code": 1.8207319974899292, "train/loss_math": 2.464613676071167, "train/loss_prose": 3.841163476308187} +{"step": 901, "train/loss": 2.549904763698578, "train/lm_loss": 2.549904763698578, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.902189741742247e-05, "perf/tokens_per_sec": 24936.380080934992, "train/loss_prose": 3.537201762199402, "train/loss_code": 1.1159790754318237, "train/loss_math": 2.4417709589004515} +{"step": 902, "train/loss": 2.818226009607315, "train/lm_loss": 2.818226009607315, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.90170573637566e-05, "perf/tokens_per_sec": 25844.922041914826, "train/loss_prose": 3.7791677117347717, "train/loss_math": 2.5070641040802, "train/loss_code": 1.2075042426586151} +{"step": 903, "train/loss": 2.2991470992565155, "train/lm_loss": 2.2991470992565155, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9012205604468744e-05, "perf/tokens_per_sec": 25569.12450234336, "train/loss_math": 2.358001470565796, "train/loss_code": 1.893556535243988, "train/loss_prose": 3.7449464797973633} +{"step": 904, "train/loss": 2.2261655628681183, "train/lm_loss": 2.2261655628681183, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.900734214192358e-05, "perf/tokens_per_sec": 25746.889044750173, "train/loss_prose": 3.692641258239746, "train/loss_code": 1.332031711935997, "train/loss_math": 2.547956943511963} +{"step": 905, "train/loss": 2.9889703392982483, "train/lm_loss": 2.9889703392982483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.90024669784915e-05, "perf/tokens_per_sec": 26784.911754113262, "train/loss_prose": 3.390911817550659, "train/loss_math": 2.587028741836548} +{"step": 906, "train/loss": 2.609736442565918, "train/lm_loss": 2.609736442565918, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8997580116548595e-05, "perf/tokens_per_sec": 25265.739637364753, "train/loss_math": 2.3552348613739014, "train/loss_prose": 3.836127281188965, "train/loss_code": 1.8923484086990356} +{"step": 907, "train/loss": 2.464220702648163, "train/lm_loss": 2.464220702648163, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.899268155847667e-05, "perf/tokens_per_sec": 25515.541405890304, "train/loss_prose": 3.825570027033488, "train/loss_code": 1.3425050377845764, "train/loss_math": 2.867035388946533} +{"step": 908, "train/loss": 2.324521690607071, "train/lm_loss": 2.324521690607071, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.898777130666322e-05, "perf/tokens_per_sec": 26486.18897222629, "train/loss_math": 2.648979961872101, "train/loss_code": 1.4447753032048543, "train/loss_prose": 3.6659281253814697} +{"step": 909, "train/loss": 2.426737755537033, "train/lm_loss": 2.426737755537033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.898284936350144e-05, "perf/tokens_per_sec": 25770.718636511596, "train/loss_code": 1.573302298784256, "train/loss_prose": 4.093929767608643, "train/loss_math": 2.466416358947754} +{"step": 910, "train/loss": 2.877941608428955, "train/lm_loss": 2.877941608428955, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.897791573139023e-05, "perf/tokens_per_sec": 25788.43592095289, "train/loss_math": 2.529073715209961, "train/loss_prose": 3.757322132587433, "train/loss_code": 1.4680479168891907} +{"step": 911, "train/loss": 2.7306156754493713, "train/lm_loss": 2.7306156754493713, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8972970412734176e-05, "perf/tokens_per_sec": 25966.60467281527, "train/loss_math": 2.3174667358398438, "train/loss_prose": 3.621820569038391, "train/loss_code": 1.3613542318344116} +{"step": 912, "train/loss": 2.544873207807541, "train/lm_loss": 2.544873207807541, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.896801340994357e-05, "perf/tokens_per_sec": 25762.68039487082, "train/loss_code": 1.6933329105377197, "train/loss_prose": 4.103005409240723, "train/loss_math": 2.3576582272847495} +{"step": 913, "train/loss": 2.4549233317375183, "train/lm_loss": 2.4549233317375183, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.89630447254344e-05, "perf/tokens_per_sec": 25818.122392056735, "train/loss_code": 1.9126877387364705, "train/loss_math": 2.191693345705668, "train/loss_prose": 3.6631215810775757} +{"step": 914, "train/loss": 2.0187088549137115, "train/lm_loss": 2.0187088549137115, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.895806436162833e-05, "perf/tokens_per_sec": 25618.30990040396, "train/loss_code": 1.3949077725410461, "train/loss_prose": 3.8901116847991943} +{"step": 915, "train/loss": 2.940681129693985, "train/lm_loss": 2.940681129693985, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8953072320952745e-05, "perf/tokens_per_sec": 26313.211054083404, "train/loss_code": 1.838139812151591, "train/loss_prose": 3.8484652042388916, "train/loss_math": 2.61716890335083} +{"step": 916, "train/loss": 2.376820683479309, "train/lm_loss": 2.376820683479309, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8948068605840694e-05, "perf/tokens_per_sec": 25887.059210694842, "train/loss_math": 2.5030261278152466, "train/loss_code": 1.591499924659729, "train/loss_prose": 3.078004757563273} +{"step": 917, "train/loss": 3.0400325655937195, "train/lm_loss": 3.0400325655937195, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.894305321873092e-05, "perf/tokens_per_sec": 26627.81866286414, "train/loss_math": 2.625629186630249, "train/loss_prose": 3.580370616912842, "train/loss_code": 1.1671488285064697} +{"step": 918, "train/loss": 2.437476009130478, "train/lm_loss": 2.437476009130478, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.893802616206787e-05, "perf/tokens_per_sec": 25670.56138568295, "train/loss_prose": 2.7036021947860718, "train/loss_math": 2.460324215888977, "train/loss_code": 1.7909828424453735} +{"step": 919, "train/loss": 2.147125244140625, "train/lm_loss": 2.147125244140625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.893298743830168e-05, "perf/tokens_per_sec": 26297.14033105872, "train/loss_math": 2.4317426681518555, "train/loss_code": 1.7355339765548705, "train/loss_prose": 3.6358468532562256} +{"step": 920, "train/loss": 2.6439377665519714, "train/lm_loss": 2.6439377665519714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.892793704988816e-05, "perf/tokens_per_sec": 25756.693618947957, "train/loss_math": 2.1086511611938477, "train/loss_prose": 3.3889516592025757, "train/loss_code": 1.8290149370829265} +{"step": 921, "train/loss": 3.0711886286735535, "train/lm_loss": 3.0711886286735535, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.892287499928879e-05, "perf/tokens_per_sec": 25862.391136267837, "train/loss_code": 1.8360393047332764, "train/loss_prose": 3.812277889251709} +{"step": 922, "train/loss": 2.6989020705223083, "train/lm_loss": 2.6989020705223083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.891780128897077e-05, "perf/tokens_per_sec": 26187.387862861186, "train/loss_prose": 3.502063512802124, "train/loss_code": 1.3602997064590454} +{"step": 923, "train/loss": 2.7153866291046143, "train/lm_loss": 2.7153866291046143, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.891271592140695e-05, "perf/tokens_per_sec": 25944.409652964452, "train/loss_prose": 3.302006423473358, "train/loss_code": 1.9198681712150574, "train/loss_math": 2.3376652002334595} +{"step": 924, "train/loss": 2.3395567536354065, "train/lm_loss": 2.3395567536354065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.890761889907589e-05, "perf/tokens_per_sec": 26016.58408433193, "train/loss_math": 2.4836395382881165, "train/loss_code": 1.661190390586853, "train/loss_prose": 3.7983250617980957} +{"step": 925, "train/loss": 3.1815711855888367, "train/lm_loss": 3.1815711855888367, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.890251022446181e-05, "perf/tokens_per_sec": 26511.57102161373, "train/loss_math": 2.62131404876709, "train/loss_prose": 3.625891168912252, "train/loss_code": 1.0759085416793823} +{"step": 926, "train/loss": 2.5313994884490967, "train/lm_loss": 2.5313994884490967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.889738990005462e-05, "perf/tokens_per_sec": 26289.25312856736, "train/loss_prose": 3.5906894207000732, "train/loss_code": 2.08536097407341, "train/loss_math": 2.364186644554138} +{"step": 927, "train/loss": 2.7876230478286743, "train/lm_loss": 2.7876230478286743, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.889225792834991e-05, "perf/tokens_per_sec": 26637.851440904764, "train/loss_prose": 3.5946398973464966, "train/loss_math": 2.9128143787384033, "train/loss_code": 1.0483982861042023} +{"step": 928, "train/loss": 2.8775227069854736, "train/lm_loss": 2.8775227069854736, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8887114311848915e-05, "perf/tokens_per_sec": 26745.298417843205, "train/loss_code": 2.3584439357121787, "train/loss_prose": 4.133120059967041, "train/loss_math": 2.5595362186431885} +{"step": 929, "train/loss": 2.446460634469986, "train/lm_loss": 2.446460634469986, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.888195905305859e-05, "perf/tokens_per_sec": 26057.504685229622, "train/loss_code": 1.1883507370948792, "train/loss_math": 2.5564186573028564, "train/loss_prose": 3.1752423445383706} +{"step": 930, "train/loss": 2.3733801543712616, "train/lm_loss": 2.3733801543712616, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8876792154491556e-05, "perf/tokens_per_sec": 25543.045390545434, "train/loss_code": 1.4862414598464966, "train/loss_math": 2.2341605027516684, "train/loss_prose": 3.9129170179367065} +{"step": 931, "train/loss": 2.4539812207221985, "train/lm_loss": 2.4539812207221985, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.887161361866608e-05, "perf/tokens_per_sec": 26825.228998324583, "train/loss_code": 1.8471081733703614, "train/loss_prose": 3.4654363791147866} +{"step": 932, "train/loss": 2.620781421661377, "train/lm_loss": 2.620781421661377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.886642344810611e-05, "perf/tokens_per_sec": 25683.571185743865, "train/loss_math": 2.6320059299468994, "train/loss_prose": 3.1891874074935913, "train/loss_code": 1.4278476238250732} +{"step": 933, "train/loss": 2.801074266433716, "train/lm_loss": 2.801074266433716, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.886122164534131e-05, "perf/tokens_per_sec": 25705.898911306493, "train/loss_math": 2.720974604288737, "train/loss_code": 1.7741003036499023, "train/loss_prose": 3.5658226013183594} +{"step": 934, "train/loss": 2.963341534137726, "train/lm_loss": 2.963341534137726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8856008212906925e-05, "perf/tokens_per_sec": 25436.54685734845, "train/loss_math": 2.630100965499878, "train/loss_code": 2.3102664947509766, "train/loss_prose": 3.625353733698527} +{"step": 935, "train/loss": 2.339396357536316, "train/lm_loss": 2.339396357536316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.885078315334395e-05, "perf/tokens_per_sec": 26107.239851075145, "train/loss_code": 1.463726282119751, "train/loss_math": 2.51775221824646, "train/loss_prose": 3.1989574432373047} +{"step": 936, "train/loss": 2.5134338438510895, "train/lm_loss": 2.5134338438510895, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.884554646919901e-05, "perf/tokens_per_sec": 25847.91617869201, "train/loss_math": 2.5978564421335855, "train/loss_prose": 3.1596880753835044, "train/loss_code": 1.417418122291565} +{"step": 937, "train/loss": 2.526162326335907, "train/lm_loss": 2.526162326335907, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.88402981630244e-05, "perf/tokens_per_sec": 25629.24020807879, "train/loss_prose": 3.2323557138442993, "train/loss_code": 1.819968968629837} +{"step": 938, "train/loss": 2.479581296443939, "train/lm_loss": 2.479581296443939, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.883503823737808e-05, "perf/tokens_per_sec": 25933.1306327249, "train/loss_math": 2.4156509240468345, "train/loss_prose": 3.507782816886902, "train/loss_code": 1.8580442269643147} +{"step": 939, "train/loss": 2.2427602410316467, "train/lm_loss": 2.2427602410316467, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.882976669482367e-05, "perf/tokens_per_sec": 26120.497576461574, "train/loss_code": 1.1299297213554382, "train/loss_prose": 3.1065407594045005, "train/loss_math": 2.6163347959518433} +{"step": 940, "train/loss": 2.975081741809845, "train/lm_loss": 2.975081741809845, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.882448353793048e-05, "perf/tokens_per_sec": 25495.39682002606, "train/loss_prose": 3.702430486679077, "train/loss_math": 2.3899068037668862, "train/loss_code": 1.821210265159607} +{"step": 941, "train/loss": 2.6141878068447113, "train/lm_loss": 2.6141878068447113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8819188769273414e-05, "perf/tokens_per_sec": 25557.484969614925, "train/loss_code": 1.093480110168457, "train/loss_prose": 3.4880223274230957, "train/loss_math": 2.387225866317749} +{"step": 942, "train/loss": 2.3710930943489075, "train/lm_loss": 2.3710930943489075, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.881388239143311e-05, "perf/tokens_per_sec": 26544.66921505672, "train/loss_math": 2.5272408723831177, "train/loss_prose": 3.572584390640259, "train/loss_code": 1.6922736167907715} +{"step": 943, "train/loss": 2.6715996265411377, "train/lm_loss": 2.6715996265411377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.880856440699582e-05, "perf/tokens_per_sec": 25924.286599848496, "train/loss_prose": 3.3368846575419107, "train/loss_code": 1.1868599653244019, "train/loss_math": 2.5438206791877747} +{"step": 944, "train/loss": 2.680136203765869, "train/lm_loss": 2.680136203765869, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.880323481855347e-05, "perf/tokens_per_sec": 25612.046376344904, "train/loss_math": 2.5225824117660522, "train/loss_prose": 3.633922735850016, "train/loss_code": 1.8313855330149333} +{"step": 945, "train/loss": 1.9970018565654755, "train/lm_loss": 1.9970018565654755, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8797893628703635e-05, "perf/tokens_per_sec": 26904.880319353026, "train/loss_prose": 3.4276602268218994, "train/loss_math": 2.005721390247345, "train/loss_code": 1.7073824882507325} +{"step": 946, "train/loss": 2.7669886350631714, "train/lm_loss": 2.7669886350631714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.879254084004955e-05, "perf/tokens_per_sec": 26766.71675830468, "train/loss_prose": 3.506026268005371, "train/loss_math": 2.5435830950737, "train/loss_code": 2.4747616052627563} +{"step": 947, "train/loss": 2.2451247572898865, "train/lm_loss": 2.2451247572898865, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.878717645520008e-05, "perf/tokens_per_sec": 25903.491696646393, "train/loss_math": 2.7812726497650146, "train/loss_code": 1.2241977900266647, "train/loss_prose": 3.7508310079574585} +{"step": 948, "train/loss": 2.6789146959781647, "train/lm_loss": 2.6789146959781647, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.878180047676978e-05, "perf/tokens_per_sec": 25397.664496958318, "train/loss_prose": 3.4469641844431558, "train/loss_math": 2.5313225984573364, "train/loss_code": 0.9651340842247009} +{"step": 949, "train/loss": 2.7206687331199646, "train/lm_loss": 2.7206687331199646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.877641290737884e-05, "perf/tokens_per_sec": 25459.729727646987, "train/loss_math": 2.329859733581543, "train/loss_prose": 3.786733627319336, "train/loss_code": 1.4295182625452678} +{"step": 950, "train/loss": 2.8564348220825195, "train/lm_loss": 2.8564348220825195, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.877101374965308e-05, "perf/tokens_per_sec": 26012.0934201621, "train/loss_math": 2.5678292751312255, "train/loss_code": 2.4072084426879883, "train/loss_prose": 3.8025625944137573} +{"step": 951, "train/loss": 1.9225787818431854, "train/lm_loss": 1.9225787818431854, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8765603006224006e-05, "perf/tokens_per_sec": 25937.437434136828, "train/loss_code": 1.5133429169654846, "train/loss_math": 2.6046385765075684} +{"step": 952, "train/loss": 2.638668954372406, "train/lm_loss": 2.638668954372406, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.876018067972872e-05, "perf/tokens_per_sec": 26910.443280091604, "train/loss_prose": 3.3432024121284485, "train/loss_code": 1.9341353476047516} +{"step": 953, "train/loss": 2.627063661813736, "train/lm_loss": 2.627063661813736, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.875474677281002e-05, "perf/tokens_per_sec": 25491.349024926218, "train/loss_prose": 3.4016359647115073, "train/loss_math": 2.9055092334747314, "train/loss_code": 1.6668610175450642} +{"step": 954, "train/loss": 2.988694667816162, "train/lm_loss": 2.988694667816162, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.874930128811631e-05, "perf/tokens_per_sec": 25646.150859630292, "train/loss_prose": 3.4483224153518677, "train/loss_math": 2.489250818888346, "train/loss_code": 2.648515224456787} +{"step": 955, "train/loss": 2.0062361657619476, "train/lm_loss": 2.0062361657619476, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.874384422830167e-05, "perf/tokens_per_sec": 25782.321092890597, "train/loss_code": 1.4469926476478576, "train/loss_math": 2.7538598775863647, "train/loss_prose": 3.307206392288208} +{"step": 956, "train/loss": 2.7293026447296143, "train/lm_loss": 2.7293026447296143, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.87383755960258e-05, "perf/tokens_per_sec": 25904.272856397558, "train/loss_code": 1.6004489660263062, "train/loss_math": 2.6129912535349527, "train/loss_prose": 3.598182996114095} +{"step": 957, "train/loss": 2.890489935874939, "train/lm_loss": 2.890489935874939, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8732895393954036e-05, "perf/tokens_per_sec": 25866.557634833094, "train/loss_code": 1.8760153849919636, "train/loss_prose": 3.742272436618805, "train/loss_math": 2.5267820358276367} +{"step": 958, "train/loss": 2.65746408700943, "train/lm_loss": 2.65746408700943, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8727403624757365e-05, "perf/tokens_per_sec": 25487.56720060411, "train/loss_math": 2.3338257471720376, "train/loss_prose": 3.736956516901652, "train/loss_code": 1.5236828327178955} +{"step": 959, "train/loss": 2.918367624282837, "train/lm_loss": 2.918367624282837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8721900291112415e-05, "perf/tokens_per_sec": 25622.780262851757, "train/loss_prose": 3.6202540397644043, "train/loss_math": 2.5029280185699463, "train/loss_code": 1.9300343990325928} +{"step": 960, "train/loss": 1.7216759324073792, "train/lm_loss": 1.7216759324073792, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8716385395701435e-05, "perf/tokens_per_sec": 26852.062738649915, "train/loss_math": 2.6592823266983032, "train/loss_code": 1.4091404577096303} +{"step": 961, "train/loss": 2.4768486618995667, "train/lm_loss": 2.4768486618995667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.871085894121233e-05, "perf/tokens_per_sec": 25609.41202846572, "train/loss_math": 2.548817813396454, "train/loss_prose": 3.5171104669570923, "train/loss_code": 1.2926485538482666} +{"step": 962, "train/loss": 2.5847604870796204, "train/lm_loss": 2.5847604870796204, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8705320930338615e-05, "perf/tokens_per_sec": 25863.792799031682, "train/loss_prose": 3.746219515800476, "train/loss_math": 2.2860230207443237, "train/loss_code": 2.020776391029358} +{"step": 963, "train/loss": 2.7183457016944885, "train/lm_loss": 2.7183457016944885, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8699771365779453e-05, "perf/tokens_per_sec": 25711.939057151878, "train/loss_math": 2.6046364307403564, "train/loss_code": 1.8084346850713093, "train/loss_prose": 3.7040624618530273} +{"step": 964, "train/loss": 1.9802154898643494, "train/lm_loss": 1.9802154898643494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.869421025023965e-05, "perf/tokens_per_sec": 25877.077199530653, "train/loss_code": 1.4956111669540406, "train/loss_prose": 3.3574979305267334, "train/loss_math": 2.503084897994995} +{"step": 965, "train/loss": 1.6623661071062088, "train/lm_loss": 1.6623661071062088, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8688637586429595e-05, "perf/tokens_per_sec": 25835.011344633855, "train/loss_code": 1.348908434311549, "train/loss_math": 2.6027389764785767} +{"step": 966, "train/loss": 2.4028176069259644, "train/lm_loss": 2.4028176069259644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8683053377065356e-05, "perf/tokens_per_sec": 24893.34638964035, "train/loss_math": 2.504442811012268, "train/loss_code": 1.8361310561498005, "train/loss_prose": 3.6963770389556885} +{"step": 967, "train/loss": 2.0788739919662476, "train/lm_loss": 2.0788739919662476, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.867745762486861e-05, "perf/tokens_per_sec": 25479.024409921767, "train/loss_code": 1.3298065185546875, "train/loss_prose": 3.747791886329651, "train/loss_math": 2.486375093460083} +{"step": 968, "train/loss": 2.6965164840221405, "train/lm_loss": 2.6965164840221405, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.867185033256665e-05, "perf/tokens_per_sec": 24202.930273052832, "train/loss_code": 1.7998353640238445, "train/loss_prose": 3.4265148043632507, "train/loss_math": 2.466566562652588} +{"step": 969, "train/loss": 2.273077577352524, "train/lm_loss": 2.273077577352524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8666231502892415e-05, "perf/tokens_per_sec": 25853.08950383661, "train/loss_math": 2.5923526287078857, "train/loss_code": 1.7758979201316833, "train/loss_prose": 1.6710624694824219} +{"step": 970, "train/loss": 2.8205674290657043, "train/lm_loss": 2.8205674290657043, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.866060113858444e-05, "perf/tokens_per_sec": 25807.340552834095, "train/loss_prose": 3.6621365547180176, "train/loss_code": 1.7701783180236816, "train/loss_math": 2.6054575443267822} +{"step": 971, "train/loss": 1.9473379254341125, "train/lm_loss": 1.9473379254341125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8654959242386896e-05, "perf/tokens_per_sec": 25128.744378168543, "train/loss_code": 1.5744117101033528, "train/loss_prose": 3.761674404144287, "train/loss_math": 2.3705592155456543} +{"step": 972, "train/loss": 2.2563620805740356, "train/lm_loss": 2.2563620805740356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8649305817049596e-05, "perf/tokens_per_sec": 27028.351956502585, "train/loss_math": 2.5055902004241943, "train/loss_code": 1.8228166103363037, "train/loss_prose": 3.9256319999694824} +{"step": 973, "train/loss": 2.723302185535431, "train/lm_loss": 2.723302185535431, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.864364086532792e-05, "perf/tokens_per_sec": 25380.067460082966, "train/loss_math": 2.584370493888855, "train/loss_code": 1.8124093413352966, "train/loss_prose": 3.9120585918426514} +{"step": 974, "train/loss": 2.616841435432434, "train/lm_loss": 2.616841435432434, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8637964389982926e-05, "perf/tokens_per_sec": 25660.860592355155, "train/loss_prose": 3.7249419689178467, "train/loss_math": 2.6761319637298584, "train/loss_code": 1.4692139228185017} +{"step": 975, "train/loss": 2.474367380142212, "train/lm_loss": 2.474367380142212, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.863227639378124e-05, "perf/tokens_per_sec": 26396.212289850442, "train/loss_math": 2.559658575057983, "train/loss_code": 1.8963584899902344, "train/loss_prose": 3.203929901123047} +{"step": 976, "train/loss": 2.1865948140621185, "train/lm_loss": 2.1865948140621185, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.862657687949512e-05, "perf/tokens_per_sec": 26371.414863275913, "train/loss_code": 1.7468869924545287, "train/loss_math": 2.699800133705139, "train/loss_prose": 3.3587229251861572} +{"step": 977, "train/loss": 2.3949158787727356, "train/lm_loss": 2.3949158787727356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8620865849902456e-05, "perf/tokens_per_sec": 25089.990483840345, "train/loss_math": 2.4829780714852467, "train/loss_code": 1.7784804105758667} +{"step": 978, "train/loss": 2.49959397315979, "train/lm_loss": 2.49959397315979, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.861514330778673e-05, "perf/tokens_per_sec": 25731.579514631016, "train/loss_code": 1.59152290225029, "train/loss_math": 2.554624557495117, "train/loss_prose": 3.6920113563537598} +{"step": 979, "train/loss": 2.4415228068828583, "train/lm_loss": 2.4415228068828583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.860940925593703e-05, "perf/tokens_per_sec": 25262.433070217747, "train/loss_code": 1.4995092749595642, "train/loss_prose": 3.6875151793162027, "train/loss_math": 2.471599817276001} +{"step": 980, "train/loss": 2.0624958276748657, "train/lm_loss": 2.0624958276748657, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.860366369714807e-05, "perf/tokens_per_sec": 25599.0707734763, "train/loss_code": 1.5341237545013429, "train/loss_prose": 3.6616837978363037, "train/loss_math": 2.583831787109375} +{"step": 981, "train/loss": 2.370541602373123, "train/lm_loss": 2.370541602373123, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.859790663422016e-05, "perf/tokens_per_sec": 26907.535081866565, "train/loss_code": 1.4499346315860748, "train/loss_prose": 3.9127529859542847, "train/loss_math": 2.669544219970703} +{"step": 982, "train/loss": 2.742352843284607, "train/lm_loss": 2.742352843284607, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.859213806995924e-05, "perf/tokens_per_sec": 26164.415490426614, "train/loss_code": 1.9429776271184285, "train/loss_prose": 3.6605809529622397, "train/loss_math": 2.56407368183136} +{"step": 983, "train/loss": 2.581023633480072, "train/lm_loss": 2.581023633480072, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.858635800717681e-05, "perf/tokens_per_sec": 26738.72145604186, "train/loss_math": 2.4349889755249023, "train/loss_code": 1.871734082698822, "train/loss_prose": 3.199917713801066} +{"step": 984, "train/loss": 2.926189571619034, "train/lm_loss": 2.926189571619034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.858056644869002e-05, "perf/tokens_per_sec": 25818.626799277434, "train/loss_prose": 4.169868111610413, "train/loss_math": 2.2330050468444824, "train/loss_code": 1.4990132649739583} +{"step": 985, "train/loss": 2.543488323688507, "train/lm_loss": 2.543488323688507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8574763397321614e-05, "perf/tokens_per_sec": 25942.842535713207, "train/loss_math": 2.5952018896738687, "train/loss_code": 1.789240042368571, "train/loss_prose": 3.5972907543182373} +{"step": 986, "train/loss": 2.343996375799179, "train/lm_loss": 2.343996375799179, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.856894885589991e-05, "perf/tokens_per_sec": 26369.269565671188, "train/loss_code": 1.4903543889522552, "train/loss_math": 2.5490244388580323, "train/loss_prose": 3.026139497756958} +{"step": 987, "train/loss": 2.871647745370865, "train/lm_loss": 2.871647745370865, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.856312282725886e-05, "perf/tokens_per_sec": 25584.774685474647, "train/loss_prose": 4.12756872177124, "train/loss_code": 1.3957187334696453, "train/loss_math": 2.275749921798706} +{"step": 988, "train/loss": 2.6018046736717224, "train/lm_loss": 2.6018046736717224, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.855728531423798e-05, "perf/tokens_per_sec": 25597.88835778909, "train/loss_code": 1.859778344631195, "train/loss_prose": 3.796128590901693, "train/loss_math": 1.9869379997253418} +{"step": 989, "train/loss": 2.464376211166382, "train/lm_loss": 2.464376211166382, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.855143631968242e-05, "perf/tokens_per_sec": 23778.26691930691, "train/loss_math": 2.379751662413279, "train/loss_prose": 3.8182222843170166, "train/loss_code": 1.6182771921157837} +{"step": 990, "train/loss": 2.580737888813019, "train/lm_loss": 2.580737888813019, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.85455758464429e-05, "perf/tokens_per_sec": 24342.677777289726, "train/loss_math": 2.793230175971985, "train/loss_code": 1.4453092018763225, "train/loss_prose": 3.5745049317677817} +{"step": 991, "train/loss": 2.267631411552429, "train/lm_loss": 2.267631411552429, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8539703897375755e-05, "perf/tokens_per_sec": 24489.77235457127, "train/loss_code": 1.4390901923179626, "train/loss_prose": 3.380199432373047, "train/loss_math": 2.8121460676193237} +{"step": 992, "train/loss": 2.272601842880249, "train/lm_loss": 2.272601842880249, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.85338204753429e-05, "perf/tokens_per_sec": 26938.715029189058, "train/loss_prose": 3.783806244532267, "train/loss_code": 1.3658792138099671} +{"step": 993, "train/loss": 2.9765239357948303, "train/lm_loss": 2.9765239357948303, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.852792558321182e-05, "perf/tokens_per_sec": 26228.727826785263, "train/loss_prose": 3.4679353713989256, "train/loss_code": 1.8719509840011597, "train/loss_math": 2.30028235912323} +{"step": 994, "train/loss": 3.0054051876068115, "train/lm_loss": 3.0054051876068115, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.852201922385564e-05, "perf/tokens_per_sec": 25524.980921491515, "train/loss_prose": 3.8861494064331055, "train/loss_math": 2.476958465576172} +{"step": 995, "train/loss": 2.6854367852211, "train/lm_loss": 2.6854367852211, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.851610140015304e-05, "perf/tokens_per_sec": 26018.278334090566, "train/loss_prose": 3.4796841144561768, "train/loss_code": 1.3399370908737183, "train/loss_math": 2.4424420595169067} +{"step": 996, "train/loss": 2.2892777025699615, "train/lm_loss": 2.2892777025699615, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.851017211498829e-05, "perf/tokens_per_sec": 26351.958057425618, "train/loss_prose": 3.367993950843811, "train/loss_math": 2.5259690284729004, "train/loss_code": 1.3334423502286274} +{"step": 997, "train/loss": 2.953788608312607, "train/lm_loss": 2.953788608312607, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8504231371251255e-05, "perf/tokens_per_sec": 25716.55764339934, "train/loss_code": 1.97951873143514, "train/loss_prose": 3.799752116203308, "train/loss_math": 2.492744207382202} +{"step": 998, "train/loss": 2.5326643586158752, "train/lm_loss": 2.5326643586158752, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.849827917183739e-05, "perf/tokens_per_sec": 25759.04937288682, "train/loss_math": 2.692917585372925, "train/loss_code": 1.7103766798973083, "train/loss_prose": 3.57563050587972} +{"step": 999, "train/loss": 2.5890047550201416, "train/lm_loss": 2.5890047550201416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.849231551964771e-05, "perf/tokens_per_sec": 26309.6649458565, "train/loss_prose": 3.452291568120321, "train/loss_code": 1.6659005085627239, "train/loss_math": 2.678730607032776} +{"step": 1000, "train/loss": 2.8020384907722473, "train/lm_loss": 2.8020384907722473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8486340417588835e-05, "perf/tokens_per_sec": 25669.44906488085, "train/loss_math": 2.718029260635376, "train/loss_code": 1.5220219492912292, "train/loss_prose": 3.4840510487556458} +{"step": 1000, "eval/loss": 2.300006696227199, "eval/lm_loss": 2.300006696227199, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 9.974249244430183, "eval/loss_code": 1.656781525780621, "eval/ppl_code": 5.242411098368986, "eval/loss_prose": 3.4925600478523657, "eval/ppl_prose": 32.86998882999483, "eval/loss_math": 2.333277762448255, "eval/ppl_math": 10.311685455916157} +{"step": 1001, "train/loss": 3.2365445494651794, "train/lm_loss": 3.2365445494651794, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.848035386857296e-05, "perf/tokens_per_sec": 26410.211796753578, "train/loss_prose": 3.629649353027344, "train/loss_math": 2.5813697973887124} +{"step": 1002, "train/loss": 3.0657039880752563, "train/lm_loss": 3.0657039880752563, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8474355875517854e-05, "perf/tokens_per_sec": 26779.69277005141, "train/loss_math": 2.625987410545349, "train/loss_prose": 3.54184947013855, "train/loss_code": 1.564409852027893} +{"step": 1003, "train/loss": 2.117709845304489, "train/lm_loss": 2.117709845304489, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.846834644134686e-05, "perf/tokens_per_sec": 26854.287314925972, "train/loss_prose": 3.9061391353607178, "train/loss_math": 2.2772095998128257, "train/loss_code": 1.550977736711502} +{"step": 1004, "train/loss": 2.665568858385086, "train/lm_loss": 2.665568858385086, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.84623255689889e-05, "perf/tokens_per_sec": 25935.75369602401, "train/loss_math": 2.451828718185425, "train/loss_code": 1.4363588094711304, "train/loss_prose": 4.829993844032288} +{"step": 1005, "train/loss": 2.787772238254547, "train/lm_loss": 2.787772238254547, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.845629326137849e-05, "perf/tokens_per_sec": 25502.701234619217, "train/loss_prose": 3.4814149538675943, "train/loss_math": 2.427172362804413, "train/loss_code": 2.149245262145996} +{"step": 1006, "train/loss": 3.4619133472442627, "train/lm_loss": 3.4619133472442627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8450249521455695e-05, "perf/tokens_per_sec": 25641.8636970834, "train/loss_prose": 3.8685779174168906, "train/loss_math": 2.5300354957580566, "train/loss_code": 1.9538031816482544} +{"step": 1007, "train/loss": 2.770082652568817, "train/lm_loss": 2.770082652568817, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.844419435216615e-05, "perf/tokens_per_sec": 26023.165313492158, "train/loss_prose": 3.382317860921224, "train/loss_math": 2.402741622924805} +{"step": 1008, "train/loss": 2.8802112340927124, "train/lm_loss": 2.8802112340927124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.84381277564611e-05, "perf/tokens_per_sec": 25207.79596496119, "train/loss_prose": 3.6410438418388367, "train/loss_code": 1.925279974937439, "train/loss_math": 2.313476800918579} +{"step": 1009, "train/loss": 2.398882806301117, "train/lm_loss": 2.398882806301117, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.843204973729729e-05, "perf/tokens_per_sec": 24641.871396565028, "train/loss_code": 1.9821035464604695, "train/loss_prose": 3.015859603881836, "train/loss_math": 2.404344161351522} +{"step": 1010, "train/loss": 3.401242673397064, "train/lm_loss": 3.401242673397064, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.84259602976371e-05, "perf/tokens_per_sec": 25802.99933464402, "train/loss_prose": 3.5125582218170166, "train/loss_math": 2.6220340728759766} +{"step": 1011, "train/loss": 2.457594633102417, "train/lm_loss": 2.457594633102417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.841985944044845e-05, "perf/tokens_per_sec": 26160.351666022045, "train/loss_prose": 3.5070255994796753, "train/loss_code": 1.693814992904663, "train/loss_math": 2.521753708521525} +{"step": 1012, "train/loss": 2.63791024684906, "train/lm_loss": 2.63791024684906, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.841374716870481e-05, "perf/tokens_per_sec": 26181.401590402722, "train/loss_code": 2.065597573916117, "train/loss_prose": 3.178556442260742, "train/loss_math": 2.685409665107727} +{"step": 1013, "train/loss": 3.1678212583065033, "train/lm_loss": 3.1678212583065033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8407623485385234e-05, "perf/tokens_per_sec": 26078.864397980466, "train/loss_code": 1.7191136479377747, "train/loss_math": 2.761195421218872, "train/loss_prose": 4.0954883098602295} +{"step": 1014, "train/loss": 2.6546406745910645, "train/lm_loss": 2.6546406745910645, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.840148839347434e-05, "perf/tokens_per_sec": 26441.71417489834, "train/loss_prose": 3.7859275341033936, "train/loss_math": 2.150142192840576, "train/loss_code": 1.859685977300008} +{"step": 1015, "train/loss": 2.533450037240982, "train/lm_loss": 2.533450037240982, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.839534189596228e-05, "perf/tokens_per_sec": 26306.562885107738, "train/loss_math": 2.594598114490509, "train/loss_prose": 3.581324815750122, "train/loss_code": 1.3632795214653015} +{"step": 1016, "train/loss": 2.7882176637649536, "train/lm_loss": 2.7882176637649536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8389183995844785e-05, "perf/tokens_per_sec": 26045.692776737593, "train/loss_prose": 3.633362054824829, "train/loss_code": 2.4181172847747803, "train/loss_math": 2.5241798400878905} +{"step": 1017, "train/loss": 2.2151016891002655, "train/lm_loss": 2.2151016891002655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8383014696123144e-05, "perf/tokens_per_sec": 26047.62777383573, "train/loss_math": 2.0632983446121216, "train/loss_code": 1.6190736889839172, "train/loss_prose": 3.5589611530303955} +{"step": 1018, "train/loss": 2.4352235198020935, "train/lm_loss": 2.4352235198020935, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.837683399980421e-05, "perf/tokens_per_sec": 26053.948134353257, "train/loss_code": 1.768236666917801, "train/loss_prose": 3.784456491470337, "train/loss_math": 2.4199644327163696} +{"step": 1019, "train/loss": 3.566002130508423, "train/lm_loss": 3.566002130508423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.837064190990036e-05, "perf/tokens_per_sec": 26115.573975434756, "train/loss_prose": 3.8326605558395386, "train/loss_code": 2.7660272121429443} +{"step": 1020, "train/loss": 2.576371133327484, "train/lm_loss": 2.576371133327484, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.836443842942956e-05, "perf/tokens_per_sec": 26058.65089181027, "train/loss_prose": 3.6644831895828247, "train/loss_code": 1.7340699434280396, "train/loss_math": 2.453465759754181} +{"step": 1021, "train/loss": 2.7640746533870697, "train/lm_loss": 2.7640746533870697, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8358223561415304e-05, "perf/tokens_per_sec": 25818.005993189283, "train/loss_prose": 4.2744832038879395, "train/loss_math": 2.230491558710734, "train/loss_code": 1.2988364100456238} +{"step": 1022, "train/loss": 2.0593677163124084, "train/lm_loss": 2.0593677163124084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.835199730888664e-05, "perf/tokens_per_sec": 25896.073028084942, "train/loss_math": 2.5313108762105307, "train/loss_code": 1.4246075749397278, "train/loss_prose": 3.1825780868530273} +{"step": 1023, "train/loss": 2.9588189125061035, "train/lm_loss": 2.9588189125061035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.834575967487817e-05, "perf/tokens_per_sec": 25971.315210719975, "train/loss_math": 2.5244011878967285, "train/loss_prose": 3.8262572288513184, "train/loss_code": 1.6583600044250488} +{"step": 1024, "train/loss": 2.7558473944664, "train/lm_loss": 2.7558473944664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8339510662430046e-05, "perf/tokens_per_sec": 26800.07922151261, "train/loss_prose": 3.7613569100697837, "train/loss_code": 1.9120774666468303, "train/loss_math": 2.513237953186035} +{"step": 1025, "train/loss": 2.7212586998939514, "train/lm_loss": 2.7212586998939514, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.833325027458795e-05, "perf/tokens_per_sec": 26205.163255749394, "train/loss_prose": 3.408825933933258, "train/loss_code": 1.7648128271102905, "train/loss_math": 2.3025707006454468} +{"step": 1026, "train/loss": 3.1474345326423645, "train/lm_loss": 3.1474345326423645, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.832697851440313e-05, "perf/tokens_per_sec": 26138.977626507985, "train/loss_prose": 3.496424436569214, "train/loss_math": 2.5657846132914224} +{"step": 1027, "train/loss": 2.4863712787628174, "train/lm_loss": 2.4863712787628174, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.832069538493237e-05, "perf/tokens_per_sec": 26206.282514418905, "train/loss_math": 2.4220465819040933, "train/loss_prose": 3.5623944997787476, "train/loss_code": 1.8333474000295003} +{"step": 1028, "train/loss": 2.197456330060959, "train/lm_loss": 2.197456330060959, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.831440088923797e-05, "perf/tokens_per_sec": 25685.26074027339, "train/loss_math": 2.521621525287628, "train/loss_prose": 2.5931291580200195, "train/loss_code": 1.6333453456560771} +{"step": 1029, "train/loss": 2.4880660474300385, "train/lm_loss": 2.4880660474300385, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.830809503038781e-05, "perf/tokens_per_sec": 22349.774268162906, "train/loss_prose": 3.3677890300750732, "train/loss_code": 1.7723811268806458, "train/loss_math": 2.406046986579895} +{"step": 1030, "train/loss": 2.8010627031326294, "train/lm_loss": 2.8010627031326294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8301777811455276e-05, "perf/tokens_per_sec": 24682.585260447795, "train/loss_math": 2.6427488327026367, "train/loss_code": 2.155834436416626, "train/loss_prose": 3.551833232243856} +{"step": 1031, "train/loss": 2.7631218433380127, "train/lm_loss": 2.7631218433380127, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.829544923551931e-05, "perf/tokens_per_sec": 25170.38367399563, "train/loss_math": 2.3193874835968016, "train/loss_prose": 3.502678950627645} +{"step": 1032, "train/loss": 2.144386947154999, "train/lm_loss": 2.144386947154999, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.828910930566439e-05, "perf/tokens_per_sec": 26303.703191530083, "train/loss_prose": 3.358064333597819, "train/loss_code": 1.2980658113956451, "train/loss_math": 1.8886386156082153} +{"step": 1033, "train/loss": 2.2766471207141876, "train/lm_loss": 2.2766471207141876, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.828275802498051e-05, "perf/tokens_per_sec": 25087.05945298696, "train/loss_math": 2.6055687268575034, "train/loss_code": 1.5840658843517303, "train/loss_prose": 4.060207366943359} +{"step": 1034, "train/loss": 2.4490338265895844, "train/lm_loss": 2.4490338265895844, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.827639539656321e-05, "perf/tokens_per_sec": 25862.313270568826, "train/loss_math": 2.2495423555374146, "train/loss_prose": 3.3977630138397217, "train/loss_code": 1.6332987149556477} +{"step": 1035, "train/loss": 2.2799477875232697, "train/lm_loss": 2.2799477875232697, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8270021423513554e-05, "perf/tokens_per_sec": 26461.915167198065, "train/loss_math": 2.52911434173584, "train/loss_code": 1.8646697600682576} +{"step": 1036, "train/loss": 2.9263026118278503, "train/lm_loss": 2.9263026118278503, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8263636108938156e-05, "perf/tokens_per_sec": 26637.355817624204, "train/loss_prose": 3.8978654543558755, "train/loss_code": 2.139394839604696, "train/loss_math": 2.649319648742676} +{"step": 1037, "train/loss": 2.900570571422577, "train/lm_loss": 2.900570571422577, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8257239455949124e-05, "perf/tokens_per_sec": 27127.965361840546, "train/loss_prose": 3.6441866159439087, "train/loss_code": 1.9886068105697632, "train/loss_math": 2.661997079849243} +{"step": 1038, "train/loss": 2.474956065416336, "train/lm_loss": 2.474956065416336, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.82508314676641e-05, "perf/tokens_per_sec": 26122.44370479285, "train/loss_prose": 3.606149196624756, "train/loss_code": 1.6692403554916382, "train/loss_math": 2.3042402267456055} +{"step": 1039, "train/loss": 2.663716197013855, "train/lm_loss": 2.663716197013855, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8244412147206284e-05, "perf/tokens_per_sec": 26466.480954156254, "train/loss_prose": 3.71570094426473, "train/loss_math": 2.3405359983444214, "train/loss_code": 1.827184756596883} +{"step": 1040, "train/loss": 2.4138570427894592, "train/lm_loss": 2.4138570427894592, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8237981497704365e-05, "perf/tokens_per_sec": 26794.18568605647, "train/loss_prose": 3.4201346238454184, "train/loss_code": 1.6697090864181519, "train/loss_math": 2.371615409851074} +{"step": 1041, "train/loss": 2.6849972009658813, "train/lm_loss": 2.6849972009658813, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8231539522292564e-05, "perf/tokens_per_sec": 27101.38991751208, "train/loss_math": 2.6477853775024416, "train/loss_prose": 3.7500091791152954, "train/loss_code": 0.7410321831703186} +{"step": 1042, "train/loss": 2.6054638624191284, "train/lm_loss": 2.6054638624191284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8225086224110615e-05, "perf/tokens_per_sec": 27043.115340237408, "train/loss_code": 1.493083159128825, "train/loss_prose": 3.5573734045028687, "train/loss_math": 2.1349685192108154} +{"step": 1043, "train/loss": 2.4977460503578186, "train/lm_loss": 2.4977460503578186, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.821862160630378e-05, "perf/tokens_per_sec": 26081.27945590211, "train/loss_math": 2.570204178492228, "train/loss_code": 1.4743935068448384, "train/loss_prose": 3.9240875244140625} +{"step": 1044, "train/loss": 2.903611332178116, "train/lm_loss": 2.903611332178116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8212145672022844e-05, "perf/tokens_per_sec": 27248.742922489582, "train/loss_prose": 4.06544295946757, "train/loss_math": 2.474721610546112, "train/loss_code": 1.1336736679077148} +{"step": 1045, "train/loss": 2.9821225702762604, "train/lm_loss": 2.9821225702762604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.820565842442408e-05, "perf/tokens_per_sec": 27268.162010304204, "train/loss_code": 2.046003063519796, "train/loss_prose": 4.356021483739217, "train/loss_math": 2.325453281402588} +{"step": 1046, "train/loss": 2.576662003993988, "train/lm_loss": 2.576662003993988, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.819915986666932e-05, "perf/tokens_per_sec": 26211.520316371494, "train/loss_math": 2.4313024044036866, "train/loss_prose": 3.602095127105713, "train/loss_code": 2.427344262599945} +{"step": 1047, "train/loss": 2.483462691307068, "train/lm_loss": 2.483462691307068, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8192650001925855e-05, "perf/tokens_per_sec": 26121.33159595253, "train/loss_prose": 3.478539228439331, "train/loss_code": 1.488385945558548} +{"step": 1048, "train/loss": 2.357855200767517, "train/lm_loss": 2.357855200767517, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.818612883336654e-05, "perf/tokens_per_sec": 26231.931717059437, "train/loss_code": 2.041349321603775, "train/loss_prose": 3.4115819931030273, "train/loss_math": 2.4286208152770996} +{"step": 1049, "train/loss": 2.474395275115967, "train/lm_loss": 2.474395275115967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.817959636416969e-05, "perf/tokens_per_sec": 25976.223838734502, "train/loss_code": 1.8764820992946625, "train/loss_math": 2.2964805364608765, "train/loss_prose": 3.8481366634368896} +{"step": 1050, "train/loss": 2.6843161284923553, "train/lm_loss": 2.6843161284923553, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.817305259751916e-05, "perf/tokens_per_sec": 26033.418168372944, "train/loss_math": 2.7927163441975913, "train/loss_code": 1.4993022084236145, "train/loss_prose": 3.365924914677938} +{"step": 1051, "train/loss": 2.6733911633491516, "train/lm_loss": 2.6733911633491516, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.81664975366043e-05, "perf/tokens_per_sec": 26095.620647595555, "train/loss_math": 2.717071612675985, "train/loss_prose": 3.555791139602661, "train/loss_code": 1.5289080142974854} +{"step": 1052, "train/loss": 2.5415077805519104, "train/lm_loss": 2.5415077805519104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.815993118461998e-05, "perf/tokens_per_sec": 26423.657172739437, "train/loss_math": 2.354637861251831, "train/loss_prose": 3.3750160932540894, "train/loss_code": 1.0613612532615662} +{"step": 1053, "train/loss": 2.7167423367500305, "train/lm_loss": 2.7167423367500305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8153353544766553e-05, "perf/tokens_per_sec": 25797.226227479812, "train/loss_math": 2.4912481904029846, "train/loss_prose": 3.516637166341146, "train/loss_code": 1.2190345525741577} +{"step": 1054, "train/loss": 2.7861528396606445, "train/lm_loss": 2.7861528396606445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.814676462024988e-05, "perf/tokens_per_sec": 26268.832792560268, "train/loss_prose": 3.796913206577301, "train/loss_code": 1.367808997631073, "train/loss_math": 2.182976245880127} +{"step": 1055, "train/loss": 2.250123232603073, "train/lm_loss": 2.250123232603073, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8140164414281306e-05, "perf/tokens_per_sec": 26429.87673245469, "train/loss_code": 1.881756567955017, "train/loss_math": 2.4322140216827393, "train/loss_prose": 3.7277750968933105} +{"step": 1056, "train/loss": 2.487845540046692, "train/lm_loss": 2.487845540046692, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8133552930077716e-05, "perf/tokens_per_sec": 25456.221313746613, "train/loss_math": 2.3607453405857086, "train/loss_prose": 3.6725893020629883, "train/loss_code": 1.5573023557662964} +{"step": 1057, "train/loss": 3.419967234134674, "train/lm_loss": 3.419967234134674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.812693017086145e-05, "perf/tokens_per_sec": 26900.83597411362, "train/loss_prose": 3.7831928730010986, "train/loss_math": 2.330289840698242} +{"step": 1058, "train/loss": 3.3849961161613464, "train/lm_loss": 3.3849961161613464, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8120296139860376e-05, "perf/tokens_per_sec": 25622.818477802088, "train/loss_math": 2.7197908560434976, "train/loss_prose": 3.78411922454834} +{"step": 1059, "train/loss": 2.4892576038837433, "train/lm_loss": 2.4892576038837433, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8113650840307834e-05, "perf/tokens_per_sec": 26144.347701166917, "train/loss_prose": 3.899787108103434, "train/loss_code": 1.6429400444030762} +{"step": 1060, "train/loss": 1.98495215177536, "train/lm_loss": 1.98495215177536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.810699427544265e-05, "perf/tokens_per_sec": 25511.14626402901, "train/loss_code": 1.5771504878997802, "train/loss_math": 2.3281615376472473, "train/loss_prose": 3.337541341781616} +{"step": 1061, "train/loss": 2.3676513731479645, "train/lm_loss": 2.3676513731479645, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.810032644850917e-05, "perf/tokens_per_sec": 26107.27952477703, "train/loss_math": 2.430091063181559, "train/loss_prose": 3.5548274517059326, "train/loss_code": 1.513761321703593} +{"step": 1062, "train/loss": 2.678084760904312, "train/lm_loss": 2.678084760904312, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8093647362757206e-05, "perf/tokens_per_sec": 26358.10346631645, "train/loss_code": 1.4967689911524455, "train/loss_math": 2.4415693283081055, "train/loss_prose": 4.017077128092448} +{"step": 1063, "train/loss": 2.6486948430538177, "train/lm_loss": 2.6486948430538177, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.808695702144206e-05, "perf/tokens_per_sec": 25927.14264111762, "train/loss_prose": 4.126225471496582, "train/loss_math": 2.3414610624313354, "train/loss_code": 1.3759868741035461} +{"step": 1064, "train/loss": 1.9013305306434631, "train/lm_loss": 1.9013305306434631, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.808025542782453e-05, "perf/tokens_per_sec": 26086.863515163295, "train/loss_prose": 1.726981282234192, "train/loss_code": 1.483818531036377, "train/loss_math": 2.5161295731862388} +{"step": 1065, "train/loss": 2.538109302520752, "train/lm_loss": 2.538109302520752, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8073542585170877e-05, "perf/tokens_per_sec": 26090.904538151026, "train/loss_math": 2.5329158306121826, "train/loss_code": 1.7420222759246826, "train/loss_prose": 3.740029811859131} +{"step": 1066, "train/loss": 2.8155184984207153, "train/lm_loss": 2.8155184984207153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8066818496752875e-05, "perf/tokens_per_sec": 25818.782005464367, "train/loss_prose": 3.411430060863495, "train/loss_math": 2.2159597476323447, "train/loss_code": 2.2305476665496826} +{"step": 1067, "train/loss": 2.567118465900421, "train/lm_loss": 2.567118465900421, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8060083165847754e-05, "perf/tokens_per_sec": 26004.927326421843, "train/loss_math": 2.382514476776123, "train/loss_code": 2.1078628301620483, "train/loss_prose": 3.3955819606781006} +{"step": 1068, "train/loss": 2.390382021665573, "train/lm_loss": 2.390382021665573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8053336595738236e-05, "perf/tokens_per_sec": 26561.701050261985, "train/loss_code": 2.0388720333576202, "train/loss_math": 2.426748514175415, "train/loss_prose": 3.6873221397399902} +{"step": 1069, "train/loss": 3.128729283809662, "train/lm_loss": 3.128729283809662, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8046578789712515e-05, "perf/tokens_per_sec": 26505.476529823518, "train/loss_prose": 3.854692280292511, "train/loss_code": 2.6605608463287354, "train/loss_math": 2.3168343702952066} +{"step": 1070, "train/loss": 2.955330491065979, "train/lm_loss": 2.955330491065979, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.803980975106427e-05, "perf/tokens_per_sec": 25474.452968285696, "train/loss_math": 2.3530641396840415, "train/loss_prose": 3.6935458183288574, "train/loss_code": 1.8092678785324097} +{"step": 1071, "train/loss": 2.8589690029621124, "train/lm_loss": 2.8589690029621124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.803302948309264e-05, "perf/tokens_per_sec": 26411.998308881295, "train/loss_prose": 3.849399209022522, "train/loss_code": 1.8685385882854462} +{"step": 1072, "train/loss": 3.1297287344932556, "train/lm_loss": 3.1297287344932556, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.802623798910224e-05, "perf/tokens_per_sec": 25981.762998542108, "train/loss_prose": 3.688813066482544, "train/loss_code": 0.8712455630302429, "train/loss_math": 2.8612598180770874} +{"step": 1073, "train/loss": 2.5079398155212402, "train/lm_loss": 2.5079398155212402, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.801943527240318e-05, "perf/tokens_per_sec": 24526.062614743732, "train/loss_math": 2.689756393432617, "train/loss_code": 1.683184544245402, "train/loss_prose": 3.4723480939865112} +{"step": 1074, "train/loss": 2.3369520008563995, "train/lm_loss": 2.3369520008563995, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8012621336311016e-05, "perf/tokens_per_sec": 24720.338869791314, "train/loss_math": 2.5972814559936523, "train/loss_code": 1.3899160325527191, "train/loss_prose": 3.512890021006266} +{"step": 1075, "train/loss": 2.5457702577114105, "train/lm_loss": 2.5457702577114105, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.800579618414676e-05, "perf/tokens_per_sec": 26427.234078162448, "train/loss_prose": 3.6492775678634644, "train/loss_math": 2.422510325908661, "train/loss_code": 1.6887824535369873} +{"step": 1076, "train/loss": 2.4943873584270477, "train/lm_loss": 2.4943873584270477, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.799895981923693e-05, "perf/tokens_per_sec": 24499.585990921678, "train/loss_code": 1.5469887256622314, "train/loss_math": 2.477203607559204, "train/loss_prose": 3.476153254508972} +{"step": 1077, "train/loss": 2.8004212379455566, "train/lm_loss": 2.8004212379455566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.799211224491348e-05, "perf/tokens_per_sec": 24599.354203328527, "train/loss_math": 2.641326069831848, "train/loss_prose": 3.6186492443084717, "train/loss_code": 1.3230602741241455} +{"step": 1078, "train/loss": 1.9768143594264984, "train/lm_loss": 1.9768143594264984, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7985253464513825e-05, "perf/tokens_per_sec": 24694.11574700269, "train/loss_math": 2.375738573074341, "train/loss_code": 1.3119404117266338} +{"step": 1079, "train/loss": 2.4568236470222473, "train/lm_loss": 2.4568236470222473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.797838348138086e-05, "perf/tokens_per_sec": 24548.246221641773, "train/loss_math": 2.638038992881775, "train/loss_code": 1.1712995370229085, "train/loss_prose": 3.621537208557129} +{"step": 1080, "train/loss": 2.823656678199768, "train/lm_loss": 2.823656678199768, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7971502298862936e-05, "perf/tokens_per_sec": 24767.738238478516, "train/loss_math": 2.358013939857483, "train/loss_prose": 3.5997277895609536} +{"step": 1081, "train/loss": 2.233737677335739, "train/lm_loss": 2.233737677335739, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.796460992031385e-05, "perf/tokens_per_sec": 25177.7244088026, "train/loss_code": 1.7557112872600555, "train/loss_math": 2.3972686926523843, "train/loss_prose": 3.6552507877349854} +{"step": 1082, "train/loss": 2.8893654346466064, "train/lm_loss": 2.8893654346466064, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7957706349092865e-05, "perf/tokens_per_sec": 25283.773792388907, "train/loss_prose": 3.596801519393921, "train/loss_math": 2.3256683349609375, "train/loss_code": 2.134016235669454} +{"step": 1083, "train/loss": 2.274966776371002, "train/lm_loss": 2.274966776371002, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.79507915885647e-05, "perf/tokens_per_sec": 26268.993458695593, "train/loss_code": 1.4599756300449371, "train/loss_prose": 3.8116780519485474, "train/loss_math": 2.368237853050232} +{"step": 1084, "train/loss": 2.799028664827347, "train/lm_loss": 2.799028664827347, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.794386564209953e-05, "perf/tokens_per_sec": 25992.101221698576, "train/loss_code": 1.5158629417419434, "train/loss_math": 2.247979164123535, "train/loss_prose": 3.961483637491862} +{"step": 1085, "train/loss": 2.6724365949630737, "train/lm_loss": 2.6724365949630737, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7936928513072964e-05, "perf/tokens_per_sec": 26025.924902970128, "train/loss_code": 1.5767156680425007, "train/loss_prose": 3.8053860664367676, "train/loss_math": 2.6165937185287476} +{"step": 1086, "train/loss": 2.073803663253784, "train/lm_loss": 2.073803663253784, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.792998020486609e-05, "perf/tokens_per_sec": 26162.383420415037, "train/loss_math": 2.7206717133522034, "train/loss_code": 1.42693530023098} +{"step": 1087, "train/loss": 2.923487961292267, "train/lm_loss": 2.923487961292267, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7923020720865414e-05, "perf/tokens_per_sec": 24529.21423676155, "train/loss_code": 1.615619421005249, "train/loss_prose": 4.072435935338338, "train/loss_math": 2.6464521884918213} +{"step": 1088, "train/loss": 2.190777063369751, "train/lm_loss": 2.190777063369751, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.791605006446291e-05, "perf/tokens_per_sec": 25440.61493899685, "train/loss_math": 2.5679909586906433, "train/loss_prose": 2.9316041469573975, "train/loss_code": 1.4408825635910034} +{"step": 1089, "train/loss": 2.7660528421401978, "train/lm_loss": 2.7660528421401978, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.790906823905599e-05, "perf/tokens_per_sec": 25939.983034699144, "train/loss_math": 2.3891235987345376, "train/loss_prose": 3.2345898151397705, "train/loss_code": 2.6286404132843018} +{"step": 1090, "train/loss": 2.843266546726227, "train/lm_loss": 2.843266546726227, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7902075248047515e-05, "perf/tokens_per_sec": 25894.784955588148, "train/loss_prose": 3.573540528615316, "train/loss_math": 2.4374905228614807, "train/loss_code": 2.275547981262207} +{"step": 1091, "train/loss": 2.523488074541092, "train/lm_loss": 2.523488074541092, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.789507109484579e-05, "perf/tokens_per_sec": 26054.5408254105, "train/loss_prose": 3.7596073150634766, "train/loss_math": 2.3604010740915933, "train/loss_code": 1.862495978673299} +{"step": 1092, "train/loss": 2.371007353067398, "train/lm_loss": 2.371007353067398, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.788805578286454e-05, "perf/tokens_per_sec": 26254.220396536493, "train/loss_code": 1.4719115694363911, "train/loss_math": 2.5526546637217202, "train/loss_prose": 3.447179913520813} +{"step": 1093, "train/loss": 2.820385754108429, "train/lm_loss": 2.820385754108429, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.788102931552294e-05, "perf/tokens_per_sec": 25815.678236225773, "train/loss_code": 2.2928056716918945, "train/loss_prose": 3.703120470046997, "train/loss_math": 2.287654161453247} +{"step": 1094, "train/loss": 2.5490314066410065, "train/lm_loss": 2.5490314066410065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7873991696245624e-05, "perf/tokens_per_sec": 25976.812998787336, "train/loss_math": 2.82983660697937, "train/loss_prose": 3.189744234085083, "train/loss_code": 1.721114953358968} +{"step": 1095, "train/loss": 2.7272360920906067, "train/lm_loss": 2.7272360920906067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7866942928462625e-05, "perf/tokens_per_sec": 25881.755308587293, "train/loss_prose": 3.5155738989512124, "train/loss_code": 2.174099922180176, "train/loss_math": 2.3076558907826743} +{"step": 1096, "train/loss": 2.363423705101013, "train/lm_loss": 2.363423705101013, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.785988301560944e-05, "perf/tokens_per_sec": 26073.204621579003, "train/loss_code": 1.7230719923973083, "train/loss_math": 2.4066566705703734, "train/loss_prose": 3.427961826324463} +{"step": 1097, "train/loss": 2.1916078627109528, "train/lm_loss": 2.1916078627109528, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.785281196112698e-05, "perf/tokens_per_sec": 23765.734452553937, "train/loss_code": 1.2075023800134659, "train/loss_prose": 3.551942507425944, "train/loss_math": 2.047024965286255} +{"step": 1098, "train/loss": 2.8260303139686584, "train/lm_loss": 2.8260303139686584, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7845729768461576e-05, "perf/tokens_per_sec": 25209.349517526352, "train/loss_prose": 3.6677163441975913, "train/loss_math": 2.4880926609039307, "train/loss_code": 1.6527224779129028} +{"step": 1099, "train/loss": 2.9674819707870483, "train/lm_loss": 2.9674819707870483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.783863644106502e-05, "perf/tokens_per_sec": 26554.803602701253, "train/loss_math": 2.408459281921387, "train/loss_prose": 3.899186293284098} +{"step": 1100, "train/loss": 2.5935812294483185, "train/lm_loss": 2.5935812294483185, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.783153198239452e-05, "perf/tokens_per_sec": 25804.317050054447, "train/loss_prose": 3.643243392308553, "train/loss_code": 1.389677385489146, "train/loss_math": 2.824943423271179} +{"step": 1101, "train/loss": 2.777979552745819, "train/lm_loss": 2.777979552745819, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7824416395912686e-05, "perf/tokens_per_sec": 26013.19624668208, "train/loss_code": 1.8973808288574219, "train/loss_prose": 3.9167744318644204, "train/loss_math": 2.390684723854065} +{"step": 1102, "train/loss": 2.422442764043808, "train/lm_loss": 2.422442764043808, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7817289685087577e-05, "perf/tokens_per_sec": 25974.142353898118, "train/loss_code": 1.9207911491394043, "train/loss_math": 2.5896599292755127} +{"step": 1103, "train/loss": 2.3883360624313354, "train/lm_loss": 2.3883360624313354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.781015185339266e-05, "perf/tokens_per_sec": 26048.931097171742, "train/loss_math": 2.640651226043701, "train/loss_prose": 3.848391056060791, "train/loss_code": 1.5321509838104248} +{"step": 1104, "train/loss": 2.526701271533966, "train/lm_loss": 2.526701271533966, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.780300290430682e-05, "perf/tokens_per_sec": 25875.86896287884, "train/loss_math": 2.541167914867401, "train/loss_code": 1.391758680343628, "train/loss_prose": 3.6327110528945923} +{"step": 1105, "train/loss": 2.3014805912971497, "train/lm_loss": 2.3014805912971497, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.77958428413144e-05, "perf/tokens_per_sec": 25551.59317032542, "train/loss_code": 1.5268590847651164, "train/loss_prose": 3.2897424697875977, "train/loss_math": 2.417260726292928} +{"step": 1106, "train/loss": 2.3663850128650665, "train/lm_loss": 2.3663850128650665, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7788671667905096e-05, "perf/tokens_per_sec": 26194.535064808333, "train/loss_math": 2.4073824882507324, "train/loss_code": 1.53475817044576, "train/loss_prose": 3.5523287057876587} +{"step": 1107, "train/loss": 2.6810246407985687, "train/lm_loss": 2.6810246407985687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.778148938757406e-05, "perf/tokens_per_sec": 25976.891555480288, "train/loss_code": 1.4993716478347778, "train/loss_math": 3.046861410140991, "train/loss_prose": 3.618786414464315} +{"step": 1108, "train/loss": 2.240303725004196, "train/lm_loss": 2.240303725004196, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.777429600382185e-05, "perf/tokens_per_sec": 25578.794130828042, "train/loss_math": 2.5861103534698486, "train/loss_code": 1.740265917778015, "train/loss_prose": 4.0488786697387695} +{"step": 1109, "train/loss": 2.4728744328022003, "train/lm_loss": 2.4728744328022003, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.776709152015443e-05, "perf/tokens_per_sec": 26106.367060139317, "train/loss_code": 1.4515524208545685, "train/loss_prose": 3.8225385348002114, "train/loss_math": 2.5091702938079834} +{"step": 1110, "train/loss": 2.209965467453003, "train/lm_loss": 2.209965467453003, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.775987594008319e-05, "perf/tokens_per_sec": 24986.93802086818, "train/loss_code": 1.4134153574705124, "train/loss_prose": 3.5656808614730835, "train/loss_math": 2.4473506212234497} +{"step": 1111, "train/loss": 2.9188461005687714, "train/lm_loss": 2.9188461005687714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.775264926712489e-05, "perf/tokens_per_sec": 25287.904817838855, "train/loss_math": 2.298393726348877, "train/loss_code": 1.3723349571228027, "train/loss_prose": 4.0023274421691895} +{"step": 1112, "train/loss": 2.3630168437957764, "train/lm_loss": 2.3630168437957764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.774541150480175e-05, "perf/tokens_per_sec": 26028.685077783637, "train/loss_math": 2.4638355573018393, "train/loss_code": 1.3924375971158345, "train/loss_prose": 3.6676580905914307} +{"step": 1113, "train/loss": 2.23228320479393, "train/lm_loss": 2.23228320479393, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.773816265664136e-05, "perf/tokens_per_sec": 26506.08994242056, "train/loss_code": 1.776735043525696, "train/loss_math": 2.611992359161377, "train/loss_prose": 3.750605821609497} +{"step": 1114, "train/loss": 2.7620915174484253, "train/lm_loss": 2.7620915174484253, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.773090272617672e-05, "perf/tokens_per_sec": 26004.297528967465, "train/loss_math": 2.546539862950643, "train/loss_prose": 3.7230708599090576, "train/loss_code": 1.6439502239227295} +{"step": 1115, "train/loss": 2.059613883495331, "train/lm_loss": 2.059613883495331, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.772363171694622e-05, "perf/tokens_per_sec": 26858.653592645864, "train/loss_code": 1.4892616868019104, "train/loss_prose": 3.327223539352417, "train/loss_math": 2.376155376434326} +{"step": 1116, "train/loss": 2.1023791432380676, "train/lm_loss": 2.1023791432380676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7716349632493674e-05, "perf/tokens_per_sec": 26055.291606128347, "train/loss_code": 1.7460072040557861, "train/loss_math": 2.6963326930999756} +{"step": 1117, "train/loss": 2.864228904247284, "train/lm_loss": 2.864228904247284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.770905647636828e-05, "perf/tokens_per_sec": 27142.02303455816, "train/loss_code": 2.4960174560546875, "train/loss_math": 2.6340613961219788, "train/loss_prose": 3.6927754878997803} +{"step": 1118, "train/loss": 3.3125401735305786, "train/lm_loss": 3.3125401735305786, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.770175225212463e-05, "perf/tokens_per_sec": 26109.501444544243, "train/loss_math": 2.65401562054952, "train/loss_prose": 3.7076550483703614} +{"step": 1119, "train/loss": 2.6869397461414337, "train/lm_loss": 2.6869397461414337, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.769443696332272e-05, "perf/tokens_per_sec": 25681.45955513035, "train/loss_prose": 3.5976030826568604, "train/loss_code": 1.7762763798236847} +{"step": 1120, "train/loss": 2.2890886068344116, "train/lm_loss": 2.2890886068344116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7687110613527926e-05, "perf/tokens_per_sec": 25892.755525613375, "train/loss_prose": 4.1361799240112305, "train/loss_code": 1.2668782025575638, "train/loss_math": 2.4864176511764526} +{"step": 1121, "train/loss": 2.416396141052246, "train/lm_loss": 2.416396141052246, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.767977320631103e-05, "perf/tokens_per_sec": 24252.266692264908, "train/loss_math": 2.5560187498728433, "train/loss_prose": 3.4295852184295654, "train/loss_code": 1.6013145446777344} +{"step": 1122, "train/loss": 1.8966206014156342, "train/lm_loss": 1.8966206014156342, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7672424745248176e-05, "perf/tokens_per_sec": 26112.874724125562, "train/loss_code": 1.5774221022923787, "train/loss_math": 2.654917001724243, "train/loss_prose": 3.0535151958465576} +{"step": 1123, "train/loss": 2.608098655939102, "train/lm_loss": 2.608098655939102, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7665065233920945e-05, "perf/tokens_per_sec": 25554.671771690028, "train/loss_prose": 3.657806098461151, "train/loss_math": 2.338087797164917, "train/loss_code": 0.7786944955587387} +{"step": 1124, "train/loss": 2.457025647163391, "train/lm_loss": 2.457025647163391, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.765769467591625e-05, "perf/tokens_per_sec": 26187.50761623272, "train/loss_math": 2.5238301753997803, "train/loss_prose": 3.5859471956888833, "train/loss_code": 1.283567746480306} +{"step": 1125, "train/loss": 2.689668297767639, "train/lm_loss": 2.689668297767639, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7650313074826425e-05, "perf/tokens_per_sec": 25171.084363333743, "train/loss_prose": 3.858910322189331, "train/loss_code": 1.7180039882659912, "train/loss_math": 2.168202559153239} +{"step": 1126, "train/loss": 2.6082059741020203, "train/lm_loss": 2.6082059741020203, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.764292043424916e-05, "perf/tokens_per_sec": 25844.26109220665, "train/loss_prose": 3.4613051414489746, "train/loss_math": 2.625505268573761, "train/loss_code": 1.720508098602295} +{"step": 1127, "train/loss": 2.096144050359726, "train/lm_loss": 2.096144050359726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.763551675778755e-05, "perf/tokens_per_sec": 25681.19082705126, "train/loss_code": 1.7163616180419923, "train/loss_prose": 3.8047196865081787, "train/loss_math": 2.191312313079834} +{"step": 1128, "train/loss": 2.547470808029175, "train/lm_loss": 2.547470808029175, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7628102049050036e-05, "perf/tokens_per_sec": 25462.74849193575, "train/loss_prose": 3.2761785984039307, "train/loss_math": 2.57496178150177, "train/loss_code": 1.7637816071510315} +{"step": 1129, "train/loss": 2.424467593431473, "train/lm_loss": 2.424467593431473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.762067631165049e-05, "perf/tokens_per_sec": 25594.41802647359, "train/loss_math": 2.0272185802459717, "train/loss_code": 1.1022768716017406, "train/loss_prose": 3.515422821044922} +{"step": 1130, "train/loss": 2.5713126957416534, "train/lm_loss": 2.5713126957416534, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7613239549208106e-05, "perf/tokens_per_sec": 26470.966743090994, "train/loss_math": 2.5452144940694175, "train/loss_code": 1.917170524597168, "train/loss_prose": 3.5916736125946045} +{"step": 1131, "train/loss": 2.806174635887146, "train/lm_loss": 2.806174635887146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.760579176534747e-05, "perf/tokens_per_sec": 25108.545131418952, "train/loss_prose": 3.8031859397888184, "train/loss_math": 2.2317111492156982, "train/loss_code": 1.386615812778473} +{"step": 1132, "train/loss": 2.9230917096138, "train/lm_loss": 2.9230917096138, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7598332963698545e-05, "perf/tokens_per_sec": 25965.07411588854, "train/loss_prose": 3.9255133867263794, "train/loss_math": 2.518872022628784, "train/loss_code": 1.3224685788154602} +{"step": 1133, "train/loss": 2.693817436695099, "train/lm_loss": 2.693817436695099, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7590863147896666e-05, "perf/tokens_per_sec": 26244.39427996169, "train/loss_prose": 3.3063478469848633, "train/loss_math": 2.489640712738037} +{"step": 1134, "train/loss": 2.7522092163562775, "train/lm_loss": 2.7522092163562775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.758338232158252e-05, "perf/tokens_per_sec": 25763.33717836823, "train/loss_prose": 3.562635600566864, "train/loss_code": 1.7575266758600872, "train/loss_math": 2.494550943374634} +{"step": 1135, "train/loss": 2.114598125219345, "train/lm_loss": 2.114598125219345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7575890488402185e-05, "perf/tokens_per_sec": 26786.79108638572, "train/loss_code": 1.5749991834163666, "train/loss_math": 2.6541972756385803} +{"step": 1136, "train/loss": 2.3871668875217438, "train/lm_loss": 2.3871668875217438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7568387652007075e-05, "perf/tokens_per_sec": 26479.41230670113, "train/loss_code": 1.1939094364643097, "train/loss_math": 2.4231943786144257, "train/loss_prose": 3.508368968963623} +{"step": 1137, "train/loss": 2.8096070885658264, "train/lm_loss": 2.8096070885658264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7560873816053984e-05, "perf/tokens_per_sec": 25985.142653166793, "train/loss_math": 2.6189669370651245, "train/loss_prose": 3.5611321330070496, "train/loss_code": 1.4971972703933716} +{"step": 1138, "train/loss": 2.7650396823883057, "train/lm_loss": 2.7650396823883057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.755334898420507e-05, "perf/tokens_per_sec": 26984.455051220593, "train/loss_prose": 3.5961636304855347, "train/loss_math": 2.3613890012105307, "train/loss_code": 0.6514952182769775} +{"step": 1139, "train/loss": 2.022792547941208, "train/lm_loss": 2.022792547941208, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.754581316012785e-05, "perf/tokens_per_sec": 26159.515152991695, "train/loss_math": 2.435069262981415, "train/loss_code": 1.6105157732963562} +{"step": 1140, "train/loss": 2.125617265701294, "train/lm_loss": 2.125617265701294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.753826634749518e-05, "perf/tokens_per_sec": 23328.235641415944, "train/loss_prose": 3.4550925493240356, "train/loss_code": 1.6178887844085694, "train/loss_math": 2.0053091049194336} +{"step": 1141, "train/loss": 2.7381158471107483, "train/lm_loss": 2.7381158471107483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7530708549985287e-05, "perf/tokens_per_sec": 25148.82889297467, "train/loss_prose": 3.1033666133880615, "train/loss_math": 2.554448962211609, "train/loss_code": 2.3770318031311035} +{"step": 1142, "train/loss": 2.4620907604694366, "train/lm_loss": 2.4620907604694366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.752313977128175e-05, "perf/tokens_per_sec": 25122.791406126616, "train/loss_math": 2.589942991733551, "train/loss_code": 1.3130465745925903, "train/loss_prose": 3.355430245399475} +{"step": 1143, "train/loss": 2.5564083456993103, "train/lm_loss": 2.5564083456993103, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7515560015073514e-05, "perf/tokens_per_sec": 25256.37946602491, "train/loss_prose": 3.6769729455312095, "train/loss_code": 1.7018239200115204, "train/loss_math": 2.6130528450012207} +{"step": 1144, "train/loss": 2.3617983162403107, "train/lm_loss": 2.3617983162403107, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7507969285054845e-05, "perf/tokens_per_sec": 26219.280908616412, "train/loss_code": 1.6259805560112, "train/loss_math": 2.3010406494140625, "train/loss_prose": 3.2191312313079834} +{"step": 1145, "train/loss": 2.546919524669647, "train/lm_loss": 2.546919524669647, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.750036758492537e-05, "perf/tokens_per_sec": 26025.688343796206, "train/loss_math": 2.5430266857147217, "train/loss_code": 1.5206229289372761, "train/loss_prose": 4.092203736305237} +{"step": 1146, "train/loss": 2.6772037148475647, "train/lm_loss": 2.6772037148475647, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7492754918390074e-05, "perf/tokens_per_sec": 25520.772082023614, "train/loss_prose": 3.3382952213287354, "train/loss_math": 2.3466880917549133, "train/loss_code": 2.0159924030303955} +{"step": 1147, "train/loss": 2.740499436855316, "train/lm_loss": 2.740499436855316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7485131289159276e-05, "perf/tokens_per_sec": 25434.13675616057, "train/loss_code": 1.7020379304885864, "train/loss_math": 2.3227004210154214, "train/loss_prose": 3.8506064414978027} +{"step": 1148, "train/loss": 2.9810068607330322, "train/lm_loss": 2.9810068607330322, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.747749670094864e-05, "perf/tokens_per_sec": 25812.264951845787, "train/loss_prose": 3.5933085680007935, "train/loss_code": 2.3048575719197593, "train/loss_math": 2.5602481365203857} +{"step": 1149, "train/loss": 2.5784391164779663, "train/lm_loss": 2.5784391164779663, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7469851157479177e-05, "perf/tokens_per_sec": 26736.51598444975, "train/loss_prose": 3.572529395421346, "train/loss_code": 1.9424364864826202, "train/loss_math": 2.1401782035827637} +{"step": 1150, "train/loss": 3.0377984642982483, "train/lm_loss": 3.0377984642982483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.746219466247722e-05, "perf/tokens_per_sec": 26139.017396728796, "train/loss_code": 1.973948746919632, "train/loss_math": 1.91779363155365, "train/loss_prose": 3.6873395442962646} +{"step": 1151, "train/loss": 1.9564683437347412, "train/lm_loss": 1.9564683437347412, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.745452721967446e-05, "perf/tokens_per_sec": 26797.612816781235, "train/loss_code": 1.3659226149320602, "train/loss_math": 2.547014057636261} +{"step": 1152, "train/loss": 2.493207037448883, "train/lm_loss": 2.493207037448883, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.744684883280792e-05, "perf/tokens_per_sec": 25905.327496799506, "train/loss_math": 2.319539427757263, "train/loss_code": 1.7236473560333252, "train/loss_prose": 3.6101021766662598} +{"step": 1153, "train/loss": 2.9110174775123596, "train/lm_loss": 2.9110174775123596, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.743915950561994e-05, "perf/tokens_per_sec": 25830.194771375474, "train/loss_code": 1.871026913324992, "train/loss_prose": 3.7982015013694763, "train/loss_math": 2.4822521209716797} +{"step": 1154, "train/loss": 2.068804979324341, "train/lm_loss": 2.068804979324341, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.743145924185821e-05, "perf/tokens_per_sec": 24599.812112134758, "train/loss_code": 1.629266905784607, "train/loss_prose": 3.6501846313476562, "train/loss_math": 2.3769599199295044} +{"step": 1155, "train/loss": 2.6012703478336334, "train/lm_loss": 2.6012703478336334, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.742374804527575e-05, "perf/tokens_per_sec": 26520.98413051264, "train/loss_math": 2.5715742905934653, "train/loss_prose": 3.940317749977112, "train/loss_code": 1.7382680177688599} +{"step": 1156, "train/loss": 2.3584140241146088, "train/lm_loss": 2.3584140241146088, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7416025919630904e-05, "perf/tokens_per_sec": 24966.566902334638, "train/loss_prose": 3.903789758682251, "train/loss_math": 2.492343783378601, "train/loss_code": 1.518761396408081} +{"step": 1157, "train/loss": 3.0623791217803955, "train/lm_loss": 3.0623791217803955, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.740829286868733e-05, "perf/tokens_per_sec": 25741.06387818001, "train/loss_math": 2.3875070810317993, "train/loss_prose": 3.7372512221336365} +{"step": 1158, "train/loss": 2.2963287234306335, "train/lm_loss": 2.2963287234306335, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7400548896214024e-05, "perf/tokens_per_sec": 26190.66151589898, "train/loss_math": 2.493199904759725, "train/loss_prose": 3.6527758836746216, "train/loss_code": 1.1951594352722168} +{"step": 1159, "train/loss": 2.452520936727524, "train/lm_loss": 2.452520936727524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7392794005985326e-05, "perf/tokens_per_sec": 26151.113156941363, "train/loss_code": 1.7048738400141399, "train/loss_prose": 3.283900022506714, "train/loss_math": 2.326922655105591} +{"step": 1160, "train/loss": 2.2226142287254333, "train/lm_loss": 2.2226142287254333, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.738502820178085e-05, "perf/tokens_per_sec": 25216.52793654428, "train/loss_code": 1.5549002885818481, "train/loss_prose": 3.5858309268951416, "train/loss_math": 2.1948256492614746} +{"step": 1161, "train/loss": 2.5192057490348816, "train/lm_loss": 2.5192057490348816, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.737725148738557e-05, "perf/tokens_per_sec": 26227.686730948906, "train/loss_math": 2.4644604921340942, "train/loss_code": 1.7696218490600586, "train/loss_prose": 3.597262144088745} +{"step": 1162, "train/loss": 2.3350621461868286, "train/lm_loss": 2.3350621461868286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.736946386658976e-05, "perf/tokens_per_sec": 26436.01786214389, "train/loss_code": 1.5427133242289226, "train/loss_prose": 3.67510724067688, "train/loss_math": 2.234047253926595} +{"step": 1163, "train/loss": 3.0344225764274597, "train/lm_loss": 3.0344225764274597, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7361665343189e-05, "perf/tokens_per_sec": 25904.03850352527, "train/loss_math": 2.6578194300333657, "train/loss_prose": 3.7606464624404907, "train/loss_code": 1.2593348026275635} +{"step": 1164, "train/loss": 2.7858679592609406, "train/lm_loss": 2.7858679592609406, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.73538559209842e-05, "perf/tokens_per_sec": 25715.479824869963, "train/loss_math": 2.426203727722168, "train/loss_code": 1.3309463262557983, "train/loss_prose": 3.7503941853841147} +{"step": 1165, "train/loss": 2.9525870084762573, "train/lm_loss": 2.9525870084762573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.73460356037816e-05, "perf/tokens_per_sec": 25042.29992099517, "train/loss_code": 2.292221963405609, "train/loss_prose": 3.741893212000529, "train/loss_math": 2.6035237312316895} +{"step": 1166, "train/loss": 2.7667880058288574, "train/lm_loss": 2.7667880058288574, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7338204395392694e-05, "perf/tokens_per_sec": 26196.652024080366, "train/loss_prose": 3.789280891418457, "train/loss_math": 2.3356409072875977, "train/loss_code": 1.1529485285282135} +{"step": 1167, "train/loss": 2.4858046174049377, "train/lm_loss": 2.4858046174049377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.733036229963435e-05, "perf/tokens_per_sec": 26211.96022709068, "train/loss_prose": 3.274289608001709, "train/loss_code": 2.1424148082733154, "train/loss_math": 2.239088535308838} +{"step": 1168, "train/loss": 2.353878527879715, "train/lm_loss": 2.353878527879715, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7322509320328675e-05, "perf/tokens_per_sec": 26921.955796256596, "train/loss_code": 1.6299053132534027, "train/loss_math": 2.4701054096221924, "train/loss_prose": 3.685598611831665} +{"step": 1169, "train/loss": 3.120912790298462, "train/lm_loss": 3.120912790298462, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.731464546130314e-05, "perf/tokens_per_sec": 26075.183285042778, "train/loss_prose": 3.446321630477905, "train/loss_math": 2.5785648822784424} +{"step": 1170, "train/loss": 2.67535936832428, "train/lm_loss": 2.67535936832428, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7306770726390496e-05, "perf/tokens_per_sec": 26317.80629763245, "train/loss_code": 2.2430264949798584, "train/loss_prose": 3.3097954591115317, "train/loss_math": 2.5013811588287354} +{"step": 1171, "train/loss": 2.366947501897812, "train/lm_loss": 2.366947501897812, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7298885119428773e-05, "perf/tokens_per_sec": 26035.70644593231, "train/loss_prose": 3.966978152592977, "train/loss_code": 1.2338051795959473, "train/loss_math": 2.0994246006011963} +{"step": 1172, "train/loss": 2.5118389427661896, "train/lm_loss": 2.5118389427661896, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7290988644261336e-05, "perf/tokens_per_sec": 26752.295193114736, "train/loss_code": 1.6363684833049774, "train/loss_math": 2.8534436225891113, "train/loss_prose": 3.921175479888916} +{"step": 1173, "train/loss": 2.1968813240528107, "train/lm_loss": 2.1968813240528107, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.728308130473683e-05, "perf/tokens_per_sec": 25585.1557072607, "train/loss_code": 1.7883650958538055, "train/loss_math": 2.2659929593404136, "train/loss_prose": 3.6236114501953125} +{"step": 1174, "train/loss": 2.7137452363967896, "train/lm_loss": 2.7137452363967896, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.72751631047092e-05, "perf/tokens_per_sec": 26056.516657035692, "train/loss_math": 2.526171064376831, "train/loss_prose": 3.658815860748291, "train/loss_code": 1.7614758014678955} +{"step": 1175, "train/loss": 2.1633895933628082, "train/lm_loss": 2.1633895933628082, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7267234048037664e-05, "perf/tokens_per_sec": 26094.03521050817, "train/loss_code": 1.5689324140548706, "train/loss_math": 2.3180058002471924, "train/loss_prose": 3.5722243785858154} +{"step": 1176, "train/loss": 2.620337724685669, "train/lm_loss": 2.620337724685669, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.725929413858677e-05, "perf/tokens_per_sec": 26164.216253919712, "train/loss_math": 2.156531810760498, "train/loss_code": 2.1683236062526703, "train/loss_prose": 3.377624829610189} +{"step": 1177, "train/loss": 2.5894158482551575, "train/lm_loss": 2.5894158482551575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.725134338022631e-05, "perf/tokens_per_sec": 25652.622595981557, "train/loss_prose": 4.003496050834656, "train/loss_math": 2.354948580265045, "train/loss_code": 1.6442701816558838} +{"step": 1178, "train/loss": 2.679350256919861, "train/lm_loss": 2.679350256919861, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.724338177683141e-05, "perf/tokens_per_sec": 25761.560076775433, "train/loss_prose": 4.133474826812744, "train/loss_math": 2.606791543960571, "train/loss_code": 2.1336851119995117} +{"step": 1179, "train/loss": 2.9574663043022156, "train/lm_loss": 2.9574663043022156, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.723540933228244e-05, "perf/tokens_per_sec": 25473.96191925078, "train/loss_prose": 3.482248902320862, "train/loss_math": 2.5741496086120605, "train/loss_code": 2.0082859992980957} +{"step": 1180, "train/loss": 2.379695475101471, "train/lm_loss": 2.379695475101471, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7227426050465084e-05, "perf/tokens_per_sec": 26407.613665045, "train/loss_code": 1.9594771265983582, "train/loss_math": 2.345847487449646, "train/loss_prose": 3.253979802131653} +{"step": 1181, "train/loss": 3.061996042728424, "train/lm_loss": 3.061996042728424, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.721943193527029e-05, "perf/tokens_per_sec": 26118.23407771374, "train/loss_math": 2.4123430252075195, "train/loss_code": 2.666625499725342, "train/loss_prose": 4.059990008672078} +{"step": 1182, "train/loss": 2.7361254394054413, "train/lm_loss": 2.7361254394054413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7211426990594296e-05, "perf/tokens_per_sec": 26238.54221337076, "train/loss_math": 2.508734862009684, "train/loss_prose": 3.2558034658432007, "train/loss_code": 1.3395851850509644} +{"step": 1183, "train/loss": 2.786317229270935, "train/lm_loss": 2.786317229270935, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.720341122033862e-05, "perf/tokens_per_sec": 26079.062336719995, "train/loss_math": 2.313273032506307, "train/loss_code": 1.6349863409996033, "train/loss_prose": 4.026915470759074} +{"step": 1184, "train/loss": 3.2186315059661865, "train/lm_loss": 3.2186315059661865, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.719538462841003e-05, "perf/tokens_per_sec": 25933.091486545032, "train/loss_code": 1.8330553372701008, "train/loss_prose": 4.0499773025512695} +{"step": 1185, "train/loss": 2.9426506757736206, "train/lm_loss": 2.9426506757736206, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.718734721872062e-05, "perf/tokens_per_sec": 26369.633836887686, "train/loss_code": 2.114952484766642, "train/loss_prose": 3.668460190296173, "train/loss_math": 2.5225069522857666} +{"step": 1186, "train/loss": 2.7793341875076294, "train/lm_loss": 2.7793341875076294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.71792989951877e-05, "perf/tokens_per_sec": 26784.368884994714, "train/loss_math": 2.5833088397979735, "train/loss_prose": 3.6271634101867676, "train/loss_code": 2.0638017654418945} +{"step": 1187, "train/loss": 2.337754398584366, "train/lm_loss": 2.337754398584366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.71712399617339e-05, "perf/tokens_per_sec": 26217.840451475262, "train/loss_math": 2.450956344604492, "train/loss_code": 1.941798746585846, "train/loss_prose": 3.5819714069366455} +{"step": 1188, "train/loss": 2.8027385473251343, "train/lm_loss": 2.8027385473251343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.716317012228707e-05, "perf/tokens_per_sec": 26267.868837018712, "train/loss_code": 2.002288579940796, "train/loss_math": 2.6702768007914224, "train/loss_prose": 3.4688335259755454} +{"step": 1189, "train/loss": 3.021894156932831, "train/lm_loss": 3.021894156932831, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.715508948078037e-05, "perf/tokens_per_sec": 26115.851871219595, "train/loss_prose": 3.3042561610539756, "train/loss_math": 2.73880672454834, "train/loss_code": 1.6108089685440063} +{"step": 1190, "train/loss": 2.6303971111774445, "train/lm_loss": 2.6303971111774445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7146998041152204e-05, "perf/tokens_per_sec": 25883.510035586332, "train/loss_math": 2.504183371861776, "train/loss_prose": 3.5970661640167236, "train/loss_code": 1.3697139620780945} +{"step": 1191, "train/loss": 2.596243381500244, "train/lm_loss": 2.596243381500244, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.713889580734623e-05, "perf/tokens_per_sec": 25886.903182546248, "train/loss_prose": 3.4865439732869468, "train/loss_code": 1.8956201076507568, "train/loss_math": 2.311727285385132} +{"step": 1192, "train/loss": 2.6594433188438416, "train/lm_loss": 2.6594433188438416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.713078278331138e-05, "perf/tokens_per_sec": 26139.176478822235, "train/loss_code": 1.523490071296692, "train/loss_math": 2.409060557683309, "train/loss_prose": 3.667128245035807} +{"step": 1193, "train/loss": 2.1856305301189423, "train/lm_loss": 2.1856305301189423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.712265897300186e-05, "perf/tokens_per_sec": 26224.483877467534, "train/loss_code": 1.6806633174419403, "train/loss_prose": 3.398033857345581, "train/loss_math": 2.4547855854034424} +{"step": 1194, "train/loss": 2.8361275792121887, "train/lm_loss": 2.8361275792121887, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.71145243803771e-05, "perf/tokens_per_sec": 26127.13148982053, "train/loss_math": 2.2904573678970337, "train/loss_prose": 3.6610350012779236, "train/loss_code": 1.731982707977295} +{"step": 1195, "train/loss": 2.555662453174591, "train/lm_loss": 2.555662453174591, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.710637900940181e-05, "perf/tokens_per_sec": 26316.99999846815, "train/loss_code": 1.8064124584197998, "train/loss_math": 2.502223173777262, "train/loss_prose": 3.6255483627319336} +{"step": 1196, "train/loss": 3.077684700489044, "train/lm_loss": 3.077684700489044, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7098222864045945e-05, "perf/tokens_per_sec": 25432.14123679902, "train/loss_math": 2.6002917289733887, "train/loss_prose": 3.6087160110473633, "train/loss_code": 2.3857383728027344} +{"step": 1197, "train/loss": 2.502057582139969, "train/lm_loss": 2.502057582139969, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7090055948284706e-05, "perf/tokens_per_sec": 26287.24184446648, "train/loss_prose": 3.5679057439168296, "train/loss_code": 1.810417652130127, "train/loss_math": 2.0710713863372803} +{"step": 1198, "train/loss": 2.4821805357933044, "train/lm_loss": 2.4821805357933044, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7081878266098545e-05, "perf/tokens_per_sec": 27068.851493603794, "train/loss_math": 2.4508947134017944, "train/loss_code": 1.646233081817627, "train/loss_prose": 3.3806995153427124} +{"step": 1199, "train/loss": 2.6838679909706116, "train/lm_loss": 2.6838679909706116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.707368982147318e-05, "perf/tokens_per_sec": 27049.58737886243, "train/loss_math": 2.4660649640219554, "train/loss_prose": 4.208489418029785} +{"step": 1200, "train/loss": 2.201565831899643, "train/lm_loss": 2.201565831899643, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.706549061839954e-05, "perf/tokens_per_sec": 26595.089289280302, "train/loss_code": 1.4754213591416676, "train/loss_prose": 4.379999041557312} +{"step": 1200, "eval/loss": 2.271929902796457, "eval/lm_loss": 2.271929902796457, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 9.698099153957626, "eval/loss_code": 1.6342403871011517, "eval/ppl_code": 5.125563074072661, "eval/loss_prose": 3.489857256412506, "eval/ppl_prose": 32.781268056439856, "eval/loss_math": 2.2770099369520995, "eval/ppl_math": 9.747491179271767} +{"step": 1201, "train/loss": 2.9948887825012207, "train/lm_loss": 2.9948887825012207, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7057280660873835e-05, "perf/tokens_per_sec": 25363.13132828232, "train/loss_prose": 3.5421046018600464, "train/loss_code": 2.6437289714813232, "train/loss_math": 2.251617670059204} +{"step": 1202, "train/loss": 2.5539679527282715, "train/lm_loss": 2.5539679527282715, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.704905995289749e-05, "perf/tokens_per_sec": 23206.53730161839, "train/loss_code": 1.7620235284169514, "train/loss_prose": 3.3836721579233804, "train/loss_math": 2.4973278045654297} +{"step": 1203, "train/loss": 2.724811851978302, "train/lm_loss": 2.724811851978302, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.704082849847718e-05, "perf/tokens_per_sec": 23734.67414171836, "train/loss_code": 2.155448704957962, "train/loss_math": 2.362919330596924, "train/loss_prose": 3.6045939127604165} +{"step": 1204, "train/loss": 3.0433835983276367, "train/lm_loss": 3.0433835983276367, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.70325863016248e-05, "perf/tokens_per_sec": 25264.996513168633, "train/loss_prose": 4.166986385981242, "train/loss_math": 2.3870725631713867, "train/loss_code": 2.342445969581604} +{"step": 1205, "train/loss": 2.0751178562641144, "train/lm_loss": 2.0751178562641144, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.702433336635753e-05, "perf/tokens_per_sec": 24946.989688595888, "train/loss_code": 1.743270218372345, "train/loss_math": 2.4231021404266357, "train/loss_prose": 3.0383870601654053} +{"step": 1206, "train/loss": 2.5882922410964966, "train/lm_loss": 2.5882922410964966, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.701606969669773e-05, "perf/tokens_per_sec": 24248.775108541617, "train/loss_math": 2.5721211433410645, "train/loss_code": 1.407820224761963, "train/loss_prose": 3.3914448420206704} +{"step": 1207, "train/loss": 2.6210156083106995, "train/lm_loss": 2.6210156083106995, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7007795296673006e-05, "perf/tokens_per_sec": 25987.776249290928, "train/loss_code": 1.632802963256836, "train/loss_prose": 3.5015345215797424, "train/loss_math": 2.06357741355896} +{"step": 1208, "train/loss": 2.0001959800720215, "train/lm_loss": 2.0001959800720215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.699951017031621e-05, "perf/tokens_per_sec": 26930.860499274993, "train/loss_code": 1.5816696137189865, "train/loss_math": 2.4187222719192505} +{"step": 1209, "train/loss": 2.6494545340538025, "train/lm_loss": 2.6494545340538025, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6991214321665414e-05, "perf/tokens_per_sec": 24597.13534827117, "train/loss_prose": 3.4121605157852173, "train/loss_math": 2.3622024059295654, "train/loss_code": 1.411294400691986} +{"step": 1210, "train/loss": 2.7759746313095093, "train/lm_loss": 2.7759746313095093, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6982907754763906e-05, "perf/tokens_per_sec": 25522.857369521196, "train/loss_math": 2.4933987617492677, "train/loss_prose": 3.2469343344370523} +{"step": 1211, "train/loss": 2.8476672172546387, "train/lm_loss": 2.8476672172546387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6974590473660216e-05, "perf/tokens_per_sec": 25592.81665244005, "train/loss_code": 1.4787784218788147, "train/loss_prose": 3.492445182800293, "train/loss_math": 2.3615541458129883} +{"step": 1212, "train/loss": 2.9194246530532837, "train/lm_loss": 2.9194246530532837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.696626248240807e-05, "perf/tokens_per_sec": 25443.817585099703, "train/loss_math": 2.1892160177230835, "train/loss_code": 1.616339921951294, "train/loss_prose": 3.4721248626708983} +{"step": 1213, "train/loss": 2.729723781347275, "train/lm_loss": 2.729723781347275, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6957923785066445e-05, "perf/tokens_per_sec": 25858.30382082678, "train/loss_prose": 3.371419811248779, "train/loss_math": 2.1989948749542236, "train/loss_code": 0.5827016830444336} +{"step": 1214, "train/loss": 2.195779949426651, "train/lm_loss": 2.195779949426651, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.694957438569951e-05, "perf/tokens_per_sec": 25630.234334929144, "train/loss_prose": 3.1220740477244058, "train/loss_code": 1.1346747875213623, "train/loss_math": 2.3979963064193726} +{"step": 1215, "train/loss": 2.778155654668808, "train/lm_loss": 2.778155654668808, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.694121428837668e-05, "perf/tokens_per_sec": 25350.294428647525, "train/loss_math": 2.3625581860542297, "train/loss_code": 1.5082314014434814, "train/loss_prose": 3.7555935382843018} +{"step": 1216, "train/loss": 2.632818341255188, "train/lm_loss": 2.632818341255188, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.693284349717254e-05, "perf/tokens_per_sec": 25896.541449855595, "train/loss_math": 2.444054126739502, "train/loss_code": 1.2752192616462708, "train/loss_prose": 3.726648489634196} +{"step": 1217, "train/loss": 2.207373321056366, "train/lm_loss": 2.207373321056366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.692446201616692e-05, "perf/tokens_per_sec": 25919.084744586446, "train/loss_code": 1.7696640491485596, "train/loss_prose": 3.2591549158096313, "train/loss_math": 2.2923552989959717} +{"step": 1218, "train/loss": 2.0894100069999695, "train/lm_loss": 2.0894100069999695, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.691606984944486e-05, "perf/tokens_per_sec": 25616.514552915356, "train/loss_math": 2.440398073196411, "train/loss_code": 1.5044296979904175} +{"step": 1219, "train/loss": 2.9928227961063385, "train/lm_loss": 2.9928227961063385, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.690766700109659e-05, "perf/tokens_per_sec": 25988.95564447201, "train/loss_prose": 3.7557275772094725, "train/loss_code": 1.7213145891825359} +{"step": 1220, "train/loss": 2.4064472913742065, "train/lm_loss": 2.4064472913742065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.689925347521757e-05, "perf/tokens_per_sec": 26384.537106033473, "train/loss_prose": 3.4546252886454263, "train/loss_math": 2.23993456363678, "train/loss_code": 1.4692779580752056} +{"step": 1221, "train/loss": 2.257205441594124, "train/lm_loss": 2.257205441594124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6890829275908434e-05, "perf/tokens_per_sec": 26335.7996133912, "train/loss_math": 2.3691614468892417, "train/loss_prose": 3.7243324518203735, "train/loss_code": 1.1671645442644756} +{"step": 1222, "train/loss": 2.3960080444812775, "train/lm_loss": 2.3960080444812775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6882394407275044e-05, "perf/tokens_per_sec": 26303.904558051232, "train/loss_code": 1.5817219376564027, "train/loss_prose": 3.7531514167785645} +{"step": 1223, "train/loss": 3.253305435180664, "train/lm_loss": 3.253305435180664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.687394887342845e-05, "perf/tokens_per_sec": 26109.89825573001, "train/loss_prose": 3.6112986405690513, "train/loss_code": 2.1793261766433716} +{"step": 1224, "train/loss": 3.3697049617767334, "train/lm_loss": 3.3697049617767334, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6865492678484895e-05, "perf/tokens_per_sec": 25960.13047255957, "train/loss_prose": 3.6050346919468472, "train/loss_math": 1.7223966121673584} +{"step": 1225, "train/loss": 2.9178916215896606, "train/lm_loss": 2.9178916215896606, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.685702582656584e-05, "perf/tokens_per_sec": 26291.26472046572, "train/loss_math": 2.5661826133728027, "train/loss_code": 1.2716725170612335, "train/loss_prose": 3.646721029281616} +{"step": 1226, "train/loss": 2.443207174539566, "train/lm_loss": 2.443207174539566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.684854832179792e-05, "perf/tokens_per_sec": 25873.68662781066, "train/loss_code": 1.8559591472148895, "train/loss_math": 2.5776916980743407, "train/loss_prose": 2.9452812671661377} +{"step": 1227, "train/loss": 2.3360194265842438, "train/lm_loss": 2.3360194265842438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.684006016831297e-05, "perf/tokens_per_sec": 26070.474559089984, "train/loss_code": 1.507499098777771, "train/loss_math": 2.491716146469116, "train/loss_prose": 3.3452541828155518} +{"step": 1228, "train/loss": 2.8782166242599487, "train/lm_loss": 2.8782166242599487, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.683156137024801e-05, "perf/tokens_per_sec": 25974.417247870097, "train/loss_prose": 3.533023500442505, "train/loss_code": 1.7868717114130657} +{"step": 1229, "train/loss": 3.1814464330673218, "train/lm_loss": 3.1814464330673218, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.682305193174524e-05, "perf/tokens_per_sec": 25912.08704596942, "train/loss_prose": 3.594951550165812, "train/loss_math": 2.495389461517334, "train/loss_code": 1.386472225189209} +{"step": 1230, "train/loss": 2.076666682958603, "train/lm_loss": 2.076666682958603, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6814531856952084e-05, "perf/tokens_per_sec": 26841.867983251046, "train/loss_code": 1.6272670825322468, "train/loss_prose": 3.424865484237671} +{"step": 1231, "train/loss": 3.068697929382324, "train/lm_loss": 3.068697929382324, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.68060011500211e-05, "perf/tokens_per_sec": 26054.659366857857, "train/loss_prose": 3.5453972816467285, "train/loss_math": 2.633008122444153, "train/loss_code": 1.5565811395645142} +{"step": 1232, "train/loss": 2.149964779615402, "train/lm_loss": 2.149964779615402, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.679745981511005e-05, "perf/tokens_per_sec": 26126.456027227687, "train/loss_prose": 3.230021834373474, "train/loss_code": 1.6203367710113525, "train/loss_math": 2.637991428375244} +{"step": 1233, "train/loss": 2.920637786388397, "train/lm_loss": 2.920637786388397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6788907856381895e-05, "perf/tokens_per_sec": 26227.446489800528, "train/loss_math": 2.495718479156494, "train/loss_prose": 3.8153682947158813, "train/loss_code": 1.5560956597328186} +{"step": 1234, "train/loss": 2.830962896347046, "train/lm_loss": 2.830962896347046, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.678034527800474e-05, "perf/tokens_per_sec": 26163.18027299461, "train/loss_code": 1.9045198361078899, "train/loss_math": 2.644527792930603, "train/loss_prose": 3.881695826848348} +{"step": 1235, "train/loss": 2.6640204191207886, "train/lm_loss": 2.6640204191207886, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6771772084151885e-05, "perf/tokens_per_sec": 26073.48161638605, "train/loss_math": 2.362268090248108, "train/loss_prose": 3.3874641259511313, "train/loss_code": 1.7006981372833252} +{"step": 1236, "train/loss": 2.90623140335083, "train/lm_loss": 2.90623140335083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6763188279001804e-05, "perf/tokens_per_sec": 26271.12247054416, "train/loss_prose": 3.739015519618988, "train/loss_code": 1.7066781520843506, "train/loss_math": 2.4402164220809937} +{"step": 1237, "train/loss": 2.7840658724308014, "train/lm_loss": 2.7840658724308014, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.675459386673815e-05, "perf/tokens_per_sec": 26001.778644174, "train/loss_math": 2.487054944038391, "train/loss_prose": 3.713068882624308, "train/loss_code": 1.1851000785827637} +{"step": 1238, "train/loss": 2.3192304968833923, "train/lm_loss": 2.3192304968833923, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.67459888515497e-05, "perf/tokens_per_sec": 26491.825226639445, "train/loss_code": 1.816431999206543, "train/loss_math": 2.4428277015686035, "train/loss_prose": 2.574834644794464} +{"step": 1239, "train/loss": 2.8638042509555817, "train/lm_loss": 2.8638042509555817, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6737373237630476e-05, "perf/tokens_per_sec": 26207.721701775823, "train/loss_prose": 3.528158485889435, "train/loss_math": 2.3841554323832193, "train/loss_code": 1.6453324556350708} +{"step": 1240, "train/loss": 2.7952977418899536, "train/lm_loss": 2.7952977418899536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.67287470291796e-05, "perf/tokens_per_sec": 25935.5579267096, "train/loss_math": 2.629643678665161, "train/loss_prose": 3.6163021326065063, "train/loss_code": 1.9815597534179688} +{"step": 1241, "train/loss": 3.514610707759857, "train/lm_loss": 3.514610707759857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.672011023040138e-05, "perf/tokens_per_sec": 25991.354078162254, "train/loss_prose": 4.077211999893189, "train/loss_math": 2.717531204223633, "train/loss_code": 2.295762777328491} +{"step": 1242, "train/loss": 2.812728524208069, "train/lm_loss": 2.812728524208069, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6711462845505304e-05, "perf/tokens_per_sec": 26534.665509305738, "train/loss_code": 2.139218012491862, "train/loss_prose": 3.651968797047933, "train/loss_math": 2.564134120941162} +{"step": 1243, "train/loss": 2.238858252763748, "train/lm_loss": 2.238858252763748, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.670280487870598e-05, "perf/tokens_per_sec": 26429.59211352196, "train/loss_math": 2.264366865158081, "train/loss_code": 1.679754416147868, "train/loss_prose": 3.8141353130340576} +{"step": 1244, "train/loss": 2.978530466556549, "train/lm_loss": 2.978530466556549, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.669413633422322e-05, "perf/tokens_per_sec": 27208.43623785866, "train/loss_prose": 3.5846755027771, "train/loss_code": 1.8292752504348755, "train/loss_math": 2.2463154792785645} +{"step": 1245, "train/loss": 2.331009566783905, "train/lm_loss": 2.331009566783905, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6685457216281936e-05, "perf/tokens_per_sec": 26446.761530906617, "train/loss_prose": 3.425114393234253, "train/loss_code": 1.3828036387761433, "train/loss_math": 2.112161338329315} +{"step": 1246, "train/loss": 2.4052703380584717, "train/lm_loss": 2.4052703380584717, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.667676752911225e-05, "perf/tokens_per_sec": 27445.15190639822, "train/loss_math": 2.369420111179352, "train/loss_prose": 3.95379900932312, "train/loss_code": 1.6489309072494507} +{"step": 1247, "train/loss": 2.686492383480072, "train/lm_loss": 2.686492383480072, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6668067276949414e-05, "perf/tokens_per_sec": 26338.060602529884, "train/loss_prose": 3.2240188121795654, "train/loss_math": 2.3639766216278075} +{"step": 1248, "train/loss": 2.737970232963562, "train/lm_loss": 2.737970232963562, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6659356464033795e-05, "perf/tokens_per_sec": 24836.16274605047, "train/loss_math": 2.3434625466664634, "train/loss_prose": 3.76694663365682, "train/loss_code": 1.7862679958343506} +{"step": 1249, "train/loss": 2.281747281551361, "train/lm_loss": 2.281747281551361, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.665063509461097e-05, "perf/tokens_per_sec": 26631.03823706765, "train/loss_prose": 3.6172508001327515, "train/loss_code": 1.5744247734546661, "train/loss_math": 2.360888957977295} +{"step": 1250, "train/loss": 2.5208308696746826, "train/lm_loss": 2.5208308696746826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.664190317293161e-05, "perf/tokens_per_sec": 26091.38003493052, "train/loss_code": 2.0224862337112426, "train/loss_math": 2.7116925716400146, "train/loss_prose": 3.671261429786682} +{"step": 1251, "train/loss": 2.8891185522079468, "train/lm_loss": 2.8891185522079468, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6633160703251554e-05, "perf/tokens_per_sec": 25969.82335548394, "train/loss_prose": 3.4878389835357666, "train/loss_math": 2.693627953529358, "train/loss_code": 1.8871681094169617} +{"step": 1252, "train/loss": 2.5815833806991577, "train/lm_loss": 2.5815833806991577, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.662440768983177e-05, "perf/tokens_per_sec": 26106.8827931842, "train/loss_prose": 3.1712735493977866, "train/loss_code": 1.281867265701294, "train/loss_math": 2.4642446637153625} +{"step": 1253, "train/loss": 3.0203532576560974, "train/lm_loss": 3.0203532576560974, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6615644136938375e-05, "perf/tokens_per_sec": 25945.78103587583, "train/loss_code": 2.0088138580322266, "train/loss_prose": 3.6670198917388914, "train/loss_math": 1.8100990056991577} +{"step": 1254, "train/loss": 2.539487510919571, "train/lm_loss": 2.539487510919571, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6606870048842624e-05, "perf/tokens_per_sec": 25799.511913968934, "train/loss_prose": 3.4358327984809875, "train/loss_code": 1.3990343014399211, "train/loss_math": 2.375466823577881} +{"step": 1255, "train/loss": 2.4420344531536102, "train/lm_loss": 2.4420344531536102, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.659808542982088e-05, "perf/tokens_per_sec": 24516.227760058795, "train/loss_math": 2.502978722254435, "train/loss_code": 1.5395971536636353, "train/loss_prose": 3.7042739391326904} +{"step": 1256, "train/loss": 2.636044144630432, "train/lm_loss": 2.636044144630432, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.658929028415468e-05, "perf/tokens_per_sec": 24876.044800389216, "train/loss_prose": 3.680183172225952, "train/loss_code": 1.7528061469395955, "train/loss_math": 2.3946924209594727} +{"step": 1257, "train/loss": 3.0098853707313538, "train/lm_loss": 3.0098853707313538, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.658048461613068e-05, "perf/tokens_per_sec": 25150.15427497321, "train/loss_prose": 3.6768887042999268, "train/loss_code": 1.4119144082069397, "train/loss_math": 2.8708109855651855} +{"step": 1258, "train/loss": 2.6398420333862305, "train/lm_loss": 2.6398420333862305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6571668430040625e-05, "perf/tokens_per_sec": 25416.902789223033, "train/loss_prose": 3.38519024848938, "train/loss_math": 2.2974835634231567, "train/loss_code": 1.7732300758361816} +{"step": 1259, "train/loss": 2.6738677620887756, "train/lm_loss": 2.6738677620887756, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.656284173018144e-05, "perf/tokens_per_sec": 25178.90522810679, "train/loss_prose": 4.042138338088989, "train/loss_code": 1.7082664370536804, "train/loss_math": 2.4314610958099365} +{"step": 1260, "train/loss": 2.7016115188598633, "train/lm_loss": 2.7016115188598633, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.655400452085514e-05, "perf/tokens_per_sec": 24806.361889745134, "train/loss_prose": 3.9471662044525146, "train/loss_code": 1.9005889693895976, "train/loss_math": 2.0348130464553833} +{"step": 1261, "train/loss": 2.2544497549533844, "train/lm_loss": 2.2544497549533844, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.654515680636888e-05, "perf/tokens_per_sec": 26257.470649640447, "train/loss_math": 2.3020814657211304, "train/loss_code": 1.5242435534795125, "train/loss_prose": 4.254540920257568} +{"step": 1262, "train/loss": 2.578834056854248, "train/lm_loss": 2.578834056854248, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.653629859103492e-05, "perf/tokens_per_sec": 26230.249577075992, "train/loss_math": 2.3649977445602417, "train/loss_prose": 3.8372941811879477, "train/loss_code": 1.4629313548405964} +{"step": 1263, "train/loss": 2.4028452932834625, "train/lm_loss": 2.4028452932834625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.652742987917066e-05, "perf/tokens_per_sec": 26468.764438528215, "train/loss_math": 2.4689879417419434, "train/loss_code": 1.631291925907135, "train/loss_prose": 3.6152398586273193} +{"step": 1264, "train/loss": 2.9642155468463898, "train/lm_loss": 2.9642155468463898, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.65185506750986e-05, "perf/tokens_per_sec": 25114.19764643969, "train/loss_prose": 3.909172534942627, "train/loss_code": 1.6259069442749023, "train/loss_math": 2.4126099348068237} +{"step": 1265, "train/loss": 2.7866867184638977, "train/lm_loss": 2.7866867184638977, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6509660983146334e-05, "perf/tokens_per_sec": 25946.408002066062, "train/loss_math": 2.505202889442444, "train/loss_prose": 3.5074098110198975, "train/loss_code": 1.750451683998108} +{"step": 1266, "train/loss": 2.7942678332328796, "train/lm_loss": 2.7942678332328796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.650076080764662e-05, "perf/tokens_per_sec": 25497.591489260652, "train/loss_code": 1.8805243968963623, "train/loss_prose": 3.875898996988932, "train/loss_math": 2.5424362421035767} +{"step": 1267, "train/loss": 2.942234218120575, "train/lm_loss": 2.942234218120575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.649185015293728e-05, "perf/tokens_per_sec": 26951.90066313476, "train/loss_prose": 3.7803235054016113, "train/loss_math": 2.33325457572937, "train/loss_code": 1.875035583972931} +{"step": 1268, "train/loss": 2.108006238937378, "train/lm_loss": 2.108006238937378, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.648292902336126e-05, "perf/tokens_per_sec": 25031.171453174014, "train/loss_math": 2.505874514579773, "train/loss_prose": 3.401018977165222, "train/loss_code": 1.2625657767057419} +{"step": 1269, "train/loss": 2.4521965980529785, "train/lm_loss": 2.4521965980529785, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6473997423266614e-05, "perf/tokens_per_sec": 25630.61671194578, "train/loss_prose": 3.7401441733042398, "train/loss_code": 1.4941194653511047, "train/loss_math": 2.4206628799438477} +{"step": 1270, "train/loss": 2.3056859970092773, "train/lm_loss": 2.3056859970092773, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.646505535700649e-05, "perf/tokens_per_sec": 26538.80547278207, "train/loss_math": 2.3535726964473724, "train/loss_prose": 3.4779003858566284, "train/loss_code": 1.037698119878769} +{"step": 1271, "train/loss": 2.4910850524902344, "train/lm_loss": 2.4910850524902344, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.645610282893915e-05, "perf/tokens_per_sec": 25167.581306702567, "train/loss_code": 1.6268555521965027, "train/loss_prose": 4.141799449920654, "train/loss_math": 2.568829298019409} +{"step": 1272, "train/loss": 3.0955832600593567, "train/lm_loss": 3.0955832600593567, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.644713984342793e-05, "perf/tokens_per_sec": 26287.92564653029, "train/loss_prose": 3.7897626161575317, "train/loss_math": 2.4014036655426025} +{"step": 1273, "train/loss": 2.7661861181259155, "train/lm_loss": 2.7661861181259155, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.643816640484131e-05, "perf/tokens_per_sec": 25882.964095023442, "train/loss_prose": 3.8406758308410645, "train/loss_code": 0.992303729057312, "train/loss_math": 2.403789460659027} +{"step": 1274, "train/loss": 2.2568992376327515, "train/lm_loss": 2.2568992376327515, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.642918251755281e-05, "perf/tokens_per_sec": 26048.062200456374, "train/loss_code": 1.3883976936340332, "train/loss_math": 2.1989593505859375, "train/loss_prose": 4.051842093467712} +{"step": 1275, "train/loss": 1.9241721332073212, "train/lm_loss": 1.9241721332073212, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.642018818594107e-05, "perf/tokens_per_sec": 26100.49707392665, "train/loss_code": 1.0546865463256836, "train/loss_math": 2.544879913330078, "train/loss_prose": 3.0424351692199707} +{"step": 1276, "train/loss": 2.172926127910614, "train/lm_loss": 2.172926127910614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6411183414389837e-05, "perf/tokens_per_sec": 26233.053263581587, "train/loss_prose": 3.417233149210612, "train/loss_code": 1.2254004925489426, "train/loss_math": 2.2301077842712402} +{"step": 1277, "train/loss": 3.2486765384674072, "train/lm_loss": 3.2486765384674072, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.64021682072879e-05, "perf/tokens_per_sec": 26081.75460191622, "train/loss_prose": 3.6832184394200644, "train/loss_math": 2.379866600036621, "train/loss_code": 1.5102351903915405} +{"step": 1278, "train/loss": 3.025909423828125, "train/lm_loss": 3.025909423828125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.639314256902919e-05, "perf/tokens_per_sec": 25600.7492284698, "train/loss_code": 1.967082142829895, "train/loss_math": 2.3060548901557922, "train/loss_prose": 4.338657855987549} +{"step": 1279, "train/loss": 2.793739140033722, "train/lm_loss": 2.793739140033722, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.638410650401267e-05, "perf/tokens_per_sec": 26759.170590232672, "train/loss_prose": 3.7624882459640503, "train/loss_math": 2.674206018447876, "train/loss_code": 1.5419178009033203} +{"step": 1280, "train/loss": 2.293808728456497, "train/lm_loss": 2.293808728456497, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6375060016642415e-05, "perf/tokens_per_sec": 26335.072949320693, "train/loss_code": 1.5762586295604706, "train/loss_math": 2.552910248438517, "train/loss_prose": 4.386704921722412} +{"step": 1281, "train/loss": 2.028085768222809, "train/lm_loss": 2.028085768222809, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.636600311132758e-05, "perf/tokens_per_sec": 27132.292544564105, "train/loss_code": 1.7786307136217754, "train/loss_math": 2.2340831756591797, "train/loss_prose": 3.3188180923461914} +{"step": 1282, "train/loss": 2.7126904726028442, "train/lm_loss": 2.7126904726028442, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.635693579248238e-05, "perf/tokens_per_sec": 26184.314567765003, "train/loss_prose": 3.2020793557167053, "train/loss_code": 1.8328441381454468, "train/loss_math": 2.3534539540608725} +{"step": 1283, "train/loss": 2.600615441799164, "train/lm_loss": 2.600615441799164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6347858064526125e-05, "perf/tokens_per_sec": 26387.17122762718, "train/loss_prose": 3.5527010560035706, "train/loss_code": 1.242175022761027, "train/loss_math": 2.867594003677368} +{"step": 1284, "train/loss": 3.1402817964553833, "train/lm_loss": 3.1402817964553833, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6338769931883185e-05, "perf/tokens_per_sec": 26122.920351856217, "train/loss_prose": 3.878583312034607, "train/loss_math": 2.530500332514445, "train/loss_code": 2.016420841217041} +{"step": 1285, "train/loss": 2.930670738220215, "train/lm_loss": 2.930670738220215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.632967139898301e-05, "perf/tokens_per_sec": 26816.435859373414, "train/loss_math": 2.483570098876953, "train/loss_prose": 3.620824694633484, "train/loss_code": 1.9974636435508728} +{"step": 1286, "train/loss": 2.3173946142196655, "train/lm_loss": 2.3173946142196655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.632056247026011e-05, "perf/tokens_per_sec": 26269.35496468595, "train/loss_math": 2.297762950261434, "train/loss_prose": 3.1929606199264526, "train/loss_code": 1.7533156474431355} +{"step": 1287, "train/loss": 1.7515016794204712, "train/lm_loss": 1.7515016794204712, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.631144315015407e-05, "perf/tokens_per_sec": 25954.835800421202, "train/loss_code": 1.3436907529830933, "train/loss_math": 2.4311865965525308} +{"step": 1288, "train/loss": 2.128799259662628, "train/lm_loss": 2.128799259662628, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6302313443109526e-05, "perf/tokens_per_sec": 26233.9746056857, "train/loss_math": 2.6566179593404136, "train/loss_code": 1.812108039855957} +{"step": 1289, "train/loss": 2.003062129020691, "train/lm_loss": 2.003062129020691, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.629317335357619e-05, "perf/tokens_per_sec": 26106.60508824343, "train/loss_code": 1.314599621295929, "train/loss_math": 1.9935396909713745, "train/loss_prose": 3.7289795875549316} +{"step": 1290, "train/loss": 2.8136459589004517, "train/lm_loss": 2.8136459589004517, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6284022886008836e-05, "perf/tokens_per_sec": 26187.826963911437, "train/loss_math": 2.554292619228363, "train/loss_code": 1.7140300869941711, "train/loss_prose": 4.4319679737091064} +{"step": 1291, "train/loss": 2.8638843297958374, "train/lm_loss": 2.8638843297958374, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6274862044867304e-05, "perf/tokens_per_sec": 26037.797809959004, "train/loss_math": 2.492639034986496, "train/loss_prose": 4.22618305683136, "train/loss_code": 2.244076371192932} +{"step": 1292, "train/loss": 2.851258337497711, "train/lm_loss": 2.851258337497711, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6265690834616446e-05, "perf/tokens_per_sec": 26223.68328631962, "train/loss_prose": 3.8154053688049316, "train/loss_math": 2.4213669300079346, "train/loss_code": 2.173705577850342} +{"step": 1293, "train/loss": 2.63772189617157, "train/lm_loss": 2.63772189617157, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.625650925972622e-05, "perf/tokens_per_sec": 26045.890212249848, "train/loss_prose": 3.719067931175232, "train/loss_code": 1.3656847476959229, "train/loss_math": 2.128448963165283} +{"step": 1294, "train/loss": 2.9322271943092346, "train/lm_loss": 2.9322271943092346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6247317324671605e-05, "perf/tokens_per_sec": 26716.600005287375, "train/loss_prose": 3.542346715927124, "train/loss_code": 2.217360556125641, "train/loss_math": 2.4268550872802734} +{"step": 1295, "train/loss": 2.8599369525909424, "train/lm_loss": 2.8599369525909424, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6238115033932636e-05, "perf/tokens_per_sec": 25942.01987493979, "train/loss_code": 1.9423097769419353, "train/loss_prose": 3.680436849594116, "train/loss_math": 2.3308184146881104} +{"step": 1296, "train/loss": 2.376954734325409, "train/lm_loss": 2.376954734325409, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.622890239199441e-05, "perf/tokens_per_sec": 25991.78662971105, "train/loss_code": 1.995965600013733, "train/loss_math": 2.4087815284729004, "train/loss_prose": 3.3926141262054443} +{"step": 1297, "train/loss": 3.2161861062049866, "train/lm_loss": 3.2161861062049866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.621967940334705e-05, "perf/tokens_per_sec": 25995.365562990064, "train/loss_code": 1.4610877633094788, "train/loss_prose": 4.072200536727905, "train/loss_math": 2.4463109970092773} +{"step": 1298, "train/loss": 2.496410548686981, "train/lm_loss": 2.496410548686981, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6210446072485725e-05, "perf/tokens_per_sec": 26186.310131801365, "train/loss_math": 2.3913252353668213, "train/loss_prose": 3.3474109172821045, "train/loss_code": 1.8555803298950195} +{"step": 1299, "train/loss": 2.8060670495033264, "train/lm_loss": 2.8060670495033264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.620120240391065e-05, "perf/tokens_per_sec": 26247.802507463417, "train/loss_code": 1.3490238189697266, "train/loss_prose": 3.5012044310569763, "train/loss_math": 2.364898204803467} +{"step": 1300, "train/loss": 2.3924560546875, "train/lm_loss": 2.3924560546875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.619194840212708e-05, "perf/tokens_per_sec": 25999.69306205469, "train/loss_math": 2.274966835975647, "train/loss_code": 1.584736744562785, "train/loss_prose": 3.2785014311472573} +{"step": 1301, "train/loss": 2.90567284822464, "train/lm_loss": 2.90567284822464, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.61826840716453e-05, "perf/tokens_per_sec": 26193.536617867238, "train/loss_prose": 3.4848811626434326, "train/loss_math": 2.5109094381332397, "train/loss_code": 2.1420196294784546} +{"step": 1302, "train/loss": 2.5475504100322723, "train/lm_loss": 2.5475504100322723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.617340941698064e-05, "perf/tokens_per_sec": 26215.4400412614, "train/loss_code": 1.7391779124736786, "train/loss_prose": 3.7433676719665527, "train/loss_math": 2.1935884952545166} +{"step": 1303, "train/loss": 2.3020850121974945, "train/lm_loss": 2.3020850121974945, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.616412444265345e-05, "perf/tokens_per_sec": 25902.98396807184, "train/loss_math": 2.258922497431437, "train/loss_code": 1.442920724550883, "train/loss_prose": 3.6555747985839844} +{"step": 1304, "train/loss": 3.0270793437957764, "train/lm_loss": 3.0270793437957764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.615482915318911e-05, "perf/tokens_per_sec": 26161.148394838387, "train/loss_prose": 3.5052330017089846, "train/loss_math": 2.29054856300354, "train/loss_code": 2.1093738079071045} +{"step": 1305, "train/loss": 2.662462681531906, "train/lm_loss": 2.662462681531906, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.614552355311802e-05, "perf/tokens_per_sec": 26866.382024721013, "train/loss_prose": 3.518768608570099, "train/loss_math": 2.224047303199768, "train/loss_code": 1.3882662057876587} +{"step": 1306, "train/loss": 2.743189513683319, "train/lm_loss": 2.743189513683319, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.613620764697564e-05, "perf/tokens_per_sec": 26499.384839398313, "train/loss_code": 1.8343583345413208, "train/loss_math": 2.4795949459075928, "train/loss_prose": 3.3294020891189575} +{"step": 1307, "train/loss": 2.4723077416419983, "train/lm_loss": 2.4723077416419983, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.612688143930242e-05, "perf/tokens_per_sec": 25763.955358133702, "train/loss_math": 2.39268559217453, "train/loss_prose": 3.5648727416992188, "train/loss_code": 1.538987159729004} +{"step": 1308, "train/loss": 2.3422593772411346, "train/lm_loss": 2.3422593772411346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.611754493464383e-05, "perf/tokens_per_sec": 25812.420081554646, "train/loss_math": 2.336508274078369, "train/loss_code": 1.7029306292533875, "train/loss_prose": 3.6266679763793945} +{"step": 1309, "train/loss": 2.3166372179985046, "train/lm_loss": 2.3166372179985046, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.610819813755038e-05, "perf/tokens_per_sec": 25900.914203699125, "train/loss_prose": 3.6005969047546387, "train/loss_code": 1.3525046904881795, "train/loss_math": 2.4247963428497314} +{"step": 1310, "train/loss": 2.3861477077007294, "train/lm_loss": 2.3861477077007294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6098841052577583e-05, "perf/tokens_per_sec": 25799.628146136933, "train/loss_prose": 3.3664937019348145, "train/loss_math": 2.4032118916511536, "train/loss_code": 1.3716727495193481} +{"step": 1311, "train/loss": 2.3015296459198, "train/lm_loss": 2.3015296459198, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6089473684285974e-05, "perf/tokens_per_sec": 26134.72291202054, "train/loss_code": 1.3164792656898499, "train/loss_prose": 3.6746036211649575, "train/loss_math": 2.122509241104126} +{"step": 1312, "train/loss": 3.036127060651779, "train/lm_loss": 3.036127060651779, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6080096037241086e-05, "perf/tokens_per_sec": 25890.53132356177, "train/loss_prose": 4.081571340560913, "train/loss_code": 1.7667157649993896, "train/loss_math": 2.214649796485901} +{"step": 1313, "train/loss": 2.1157615780830383, "train/lm_loss": 2.1157615780830383, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6070708116013476e-05, "perf/tokens_per_sec": 26406.76126446004, "train/loss_math": 2.431000280380249, "train/loss_code": 1.5903639793395996} +{"step": 1314, "train/loss": 2.558947414159775, "train/lm_loss": 2.558947414159775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.606130992517869e-05, "perf/tokens_per_sec": 26042.02102174025, "train/loss_prose": 3.94451642036438, "train/loss_math": 2.306805690129598, "train/loss_code": 1.8873766660690308} +{"step": 1315, "train/loss": 2.0645945072174072, "train/lm_loss": 2.0645945072174072, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.605190146931731e-05, "perf/tokens_per_sec": 26006.108278876007, "train/loss_code": 1.7282560825347901, "train/loss_prose": 3.5914697647094727, "train/loss_math": 2.142002582550049} +{"step": 1316, "train/loss": 2.3097270131111145, "train/lm_loss": 2.3097270131111145, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6042482753014895e-05, "perf/tokens_per_sec": 24593.825419730183, "train/loss_prose": 3.8093091249465942, "train/loss_math": 2.619249184926351, "train/loss_code": 1.0004834334055583} +{"step": 1317, "train/loss": 2.879979372024536, "train/lm_loss": 2.879979372024536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.603305378086201e-05, "perf/tokens_per_sec": 26186.230303399814, "train/loss_prose": 3.5404582977294923, "train/loss_math": 2.1200353503227234, "train/loss_code": 1.0974725484848022} +{"step": 1318, "train/loss": 2.6182345747947693, "train/lm_loss": 2.6182345747947693, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.602361455745423e-05, "perf/tokens_per_sec": 25785.76516463691, "train/loss_code": 1.64883154630661, "train/loss_math": 2.5974632263183595, "train/loss_prose": 4.660898685455322} +{"step": 1319, "train/loss": 2.0462956726551056, "train/lm_loss": 2.0462956726551056, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.601416508739211e-05, "perf/tokens_per_sec": 23937.529516675586, "train/loss_math": 2.233254551887512, "train/loss_code": 1.3587762117385864, "train/loss_prose": 3.2343751192092896} +{"step": 1320, "train/loss": 2.637212812900543, "train/lm_loss": 2.637212812900543, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.600470537528121e-05, "perf/tokens_per_sec": 25965.388061573805, "train/loss_prose": 3.6049509048461914, "train/loss_code": 1.3452266454696655, "train/loss_math": 2.5307989915211997} +{"step": 1321, "train/loss": 2.5802106857299805, "train/lm_loss": 2.5802106857299805, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.599523542573207e-05, "perf/tokens_per_sec": 25134.405992509364, "train/loss_prose": 3.2739570140838623, "train/loss_code": 1.762227475643158, "train/loss_math": 2.43178653717041} +{"step": 1322, "train/loss": 2.4972691535949707, "train/lm_loss": 2.4972691535949707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.598575524336025e-05, "perf/tokens_per_sec": 26022.88938673065, "train/loss_code": 1.6607539653778076, "train/loss_math": 2.2234283288319907, "train/loss_prose": 4.1628029346466064} +{"step": 1323, "train/loss": 2.7825194001197815, "train/lm_loss": 2.7825194001197815, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.597626483278625e-05, "perf/tokens_per_sec": 26618.123367729542, "train/loss_math": 2.4086915850639343, "train/loss_prose": 3.506011486053467, "train/loss_code": 2.107353687286377} +{"step": 1324, "train/loss": 3.352063834667206, "train/lm_loss": 3.352063834667206, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5966764198635606e-05, "perf/tokens_per_sec": 26504.08623304335, "train/loss_math": 2.4256298542022705, "train/loss_prose": 3.8477758169174194, "train/loss_code": 1.3042271137237549} +{"step": 1325, "train/loss": 2.8267692923545837, "train/lm_loss": 2.8267692923545837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.595725334553879e-05, "perf/tokens_per_sec": 26173.62352657984, "train/loss_prose": 3.524221658706665, "train/loss_code": 2.4286508560180664, "train/loss_math": 2.403209388256073} +{"step": 1326, "train/loss": 2.6390837728977203, "train/lm_loss": 2.6390837728977203, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5947732278131286e-05, "perf/tokens_per_sec": 26134.444614483582, "train/loss_prose": 4.462023615837097, "train/loss_code": 1.8567403554916382, "train/loss_math": 2.2061335245768228} +{"step": 1327, "train/loss": 2.6902210116386414, "train/lm_loss": 2.6902210116386414, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.593820100105355e-05, "perf/tokens_per_sec": 26378.86539920218, "train/loss_prose": 3.3823596835136414, "train/loss_math": 2.5163934230804443, "train/loss_code": 1.4797716736793518} +{"step": 1328, "train/loss": 2.3536799550056458, "train/lm_loss": 2.3536799550056458, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5928659518951e-05, "perf/tokens_per_sec": 26778.69095783649, "train/loss_prose": 3.5473784605662027, "train/loss_math": 2.209071397781372, "train/loss_code": 1.4945582151412964} +{"step": 1329, "train/loss": 2.8939250707626343, "train/lm_loss": 2.8939250707626343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.591910783647404e-05, "perf/tokens_per_sec": 26007.99802592039, "train/loss_code": 1.938850204149882, "train/loss_prose": 3.6745671033859253, "train/loss_math": 2.6365816593170166} +{"step": 1330, "train/loss": 2.4066004157066345, "train/lm_loss": 2.4066004157066345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.590954595827806e-05, "perf/tokens_per_sec": 26330.027777011474, "train/loss_code": 1.710568110148112, "train/loss_math": 2.505448897679647, "train/loss_prose": 3.3023757934570312} +{"step": 1331, "train/loss": 2.701342821121216, "train/lm_loss": 2.701342821121216, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.589997388902338e-05, "perf/tokens_per_sec": 26341.372037148056, "train/loss_prose": 4.137178421020508, "train/loss_math": 2.297555148601532, "train/loss_code": 2.0730820894241333} +{"step": 1332, "train/loss": 2.4014406204223633, "train/lm_loss": 2.4014406204223633, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.589039163337534e-05, "perf/tokens_per_sec": 25945.154099984746, "train/loss_math": 2.6396538257598876, "train/loss_code": 1.0733346045017242, "train/loss_prose": 3.866586685180664} +{"step": 1333, "train/loss": 2.534031093120575, "train/lm_loss": 2.534031093120575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.588079919600419e-05, "perf/tokens_per_sec": 26022.101056796168, "train/loss_code": 1.2136348684628804, "train/loss_prose": 3.531732201576233, "train/loss_math": 2.504415273666382} +{"step": 1334, "train/loss": 2.8572464287281036, "train/lm_loss": 2.8572464287281036, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5871196581585166e-05, "perf/tokens_per_sec": 25982.784661873375, "train/loss_prose": 3.760503649711609, "train/loss_code": 1.5750917196273804, "train/loss_math": 2.332886815071106} +{"step": 1335, "train/loss": 3.2328688502311707, "train/lm_loss": 3.2328688502311707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.586158379479848e-05, "perf/tokens_per_sec": 26053.671554421195, "train/loss_prose": 3.771734857559204, "train/loss_code": 2.24009370803833, "train/loss_math": 2.382092237472534} +{"step": 1336, "train/loss": 2.061149388551712, "train/lm_loss": 2.061149388551712, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.585196084032928e-05, "perf/tokens_per_sec": 26202.165712420654, "train/loss_code": 1.719963053862254, "train/loss_math": 2.6107876300811768, "train/loss_prose": 3.558628797531128} +{"step": 1337, "train/loss": 3.1421852707862854, "train/lm_loss": 3.1421852707862854, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.584232772286768e-05, "perf/tokens_per_sec": 25922.878372791332, "train/loss_math": 2.313507596651713, "train/loss_prose": 3.6393919944763184} +{"step": 1338, "train/loss": 2.381339818239212, "train/lm_loss": 2.381339818239212, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.583268444710875e-05, "perf/tokens_per_sec": 26255.745106429804, "train/loss_prose": 3.6652096112569175, "train/loss_code": 1.6110178112983704} +{"step": 1339, "train/loss": 2.675035297870636, "train/lm_loss": 2.675035297870636, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5823031017752485e-05, "perf/tokens_per_sec": 25279.458126716087, "train/loss_math": 2.465626835823059, "train/loss_prose": 3.9364418983459473, "train/loss_code": 1.832445502281189} +{"step": 1340, "train/loss": 2.6457181572914124, "train/lm_loss": 2.6457181572914124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5813367439503875e-05, "perf/tokens_per_sec": 24901.176051785566, "train/loss_code": 2.0540053447087607, "train/loss_math": 2.270047426223755, "train/loss_prose": 3.4878780047098794} +{"step": 1341, "train/loss": 2.6944698095321655, "train/lm_loss": 2.6944698095321655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5803693717072815e-05, "perf/tokens_per_sec": 26177.133054900783, "train/loss_prose": 3.4600700736045837, "train/loss_code": 1.7090447743733723, "train/loss_math": 2.5883431434631348} +{"step": 1342, "train/loss": 2.3170963525772095, "train/lm_loss": 2.3170963525772095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.579400985517416e-05, "perf/tokens_per_sec": 26194.535064808333, "train/loss_code": 1.5478370984395344, "train/loss_math": 2.3848206996917725, "train/loss_prose": 3.3693984746932983} +{"step": 1343, "train/loss": 2.632635235786438, "train/lm_loss": 2.632635235786438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5784315858527715e-05, "perf/tokens_per_sec": 26022.652882735096, "train/loss_code": 1.505886435508728, "train/loss_prose": 3.7299018700917563, "train/loss_math": 2.676858425140381} +{"step": 1344, "train/loss": 2.7137938141822815, "train/lm_loss": 2.7137938141822815, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.577461173185821e-05, "perf/tokens_per_sec": 25441.82054642575, "train/loss_code": 1.0873801112174988, "train/loss_math": 2.400019645690918, "train/loss_prose": 4.967756390571594} +{"step": 1345, "train/loss": 2.554772824048996, "train/lm_loss": 2.554772824048996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5764897479895317e-05, "perf/tokens_per_sec": 25442.837866576625, "train/loss_code": 1.2422181367874146, "train/loss_prose": 3.632132371266683, "train/loss_math": 2.3524499336878457} +{"step": 1346, "train/loss": 3.464400887489319, "train/lm_loss": 3.464400887489319, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.575517310737365e-05, "perf/tokens_per_sec": 26660.835561294007, "train/loss_prose": 4.021353999773662, "train/loss_code": 1.2723133563995361, "train/loss_math": 2.314769983291626} +{"step": 1347, "train/loss": 2.1828150153160095, "train/lm_loss": 2.1828150153160095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.574543861903274e-05, "perf/tokens_per_sec": 24972.809061215827, "train/loss_math": 2.5706941286722818, "train/loss_code": 1.476719319820404, "train/loss_prose": 3.843559980392456} +{"step": 1348, "train/loss": 2.865832269191742, "train/lm_loss": 2.865832269191742, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.573569401961708e-05, "perf/tokens_per_sec": 25085.704082809738, "train/loss_prose": 3.5983216762542725, "train/loss_math": 2.7823562622070312, "train/loss_code": 1.917004942893982} +{"step": 1349, "train/loss": 2.1292544305324554, "train/lm_loss": 2.1292544305324554, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.572593931387604e-05, "perf/tokens_per_sec": 26639.42103630928, "train/loss_code": 1.528599222501119, "train/loss_math": 2.3681138157844543, "train/loss_prose": 2.975780963897705} +{"step": 1350, "train/loss": 2.938002586364746, "train/lm_loss": 2.938002586364746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.571617450656397e-05, "perf/tokens_per_sec": 25954.169217033195, "train/loss_prose": 3.6936641534169516, "train/loss_math": 2.619398752848307, "train/loss_code": 2.2824162244796753} +{"step": 1351, "train/loss": 2.270089864730835, "train/lm_loss": 2.270089864730835, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5706399602440106e-05, "perf/tokens_per_sec": 26156.328926714523, "train/loss_code": 1.9650126844644547, "train/loss_math": 2.3502458333969116, "train/loss_prose": 3.2499303817749023} +{"step": 1352, "train/loss": 2.406019240617752, "train/lm_loss": 2.406019240617752, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.569661460626862e-05, "perf/tokens_per_sec": 26102.9954615692, "train/loss_math": 2.4461538791656494, "train/loss_code": 1.4648743271827698, "train/loss_prose": 3.2668946981430054} +{"step": 1353, "train/loss": 2.253565788269043, "train/lm_loss": 2.253565788269043, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5686819522818594e-05, "perf/tokens_per_sec": 24937.719362878513, "train/loss_code": 1.7595287561416626, "train/loss_math": 2.3976286252339682, "train/loss_prose": 3.79752516746521} +{"step": 1354, "train/loss": 2.4565287828445435, "train/lm_loss": 2.4565287828445435, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.567701435686404e-05, "perf/tokens_per_sec": 25465.994462067534, "train/loss_code": 1.8152595460414886, "train/loss_math": 2.3125044107437134, "train/loss_prose": 3.883091449737549} +{"step": 1355, "train/loss": 2.816774070262909, "train/lm_loss": 2.816774070262909, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.566719911318389e-05, "perf/tokens_per_sec": 26628.850492900987, "train/loss_prose": 3.6035232543945312, "train/loss_math": 2.4807595014572144, "train/loss_code": 1.8005849123001099} +{"step": 1356, "train/loss": 2.680016279220581, "train/lm_loss": 2.680016279220581, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.565737379656195e-05, "perf/tokens_per_sec": 25610.251906621746, "train/loss_prose": 3.2033899625142417, "train/loss_code": 2.4002596139907837, "train/loss_math": 2.3431471983591714} +{"step": 1357, "train/loss": 2.559308886528015, "train/lm_loss": 2.559308886528015, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.564753841178697e-05, "perf/tokens_per_sec": 25305.746987738847, "train/loss_math": 2.4823760390281677, "train/loss_code": 1.7565765976905823, "train/loss_prose": 3.5159069299697876} +{"step": 1358, "train/loss": 2.3287824392318726, "train/lm_loss": 2.3287824392318726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5637692963652596e-05, "perf/tokens_per_sec": 25339.52595551242, "train/loss_math": 2.51771879196167, "train/loss_code": 1.21652752161026, "train/loss_prose": 3.608610153198242} +{"step": 1359, "train/loss": 3.5103739500045776, "train/lm_loss": 3.5103739500045776, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.562783745695738e-05, "perf/tokens_per_sec": 25303.547949704764, "train/loss_prose": 4.064423481623332, "train/loss_code": 1.1213716268539429, "train/loss_math": 2.575077772140503} +{"step": 1360, "train/loss": 2.538542866706848, "train/lm_loss": 2.538542866706848, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.561797189650478e-05, "perf/tokens_per_sec": 25190.497895888257, "train/loss_prose": 2.825040280818939, "train/loss_math": 2.546454350153605, "train/loss_code": 1.3688182830810547} +{"step": 1361, "train/loss": 2.151017725467682, "train/lm_loss": 2.151017725467682, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.560809628710315e-05, "perf/tokens_per_sec": 25470.18525179056, "train/loss_math": 2.3233928084373474, "train/loss_code": 1.4624358018239338, "train/loss_prose": 3.5272629261016846} +{"step": 1362, "train/loss": 2.0194211602211, "train/lm_loss": 2.0194211602211, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.559821063356574e-05, "perf/tokens_per_sec": 25507.964520200145, "train/loss_math": 2.6162428855895996, "train/loss_code": 1.8204805453618367} +{"step": 1363, "train/loss": 2.2377747297286987, "train/lm_loss": 2.2377747297286987, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.558831494071069e-05, "perf/tokens_per_sec": 26882.987983932675, "train/loss_prose": 2.376174509525299, "train/loss_math": 2.4118536710739136, "train/loss_code": 1.7512161135673523} +{"step": 1364, "train/loss": 2.786098062992096, "train/lm_loss": 2.786098062992096, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.557840921336105e-05, "perf/tokens_per_sec": 25187.543336685834, "train/loss_prose": 3.531456172466278, "train/loss_math": 2.3695056438446045, "train/loss_code": 1.0544428825378418} +{"step": 1365, "train/loss": 2.399710178375244, "train/lm_loss": 2.399710178375244, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.556849345634475e-05, "perf/tokens_per_sec": 26292.069443424178, "train/loss_math": 2.3849291801452637, "train/loss_prose": 3.244830012321472, "train/loss_code": 1.9845406413078308} +{"step": 1366, "train/loss": 2.474871039390564, "train/lm_loss": 2.474871039390564, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.555856767449461e-05, "perf/tokens_per_sec": 26228.247310744504, "train/loss_math": 2.4833484888076782, "train/loss_prose": 3.9438998699188232, "train/loss_code": 1.7361180186271667} +{"step": 1367, "train/loss": 2.545690417289734, "train/lm_loss": 2.545690417289734, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5548631872648326e-05, "perf/tokens_per_sec": 25674.167502054846, "train/loss_prose": 3.8951210379600525, "train/loss_code": 1.1962599009275436} +{"step": 1368, "train/loss": 2.6539350152015686, "train/lm_loss": 2.6539350152015686, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5538686055648506e-05, "perf/tokens_per_sec": 26048.101694504083, "train/loss_math": 2.427605986595154, "train/loss_prose": 3.1742683251698813, "train/loss_code": 1.9982507228851318} +{"step": 1369, "train/loss": 1.929292619228363, "train/lm_loss": 1.929292619228363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5528730228342605e-05, "perf/tokens_per_sec": 25681.075659596096, "train/loss_code": 1.5883370637893677, "train/loss_math": 2.163454055786133, "train/loss_prose": 3.1657471656799316} +{"step": 1370, "train/loss": 2.836356043815613, "train/lm_loss": 2.836356043815613, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.551876439558298e-05, "perf/tokens_per_sec": 25829.806416305077, "train/loss_prose": 3.854238828023275, "train/loss_code": 2.0024896462758384, "train/loss_math": 2.5603307485580444} +{"step": 1371, "train/loss": 2.559221088886261, "train/lm_loss": 2.559221088886261, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.550878856222685e-05, "perf/tokens_per_sec": 25760.710245283415, "train/loss_code": 1.6844029426574707, "train/loss_prose": 3.752420663833618, "train/loss_math": 2.400030493736267} +{"step": 1372, "train/loss": 3.132993757724762, "train/lm_loss": 3.132993757724762, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.549880273313631e-05, "perf/tokens_per_sec": 26033.220921898228, "train/loss_prose": 3.67315616607666, "train/loss_math": 2.232723037401835} +{"step": 1373, "train/loss": 2.717710942029953, "train/lm_loss": 2.717710942029953, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.548880691317835e-05, "perf/tokens_per_sec": 25708.783977230043, "train/loss_code": 1.5430252154668171, "train/loss_prose": 3.7419655323028564, "train/loss_math": 2.1447505950927734} +{"step": 1374, "train/loss": 3.0386452078819275, "train/lm_loss": 3.0386452078819275, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.54788011072248e-05, "perf/tokens_per_sec": 26967.596544102875, "train/loss_math": 2.548529028892517, "train/loss_prose": 3.6029563903808595, "train/loss_code": 1.1973215341567993} +{"step": 1375, "train/loss": 2.7328356504440308, "train/lm_loss": 2.7328356504440308, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5468785320152365e-05, "perf/tokens_per_sec": 25582.412603678058, "train/loss_code": 1.6357289950052898, "train/loss_math": 2.457557201385498, "train/loss_prose": 4.013461351394653} +{"step": 1376, "train/loss": 1.5412418246269226, "train/lm_loss": 1.5412418246269226, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5458759556842624e-05, "perf/tokens_per_sec": 25998.98482725223, "train/loss_prose": 3.0276949405670166, "train/loss_code": 1.1841286222139995, "train/loss_math": 2.197467803955078} +{"step": 1377, "train/loss": 2.467275947332382, "train/lm_loss": 2.467275947332382, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.544872382218202e-05, "perf/tokens_per_sec": 25994.22491681936, "train/loss_math": 2.3349125385284424, "train/loss_prose": 3.3188299338022866, "train/loss_code": 1.3884903192520142} +{"step": 1378, "train/loss": 2.319938153028488, "train/lm_loss": 2.319938153028488, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.543867812106183e-05, "perf/tokens_per_sec": 26030.065384749418, "train/loss_code": 1.5090345939000447, "train/loss_math": 2.4024954636891684, "train/loss_prose": 3.4124573469161987} +{"step": 1379, "train/loss": 3.174082577228546, "train/lm_loss": 3.174082577228546, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.542862245837821e-05, "perf/tokens_per_sec": 25875.44026086498, "train/loss_prose": 3.8473584055900574, "train/loss_code": 1.8008780479431152, "train/loss_math": 2.7341160774230957} +{"step": 1380, "train/loss": 2.031559497117996, "train/lm_loss": 2.031559497117996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.541855683903219e-05, "perf/tokens_per_sec": 25831.864831257095, "train/loss_math": 2.339998245239258, "train/loss_code": 1.3215087056159973, "train/loss_prose": 3.9464457035064697} +{"step": 1381, "train/loss": 2.1070929169654846, "train/lm_loss": 2.1070929169654846, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5408481267929605e-05, "perf/tokens_per_sec": 26844.67810985776, "train/loss_math": 2.3422634601593018, "train/loss_code": 1.5638447105884552, "train/loss_prose": 3.574573278427124} +{"step": 1382, "train/loss": 2.127393454313278, "train/lm_loss": 2.127393454313278, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.539839574998117e-05, "perf/tokens_per_sec": 25587.74695640518, "train/loss_code": 1.6336918324232101, "train/loss_math": 2.5404038429260254, "train/loss_prose": 2.863168478012085} +{"step": 1383, "train/loss": 2.6544495820999146, "train/lm_loss": 2.6544495820999146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5388300290102456e-05, "perf/tokens_per_sec": 25487.226891454444, "train/loss_math": 2.3960145711898804, "train/loss_code": 2.0362406174341836, "train/loss_prose": 3.444948434829712} +{"step": 1384, "train/loss": 2.3972228169441223, "train/lm_loss": 2.3972228169441223, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.537819489321386e-05, "perf/tokens_per_sec": 25016.555296036346, "train/loss_math": 2.4430502355098724, "train/loss_code": 1.3414077162742615, "train/loss_prose": 3.361383557319641} +{"step": 1385, "train/loss": 2.465052008628845, "train/lm_loss": 2.465052008628845, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.536807956424063e-05, "perf/tokens_per_sec": 24567.378025900332, "train/loss_code": 1.2947765191396077, "train/loss_prose": 3.6081321239471436, "train/loss_math": 2.505845308303833} +{"step": 1386, "train/loss": 2.6694014966487885, "train/lm_loss": 2.6694014966487885, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.535795430811285e-05, "perf/tokens_per_sec": 25259.12736842879, "train/loss_prose": 3.6678882439931235, "train/loss_math": 2.2839534282684326, "train/loss_code": 1.927879810333252} +{"step": 1387, "train/loss": 3.173107147216797, "train/lm_loss": 3.173107147216797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.534781912976546e-05, "perf/tokens_per_sec": 24247.645693751878, "train/loss_math": 2.406508445739746, "train/loss_prose": 3.4286399682362876} +{"step": 1388, "train/loss": 2.8894318342208862, "train/lm_loss": 2.8894318342208862, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.53376740341382e-05, "perf/tokens_per_sec": 24838.78405274306, "train/loss_prose": 3.9358487129211426, "train/loss_math": 2.2831132411956787, "train/loss_code": 2.175455093383789} +{"step": 1389, "train/loss": 2.5372451543807983, "train/lm_loss": 2.5372451543807983, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.532751902617569e-05, "perf/tokens_per_sec": 24060.127225543703, "train/loss_prose": 3.7936434745788574, "train/loss_code": 1.5750358700752258, "train/loss_math": 2.6168863773345947} +{"step": 1390, "train/loss": 3.2012038230895996, "train/lm_loss": 3.2012038230895996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.531735411082735e-05, "perf/tokens_per_sec": 24432.37553899539, "train/loss_math": 2.403290629386902, "train/loss_prose": 3.4671748876571655} +{"step": 1391, "train/loss": 2.4924160540103912, "train/lm_loss": 2.4924160540103912, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.530717929304743e-05, "perf/tokens_per_sec": 24987.810254098003, "train/loss_code": 1.7861366868019104, "train/loss_math": 2.0532660484313965, "train/loss_prose": 4.344124674797058} +{"step": 1392, "train/loss": 1.9905321598052979, "train/lm_loss": 1.9905321598052979, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.529699457779503e-05, "perf/tokens_per_sec": 25228.303470313196, "train/loss_code": 1.295575588941574, "train/loss_math": 2.2337438265482583, "train/loss_prose": 4.0407233238220215} +{"step": 1393, "train/loss": 3.0011793971061707, "train/lm_loss": 3.0011793971061707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.528679997003403e-05, "perf/tokens_per_sec": 25090.356910854072, "train/loss_code": 1.7272351582845051, "train/loss_math": 2.488215446472168, "train/loss_prose": 4.084878623485565} +{"step": 1394, "train/loss": 2.702042043209076, "train/lm_loss": 2.702042043209076, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.527659547473317e-05, "perf/tokens_per_sec": 25854.95707713354, "train/loss_prose": 3.5196820735931396, "train/loss_code": 1.3393084804217021} +{"step": 1395, "train/loss": 2.6304070949554443, "train/lm_loss": 2.6304070949554443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5266381096866e-05, "perf/tokens_per_sec": 24425.358754098175, "train/loss_prose": 3.3254987597465515, "train/loss_code": 1.7926593621571858, "train/loss_math": 2.363283157348633} +{"step": 1396, "train/loss": 2.598148614168167, "train/lm_loss": 2.598148614168167, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5256156841410886e-05, "perf/tokens_per_sec": 25270.682525440036, "train/loss_prose": 3.4241981506347656, "train/loss_math": 2.691347678502401, "train/loss_code": 1.2192756533622742} +{"step": 1397, "train/loss": 2.2427673041820526, "train/lm_loss": 2.2427673041820526, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5245922713350996e-05, "perf/tokens_per_sec": 24739.241205118382, "train/loss_code": 2.1421643495559692, "train/loss_math": 2.3433703184127808} +{"step": 1398, "train/loss": 2.860895127058029, "train/lm_loss": 2.860895127058029, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.523567871767434e-05, "perf/tokens_per_sec": 24512.659763689313, "train/loss_code": 1.733755648136139, "train/loss_prose": 4.078266461690267, "train/loss_math": 2.3949498335520425} +{"step": 1399, "train/loss": 2.1861715018749237, "train/lm_loss": 2.1861715018749237, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.522542485937369e-05, "perf/tokens_per_sec": 25096.184538704365, "train/loss_code": 1.5469979643821716, "train/loss_prose": 3.4562015533447266, "train/loss_math": 2.1944881677627563} +{"step": 1400, "train/loss": 2.8136405646800995, "train/lm_loss": 2.8136405646800995, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.521516114344666e-05, "perf/tokens_per_sec": 26547.376431684024, "train/loss_code": 1.8777121901512146, "train/loss_prose": 3.265478563308716, "train/loss_math": 2.426306962966919} +{"step": 1400, "eval/loss": 2.256046937129194, "eval/lm_loss": 2.256046937129194, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 9.54528139028457, "eval/loss_code": 1.621177386203313, "eval/ppl_code": 5.059043259155239, "eval/loss_prose": 3.4902092971299825, "eval/ppl_prose": 32.79281042913961, "eval/loss_math": 2.2441155697471906, "eval/ppl_math": 9.432069858481027} +{"step": 1401, "train/loss": 2.115513324737549, "train/lm_loss": 2.115513324737549, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5204887574895684e-05, "perf/tokens_per_sec": 25161.57307017467, "train/loss_math": 2.1994301875432334, "train/loss_code": 1.7112175822257996, "train/loss_prose": 3.480945348739624} +{"step": 1402, "train/loss": 2.2084085047245026, "train/lm_loss": 2.2084085047245026, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.519460415872794e-05, "perf/tokens_per_sec": 22639.019773054617, "train/loss_math": 2.3647122979164124, "train/loss_code": 1.3702737092971802, "train/loss_prose": 4.097597599029541} +{"step": 1403, "train/loss": 2.9411003589630127, "train/lm_loss": 2.9411003589630127, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5184310899955465e-05, "perf/tokens_per_sec": 23966.949888046427, "train/loss_prose": 3.863506317138672, "train/loss_code": 1.712544023990631, "train/loss_math": 2.3248450756073} +{"step": 1404, "train/loss": 2.6033072471618652, "train/lm_loss": 2.6033072471618652, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5174007803595055e-05, "perf/tokens_per_sec": 24483.385493697406, "train/loss_code": 1.490350365638733, "train/loss_prose": 3.4865947365760803, "train/loss_math": 2.4090287685394287} +{"step": 1405, "train/loss": 2.5400233268737793, "train/lm_loss": 2.5400233268737793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.516369487466832e-05, "perf/tokens_per_sec": 25854.139980616772, "train/loss_math": 2.1124573945999146, "train/loss_prose": 3.611148993174235, "train/loss_code": 1.0369096994400024} +{"step": 1406, "train/loss": 2.1521447896957397, "train/lm_loss": 2.1521447896957397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.515337211820165e-05, "perf/tokens_per_sec": 25572.968636405585, "train/loss_code": 1.392968088388443, "train/loss_prose": 3.3734630346298218, "train/loss_math": 2.4491801261901855} +{"step": 1407, "train/loss": 2.076186776161194, "train/lm_loss": 2.076186776161194, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5143039539226234e-05, "perf/tokens_per_sec": 25871.89427079255, "train/loss_prose": 3.5024921894073486, "train/loss_code": 1.3822791179021199, "train/loss_math": 1.8192238410313923} +{"step": 1408, "train/loss": 2.6512001156806946, "train/lm_loss": 2.6512001156806946, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.513269714277805e-05, "perf/tokens_per_sec": 26610.990490913005, "train/loss_math": 2.0549420515696206, "train/loss_prose": 3.3707112669944763, "train/loss_code": 1.5619300603866577} +{"step": 1409, "train/loss": 2.7977352142333984, "train/lm_loss": 2.7977352142333984, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.512234493389785e-05, "perf/tokens_per_sec": 26213.880010314737, "train/loss_math": 2.54064679145813, "train/loss_code": 1.3962587118148804, "train/loss_prose": 3.6076783339182534} +{"step": 1410, "train/loss": 2.2126195430755615, "train/lm_loss": 2.2126195430755615, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5111982917631194e-05, "perf/tokens_per_sec": 25629.92844153035, "train/loss_math": 2.3179477055867515, "train/loss_code": 1.8966346383094788} +{"step": 1411, "train/loss": 3.08420193195343, "train/lm_loss": 3.08420193195343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.510161109902837e-05, "perf/tokens_per_sec": 25417.842905290443, "train/loss_prose": 3.6345801830291746, "train/loss_code": 1.3865196704864502, "train/loss_math": 2.557097315788269} +{"step": 1412, "train/loss": 2.6411924362182617, "train/lm_loss": 2.6411924362182617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5091229483144495e-05, "perf/tokens_per_sec": 26120.69614770013, "train/loss_math": 2.461030149459839, "train/loss_prose": 3.3584818840026855, "train/loss_code": 2.107424736022949} +{"step": 1413, "train/loss": 2.1995041370391846, "train/lm_loss": 2.1995041370391846, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.508083807503945e-05, "perf/tokens_per_sec": 26025.09696467498, "train/loss_math": 2.4719332695007323, "train/loss_code": 1.7454557220141094} +{"step": 1414, "train/loss": 2.7143827080726624, "train/lm_loss": 2.7143827080726624, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5070436879777865e-05, "perf/tokens_per_sec": 25588.39485070592, "train/loss_code": 2.0515175660451255, "train/loss_prose": 3.507197539011637, "train/loss_math": 2.519457459449768} +{"step": 1415, "train/loss": 2.237657755613327, "train/lm_loss": 2.237657755613327, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5060025902429174e-05, "perf/tokens_per_sec": 26130.906776747877, "train/loss_prose": 3.40046226978302, "train/loss_code": 1.4526737928390503, "train/loss_math": 2.247438391049703} +{"step": 1416, "train/loss": 2.589703857898712, "train/lm_loss": 2.589703857898712, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.504960514806753e-05, "perf/tokens_per_sec": 25784.371946149575, "train/loss_prose": 3.6516709327697754, "train/loss_code": 0.7945213615894318, "train/loss_math": 2.260952115058899} +{"step": 1417, "train/loss": 2.03360053896904, "train/lm_loss": 2.03360053896904, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.503917462177192e-05, "perf/tokens_per_sec": 26100.37811501573, "train/loss_prose": 3.0541369915008545, "train/loss_math": 2.3854631582895913, "train/loss_code": 1.5145691335201263} +{"step": 1418, "train/loss": 2.384787678718567, "train/lm_loss": 2.384787678718567, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.502873432862603e-05, "perf/tokens_per_sec": 26108.866571733823, "train/loss_prose": 3.299336791038513, "train/loss_code": 1.5207649767398834, "train/loss_math": 2.359524428844452} +{"step": 1419, "train/loss": 2.46171897649765, "train/lm_loss": 2.46171897649765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5018284273718336e-05, "perf/tokens_per_sec": 26251.973778387644, "train/loss_code": 1.5258371829986572, "train/loss_prose": 3.4336443742116294, "train/loss_math": 2.4076536893844604} +{"step": 1420, "train/loss": 2.3227389752864838, "train/lm_loss": 2.3227389752864838, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5007824462142076e-05, "perf/tokens_per_sec": 25795.715572292367, "train/loss_code": 1.6451914310455322, "train/loss_math": 2.2390147844950357, "train/loss_prose": 3.4646464586257935} +{"step": 1421, "train/loss": 3.152507722377777, "train/lm_loss": 3.152507722377777, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.499735489899524e-05, "perf/tokens_per_sec": 26793.89316672541, "train/loss_code": 1.439707636833191, "train/loss_prose": 3.977165174484253, "train/loss_math": 2.4548215866088867} +{"step": 1422, "train/loss": 2.848150134086609, "train/lm_loss": 2.848150134086609, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.498687558938055e-05, "perf/tokens_per_sec": 26763.797846105434, "train/loss_math": 2.22752837340037, "train/loss_prose": 4.133195638656616, "train/loss_code": 1.851514995098114} +{"step": 1423, "train/loss": 2.718615859746933, "train/lm_loss": 2.718615859746933, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4976386538405495e-05, "perf/tokens_per_sec": 26261.123128613224, "train/loss_math": 2.602051258087158, "train/loss_code": 1.4379740953445435, "train/loss_prose": 3.688941796620687} +{"step": 1424, "train/loss": 2.688528835773468, "train/lm_loss": 2.688528835773468, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.496588775118232e-05, "perf/tokens_per_sec": 26611.814905518044, "train/loss_prose": 3.4528733491897583, "train/loss_math": 2.322459578514099, "train/loss_code": 1.525909185409546} +{"step": 1425, "train/loss": 2.294509172439575, "train/lm_loss": 2.294509172439575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.495537923282801e-05, "perf/tokens_per_sec": 26085.714825392923, "train/loss_math": 2.462033176422119, "train/loss_code": 1.4362958669662476, "train/loss_prose": 3.1733155250549316} +{"step": 1426, "train/loss": 2.76318496465683, "train/lm_loss": 2.76318496465683, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4944860988464276e-05, "perf/tokens_per_sec": 27101.04790044154, "train/loss_math": 2.4155553579330444, "train/loss_code": 1.729536771774292, "train/loss_prose": 4.028586308161418} +{"step": 1427, "train/loss": 2.3443633913993835, "train/lm_loss": 2.3443633913993835, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.493433302321759e-05, "perf/tokens_per_sec": 26076.845594451872, "train/loss_prose": 3.5197563966115317, "train/loss_code": 1.4778787791728973, "train/loss_math": 2.2841217517852783} +{"step": 1428, "train/loss": 2.010329157114029, "train/lm_loss": 2.010329157114029, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.492379534221916e-05, "perf/tokens_per_sec": 26347.47210183268, "train/loss_code": 1.4667964696884155, "train/loss_math": 2.4725329875946045, "train/loss_prose": 3.8035852909088135} +{"step": 1429, "train/loss": 2.3786996603012085, "train/lm_loss": 2.3786996603012085, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.491324795060491e-05, "perf/tokens_per_sec": 25500.619239309846, "train/loss_math": 2.4973228772481284, "train/loss_prose": 3.8662421703338623, "train/loss_code": 1.2683814366658528} +{"step": 1430, "train/loss": 2.336366504430771, "train/lm_loss": 2.336366504430771, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.490269085351552e-05, "perf/tokens_per_sec": 25972.257523394863, "train/loss_math": 2.2508599758148193, "train/loss_prose": 3.509998162587484, "train/loss_code": 1.2197391390800476} +{"step": 1431, "train/loss": 1.9988613724708557, "train/lm_loss": 1.9988613724708557, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4892124056096386e-05, "perf/tokens_per_sec": 25874.465990933324, "train/loss_code": 1.6115468442440033, "train/loss_math": 2.2203332583109536, "train/loss_prose": 2.8837039470672607} +{"step": 1432, "train/loss": 2.8193111419677734, "train/lm_loss": 2.8193111419677734, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.488154756349764e-05, "perf/tokens_per_sec": 26747.96342445044, "train/loss_math": 2.5836390256881714, "train/loss_prose": 3.402825037638346, "train/loss_code": 2.0114586353302} +{"step": 1433, "train/loss": 2.306482195854187, "train/lm_loss": 2.306482195854187, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.487096138087414e-05, "perf/tokens_per_sec": 25440.727959839478, "train/loss_math": 2.300718287626902, "train/loss_prose": 3.5289418697357178, "train/loss_code": 1.1186050176620483} +{"step": 1434, "train/loss": 3.094200551509857, "train/lm_loss": 3.094200551509857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4860365513385456e-05, "perf/tokens_per_sec": 25488.28566087563, "train/loss_prose": 3.5664884090423583, "train/loss_math": 2.307054122289022} +{"step": 1435, "train/loss": 2.2037580013275146, "train/lm_loss": 2.2037580013275146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.484975996619589e-05, "perf/tokens_per_sec": 26077.320578926996, "train/loss_math": 2.5198774337768555, "train/loss_prose": 3.2858142852783203, "train/loss_code": 1.8608987092971803} +{"step": 1436, "train/loss": 2.8350837230682373, "train/lm_loss": 2.8350837230682373, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.483914474447445e-05, "perf/tokens_per_sec": 27103.95532072995, "train/loss_prose": 3.5353384613990784, "train/loss_code": 1.9547370274861653, "train/loss_math": 2.6751041412353516} +{"step": 1437, "train/loss": 2.1997388005256653, "train/lm_loss": 2.1997388005256653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.482851985339487e-05, "perf/tokens_per_sec": 25858.693033301977, "train/loss_code": 1.6683506965637207, "train/loss_prose": 4.1809821128845215, "train/loss_math": 2.2478415171305337} +{"step": 1438, "train/loss": 2.79704749584198, "train/lm_loss": 2.79704749584198, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.481788529813559e-05, "perf/tokens_per_sec": 25980.54496649573, "train/loss_prose": 4.7652366161346436, "train/loss_code": 1.927614649136861, "train/loss_math": 2.35435422261556} +{"step": 1439, "train/loss": 2.9308362007141113, "train/lm_loss": 2.9308362007141113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.480724108387977e-05, "perf/tokens_per_sec": 27098.48304754574, "train/loss_prose": 3.607501447200775, "train/loss_code": 2.1933997869491577, "train/loss_math": 2.3149421215057373} +{"step": 1440, "train/loss": 2.3969048261642456, "train/lm_loss": 2.3969048261642456, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.479658721581527e-05, "perf/tokens_per_sec": 25961.93507653389, "train/loss_code": 1.3876610100269318, "train/loss_prose": 3.732668320337931, "train/loss_math": 2.4265904426574707} +{"step": 1441, "train/loss": 2.4156199991703033, "train/lm_loss": 2.4156199991703033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.478592369913465e-05, "perf/tokens_per_sec": 25944.723099134364, "train/loss_math": 2.43027001619339, "train/loss_code": 1.9221099615097046, "train/loss_prose": 3.8375496864318848} +{"step": 1442, "train/loss": 2.619023561477661, "train/lm_loss": 2.619023561477661, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4775250539035174e-05, "perf/tokens_per_sec": 25360.510497087504, "train/loss_prose": 3.8055940866470337, "train/loss_code": 1.7460709810256958, "train/loss_math": 2.462214708328247} +{"step": 1443, "train/loss": 2.808308035135269, "train/lm_loss": 2.808308035135269, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4764567740718825e-05, "perf/tokens_per_sec": 25828.1754525227, "train/loss_code": 1.415505329767863, "train/loss_prose": 3.6439899444580077} +{"step": 1444, "train/loss": 2.245747745037079, "train/lm_loss": 2.245747745037079, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4753875309392266e-05, "perf/tokens_per_sec": 25998.67006861401, "train/loss_code": 1.7615590333938598, "train/loss_math": 2.7483922243118286, "train/loss_prose": 3.661402702331543} +{"step": 1445, "train/loss": 2.536227583885193, "train/lm_loss": 2.536227583885193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.474317325026684e-05, "perf/tokens_per_sec": 25787.119826963808, "train/loss_prose": 3.8070813417434692, "train/loss_math": 2.6628723740577698, "train/loss_code": 1.0120839178562164} +{"step": 1446, "train/loss": 2.6580260396003723, "train/lm_loss": 2.6580260396003723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.473246156855863e-05, "perf/tokens_per_sec": 25466.409703991496, "train/loss_code": 1.6458899974822998, "train/loss_prose": 3.6701618432998657} +{"step": 1447, "train/loss": 2.1490163803100586, "train/lm_loss": 2.1490163803100586, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4721740269488355e-05, "perf/tokens_per_sec": 25928.66872930445, "train/loss_prose": 3.194953441619873, "train/loss_code": 1.9185918331146241, "train/loss_math": 2.202109396457672} +{"step": 1448, "train/loss": 2.583341598510742, "train/lm_loss": 2.583341598510742, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4711009358281456e-05, "perf/tokens_per_sec": 26328.454078872557, "train/loss_code": 1.3509070873260498, "train/loss_prose": 3.480437676111857, "train/loss_math": 2.5078686078389487} +{"step": 1449, "train/loss": 3.0865013003349304, "train/lm_loss": 3.0865013003349304, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4700268840168045e-05, "perf/tokens_per_sec": 26549.345663367807, "train/loss_prose": 3.521091858545939, "train/loss_code": 1.7827297449111938} +{"step": 1450, "train/loss": 2.542033612728119, "train/lm_loss": 2.542033612728119, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4689518720382937e-05, "perf/tokens_per_sec": 26808.94530496578, "train/loss_code": 2.551421125729879, "train/loss_math": 2.2816095650196075, "train/loss_prose": 3.555567502975464} +{"step": 1451, "train/loss": 2.625313013792038, "train/lm_loss": 2.625313013792038, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4678759004165584e-05, "perf/tokens_per_sec": 25937.08500574455, "train/loss_math": 2.382651170094808, "train/loss_code": 1.6885203123092651, "train/loss_prose": 3.4925034840901694} +{"step": 1452, "train/loss": 2.508772134780884, "train/lm_loss": 2.508772134780884, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.466798969676015e-05, "perf/tokens_per_sec": 25827.981303887005, "train/loss_math": 2.283069372177124, "train/loss_prose": 3.6432987451553345, "train/loss_code": 1.978123704592387} +{"step": 1453, "train/loss": 2.214189291000366, "train/lm_loss": 2.214189291000366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.465721080341547e-05, "perf/tokens_per_sec": 26817.98470832488, "train/loss_code": 1.4886049032211304, "train/loss_math": 2.19893279671669, "train/loss_prose": 4.451968193054199} +{"step": 1454, "train/loss": 1.919953167438507, "train/lm_loss": 1.919953167438507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.464642232938505e-05, "perf/tokens_per_sec": 26406.111853329687, "train/loss_code": 1.5623925129572551, "train/loss_math": 2.464939832687378, "train/loss_prose": 3.520329475402832} +{"step": 1455, "train/loss": 2.443814992904663, "train/lm_loss": 2.443814992904663, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4635624279927044e-05, "perf/tokens_per_sec": 26803.716951841874, "train/loss_code": 1.791043480237325, "train/loss_math": 2.5650552908579507, "train/loss_prose": 3.241111159324646} +{"step": 1456, "train/loss": 2.0097315907478333, "train/lm_loss": 2.0097315907478333, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4624816660304314e-05, "perf/tokens_per_sec": 26727.198762885317, "train/loss_code": 1.463066965341568, "train/loss_math": 2.309883236885071, "train/loss_prose": 3.2959346771240234} +{"step": 1457, "train/loss": 2.3844622373580933, "train/lm_loss": 2.3844622373580933, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4613999475784336e-05, "perf/tokens_per_sec": 26887.40568051634, "train/loss_prose": 3.431631565093994, "train/loss_code": 1.5097901423772175, "train/loss_math": 2.561021566390991} +{"step": 1458, "train/loss": 2.1888467371463776, "train/lm_loss": 2.1888467371463776, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.460317273163929e-05, "perf/tokens_per_sec": 26515.785631051178, "train/loss_code": 1.4984952211380005, "train/loss_math": 2.4189637502034507} +{"step": 1459, "train/loss": 2.1719639599323273, "train/lm_loss": 2.1719639599323273, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4592336433146e-05, "perf/tokens_per_sec": 27210.504766626174, "train/loss_code": 1.7657569646835327, "train/loss_prose": 3.5781593322753906, "train/loss_math": 2.484383702278137} +{"step": 1460, "train/loss": 1.9032142758369446, "train/lm_loss": 1.9032142758369446, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.458149058558594e-05, "perf/tokens_per_sec": 27241.74326919265, "train/loss_code": 1.2196269929409027, "train/loss_math": 2.2475308577219644, "train/loss_prose": 3.6046130657196045} +{"step": 1461, "train/loss": 2.7989489436149597, "train/lm_loss": 2.7989489436149597, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.457063519424525e-05, "perf/tokens_per_sec": 27207.876065438766, "train/loss_math": 2.1008682250976562, "train/loss_code": 1.822783629099528, "train/loss_prose": 3.705593228340149} +{"step": 1462, "train/loss": 2.4721820950508118, "train/lm_loss": 2.4721820950508118, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.45597702644147e-05, "perf/tokens_per_sec": 27049.246668020714, "train/loss_code": 1.3376838564872742, "train/loss_prose": 3.85160756111145, "train/loss_math": 2.349718391895294} +{"step": 1463, "train/loss": 2.4802942276000977, "train/lm_loss": 2.4802942276000977, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.454889580138975e-05, "perf/tokens_per_sec": 27203.4816668039, "train/loss_math": 2.6638779640197754, "train/loss_code": 1.9212814331054688, "train/loss_prose": 3.78603458404541} +{"step": 1464, "train/loss": 2.2022073566913605, "train/lm_loss": 2.2022073566913605, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.453801181047047e-05, "perf/tokens_per_sec": 27101.646435980154, "train/loss_math": 2.2640939950942993, "train/loss_code": 1.3279890716075897, "train/loss_prose": 3.8887572288513184} +{"step": 1465, "train/loss": 3.1589707732200623, "train/lm_loss": 3.1589707732200623, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.452711829696158e-05, "perf/tokens_per_sec": 23198.640457494716, "train/loss_prose": 4.021084308624268, "train/loss_math": 2.288360834121704, "train/loss_code": 1.438992977142334} +{"step": 1466, "train/loss": 2.472085177898407, "train/lm_loss": 2.472085177898407, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4516215266172453e-05, "perf/tokens_per_sec": 26175.49781666042, "train/loss_code": 1.7055223782857258, "train/loss_math": 2.4436307748158774, "train/loss_prose": 3.664610743522644} +{"step": 1467, "train/loss": 3.1124690175056458, "train/lm_loss": 3.1124690175056458, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.450530272341709e-05, "perf/tokens_per_sec": 26608.847251972835, "train/loss_prose": 3.566742022832235, "train/loss_code": 1.7496497631072998} +{"step": 1468, "train/loss": 2.825546234846115, "train/lm_loss": 2.825546234846115, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.449438067401413e-05, "perf/tokens_per_sec": 26940.024562966122, "train/loss_code": 1.5992679595947266, "train/loss_math": 2.240232547124227, "train/loss_prose": 3.5711010098457336} +{"step": 1469, "train/loss": 2.24028542637825, "train/lm_loss": 2.24028542637825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.448344912328686e-05, "perf/tokens_per_sec": 26984.8789032558, "train/loss_prose": 3.8734090328216553, "train/loss_code": 1.6390237510204315, "train/loss_math": 2.49759308497111} +{"step": 1470, "train/loss": 2.5572198927402496, "train/lm_loss": 2.5572198927402496, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4472508076563166e-05, "perf/tokens_per_sec": 26682.201661826144, "train/loss_prose": 3.4398316740989685, "train/loss_math": 2.3196340799331665, "train/loss_code": 1.0295827686786652} +{"step": 1471, "train/loss": 2.5942731499671936, "train/lm_loss": 2.5942731499671936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4461557539175594e-05, "perf/tokens_per_sec": 26991.577533032727, "train/loss_prose": 3.2466349601745605, "train/loss_code": 1.8384156823158264, "train/loss_math": 2.4458159605662027} +{"step": 1472, "train/loss": 2.7582591772079468, "train/lm_loss": 2.7582591772079468, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4450597516461287e-05, "perf/tokens_per_sec": 27017.51305515672, "train/loss_prose": 3.8473438024520874, "train/loss_code": 1.3831377228101094, "train/loss_math": 2.5272843837738037} +{"step": 1473, "train/loss": 2.808694511651993, "train/lm_loss": 2.808694511651993, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.443962801376205e-05, "perf/tokens_per_sec": 27092.28665686837, "train/loss_math": 2.2667667071024575, "train/loss_prose": 3.656100630760193, "train/loss_code": 1.0448538064956665} +{"step": 1474, "train/loss": 2.4364627301692963, "train/lm_loss": 2.4364627301692963, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.442864903642428e-05, "perf/tokens_per_sec": 25970.333707724192, "train/loss_prose": 3.3371325731277466, "train/loss_math": 2.5659099817276, "train/loss_code": 1.276897817850113} +{"step": 1475, "train/loss": 2.429415702819824, "train/lm_loss": 2.429415702819824, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.441766058979898e-05, "perf/tokens_per_sec": 27036.17837089261, "train/loss_math": 2.3148890336354575, "train/loss_code": 2.2739221155643463, "train/loss_prose": 3.3949697017669678} +{"step": 1476, "train/loss": 2.179183751344681, "train/lm_loss": 2.179183751344681, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.44066626792418e-05, "perf/tokens_per_sec": 27126.423333322808, "train/loss_prose": 3.0535378456115723, "train/loss_math": 2.007107436656952, "train/loss_code": 1.6489824056625366} +{"step": 1477, "train/loss": 2.9674354195594788, "train/lm_loss": 2.9674354195594788, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.439565531011299e-05, "perf/tokens_per_sec": 27099.124215255124, "train/loss_code": 2.0185214281082153, "train/loss_math": 2.5375149250030518, "train/loss_prose": 3.4329851627349854} +{"step": 1478, "train/loss": 2.5613134503364563, "train/lm_loss": 2.5613134503364563, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.43846384877774e-05, "perf/tokens_per_sec": 27274.958736388216, "train/loss_prose": 3.9559531211853027, "train/loss_math": 2.2266483306884766, "train/loss_code": 1.3897838195164998} +{"step": 1479, "train/loss": 2.5891440510749817, "train/lm_loss": 2.5891440510749817, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4373612217604496e-05, "perf/tokens_per_sec": 27165.11262064691, "train/loss_math": 2.432338237762451, "train/loss_code": 1.7708156108856201, "train/loss_prose": 3.291502078374227} +{"step": 1480, "train/loss": 3.3581066131591797, "train/lm_loss": 3.3581066131591797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.436257650496834e-05, "perf/tokens_per_sec": 24982.505109964575, "train/loss_math": 2.648424983024597, "train/loss_prose": 3.594667077064514} +{"step": 1481, "train/loss": 2.2949923872947693, "train/lm_loss": 2.2949923872947693, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.435153135524763e-05, "perf/tokens_per_sec": 27122.397751886583, "train/loss_code": 2.155044937133789, "train/loss_math": 2.1868804693222046, "train/loss_prose": 3.210953712463379} +{"step": 1482, "train/loss": 3.0408583283424377, "train/lm_loss": 3.0408583283424377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4340476773825625e-05, "perf/tokens_per_sec": 27183.208282239168, "train/loss_prose": 3.784566593170166, "train/loss_code": 1.5927294492721558, "train/loss_math": 2.2185750007629395} +{"step": 1483, "train/loss": 2.046331614255905, "train/lm_loss": 2.046331614255905, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.432941276609018e-05, "perf/tokens_per_sec": 27260.89283118746, "train/loss_code": 1.2639621138572692, "train/loss_prose": 3.8532108068466187, "train/loss_math": 2.3444206714630127} +{"step": 1484, "train/loss": 2.8547146916389465, "train/lm_loss": 2.8547146916389465, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.431833933743378e-05, "perf/tokens_per_sec": 27161.891455954872, "train/loss_code": 1.588887890179952, "train/loss_prose": 3.6142107009887696} +{"step": 1485, "train/loss": 3.29928857088089, "train/lm_loss": 3.29928857088089, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4307256493253457e-05, "perf/tokens_per_sec": 26532.821387037737, "train/loss_math": 2.422833522160848, "train/loss_prose": 3.8251617431640623} +{"step": 1486, "train/loss": 2.859391510486603, "train/lm_loss": 2.859391510486603, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4296164238950874e-05, "perf/tokens_per_sec": 27026.693678883308, "train/loss_math": 2.3946654319763185, "train/loss_prose": 3.633934815724691} +{"step": 1487, "train/loss": 3.069841682910919, "train/lm_loss": 3.069841682910919, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.428506257993226e-05, "perf/tokens_per_sec": 27149.615088853243, "train/loss_prose": 3.8093405246734617, "train/loss_code": 1.8373433748881023} +{"step": 1488, "train/loss": 2.235652416944504, "train/lm_loss": 2.235652416944504, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.427395152160841e-05, "perf/tokens_per_sec": 27135.120954380112, "train/loss_prose": 3.4284714460372925, "train/loss_code": 1.7171640634536742, "train/loss_math": 2.442456007003784} +{"step": 1489, "train/loss": 2.6376965641975403, "train/lm_loss": 2.6376965641975403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.426283106939474e-05, "perf/tokens_per_sec": 27170.697782993353, "train/loss_prose": 3.506600022315979, "train/loss_code": 1.577224353949229, "train/loss_math": 2.3435003757476807} +{"step": 1490, "train/loss": 2.0761553049087524, "train/lm_loss": 2.0761553049087524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.42517012287112e-05, "perf/tokens_per_sec": 26697.2529964709, "train/loss_code": 1.7587636709213257, "train/loss_math": 2.155697822570801, "train/loss_prose": 3.107093572616577} +{"step": 1491, "train/loss": 2.646951586008072, "train/lm_loss": 2.646951586008072, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4240562004982364e-05, "perf/tokens_per_sec": 26726.49191743038, "train/loss_code": 1.4603891968727112, "train/loss_math": 2.269170125325521, "train/loss_prose": 3.815774997075399} +{"step": 1492, "train/loss": 2.019335627555847, "train/lm_loss": 2.019335627555847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4229413403637345e-05, "perf/tokens_per_sec": 27076.530685129703, "train/loss_math": 2.405324618021647, "train/loss_code": 1.3606411516666412, "train/loss_prose": 3.4961464405059814} +{"step": 1493, "train/loss": 3.305196225643158, "train/lm_loss": 3.305196225643158, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.421825543010983e-05, "perf/tokens_per_sec": 26914.617010543465, "train/loss_prose": 3.6803166468938193, "train/loss_math": 2.179835319519043} +{"step": 1494, "train/loss": 2.4057729244232178, "train/lm_loss": 2.4057729244232178, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.420708808983809e-05, "perf/tokens_per_sec": 27150.001238985045, "train/loss_math": 2.308285176753998, "train/loss_code": 1.7437299489974976, "train/loss_prose": 3.2627915143966675} +{"step": 1495, "train/loss": 2.2096338272094727, "train/lm_loss": 2.2096338272094727, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4195911388264946e-05, "perf/tokens_per_sec": 26637.89274367733, "train/loss_math": 2.265641713142395, "train/loss_code": 1.2401849627494812, "train/loss_prose": 3.8684914112091064} +{"step": 1496, "train/loss": 2.602072387933731, "train/lm_loss": 2.602072387933731, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.418472533083777e-05, "perf/tokens_per_sec": 27075.93325841953, "train/loss_prose": 3.843862533569336, "train/loss_math": 2.4892993768056235, "train/loss_code": 1.886985182762146} +{"step": 1497, "train/loss": 2.8399264216423035, "train/lm_loss": 2.8399264216423035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.417352992300854e-05, "perf/tokens_per_sec": 26854.161385672774, "train/loss_math": 2.354853709538778, "train/loss_prose": 3.6065216660499573, "train/loss_code": 1.2287647724151611} +{"step": 1498, "train/loss": 2.865520656108856, "train/lm_loss": 2.865520656108856, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4162325170233745e-05, "perf/tokens_per_sec": 26845.936566225428, "train/loss_prose": 3.595276117324829, "train/loss_code": 1.670267403125763, "train/loss_math": 2.6012628078460693} +{"step": 1499, "train/loss": 2.5534690022468567, "train/lm_loss": 2.5534690022468567, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.415111107797445e-05, "perf/tokens_per_sec": 26687.09252010091, "train/loss_code": 1.2952587803204854, "train/loss_math": 2.2310492396354675, "train/loss_prose": 4.026625792185466} +{"step": 1500, "train/loss": 2.3052913546562195, "train/lm_loss": 2.3052913546562195, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4139887651696265e-05, "perf/tokens_per_sec": 27152.361284041464, "train/loss_code": 1.6144161820411682, "train/loss_prose": 3.51808762550354, "train/loss_math": 2.1876354217529297} +{"step": 1501, "train/loss": 2.3948265612125397, "train/lm_loss": 2.3948265612125397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.412865489686936e-05, "perf/tokens_per_sec": 26342.01826782278, "train/loss_code": 1.2827494442462921, "train/loss_prose": 3.506903648376465} +{"step": 1502, "train/loss": 2.968285620212555, "train/lm_loss": 2.968285620212555, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4117412818968426e-05, "perf/tokens_per_sec": 24180.004734700542, "train/loss_prose": 3.4023983478546143, "train/loss_code": 2.1934860944747925, "train/loss_math": 2.347320795059204} +{"step": 1503, "train/loss": 1.8285655975341797, "train/lm_loss": 1.8285655975341797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.410616142347273e-05, "perf/tokens_per_sec": 27021.33755695679, "train/loss_code": 1.5308987895647685, "train/loss_prose": 2.9845714569091797, "train/loss_math": 2.4585609436035156} +{"step": 1504, "train/loss": 1.943942278623581, "train/lm_loss": 1.943942278623581, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4094900715866064e-05, "perf/tokens_per_sec": 26510.18008542577, "train/loss_code": 1.2623157739639281, "train/loss_prose": 3.4864394664764404, "train/loss_math": 2.267080307006836} +{"step": 1505, "train/loss": 2.735874056816101, "train/lm_loss": 2.735874056816101, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.408363070163675e-05, "perf/tokens_per_sec": 26935.336175309923, "train/loss_code": 1.9914931058883667, "train/loss_math": 2.27262814839681, "train/loss_prose": 3.6953741709391275} +{"step": 1506, "train/loss": 2.868570923805237, "train/lm_loss": 2.868570923805237, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4072351386277654e-05, "perf/tokens_per_sec": 27154.335455012446, "train/loss_code": 2.379742980003357, "train/loss_prose": 3.879856506983439, "train/loss_math": 2.0848841667175293} +{"step": 1507, "train/loss": 3.4524391889572144, "train/lm_loss": 3.4524391889572144, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.40610627752862e-05, "perf/tokens_per_sec": 26324.097127158002, "train/loss_prose": 3.7786521514256797, "train/loss_code": 2.7601609230041504, "train/loss_math": 2.1874399185180664} +{"step": 1508, "train/loss": 2.38177490234375, "train/lm_loss": 2.38177490234375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.40497648741643e-05, "perf/tokens_per_sec": 26398.32141046168, "train/loss_prose": 3.517097234725952, "train/loss_code": 1.7110880017280579, "train/loss_math": 2.5878262519836426} +{"step": 1509, "train/loss": 2.607108026742935, "train/lm_loss": 2.607108026742935, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.403845768841842e-05, "perf/tokens_per_sec": 27210.504766626174, "train/loss_code": 1.82969468832016, "train/loss_prose": 3.7697067260742188, "train/loss_math": 2.414515256881714} +{"step": 1510, "train/loss": 3.037646174430847, "train/lm_loss": 3.037646174430847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.402714122355955e-05, "perf/tokens_per_sec": 26899.488129218735, "train/loss_code": 2.1330136954784393, "train/loss_prose": 3.942278802394867} +{"step": 1511, "train/loss": 2.6325452625751495, "train/lm_loss": 2.6325452625751495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.401581548510318e-05, "perf/tokens_per_sec": 27192.15755743556, "train/loss_math": 2.283212035894394, "train/loss_code": 2.457541584968567, "train/loss_prose": 3.5062156915664673} +{"step": 1512, "train/loss": 2.324930429458618, "train/lm_loss": 2.324930429458618, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4004480478569353e-05, "perf/tokens_per_sec": 27000.443489590292, "train/loss_code": 1.560998837153117, "train/loss_math": 2.5442253748575845, "train/loss_prose": 3.141885280609131} +{"step": 1513, "train/loss": 2.5005082488059998, "train/lm_loss": 2.5005082488059998, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.399313620948262e-05, "perf/tokens_per_sec": 26960.571329928975, "train/loss_code": 1.2148312330245972, "train/loss_prose": 3.3486626744270325, "train/loss_math": 2.0898767709732056} +{"step": 1514, "train/loss": 2.484495222568512, "train/lm_loss": 2.484495222568512, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3981782683372016e-05, "perf/tokens_per_sec": 27197.53856285719, "train/loss_math": 2.3728466033935547, "train/loss_code": 1.4956727822621663, "train/loss_prose": 4.135201573371887} +{"step": 1515, "train/loss": 2.0440275073051453, "train/lm_loss": 2.0440275073051453, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3970419905771145e-05, "perf/tokens_per_sec": 26845.517067659657, "train/loss_code": 1.446331548690796, "train/loss_prose": 3.5994019508361816, "train/loss_math": 1.921758770942688} +{"step": 1516, "train/loss": 2.437093496322632, "train/lm_loss": 2.437093496322632, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.395904788221805e-05, "perf/tokens_per_sec": 26412.404368073032, "train/loss_math": 2.444836378097534, "train/loss_code": 1.8846958875656128, "train/loss_prose": 3.5341458320617676} +{"step": 1517, "train/loss": 2.6548094749450684, "train/lm_loss": 2.6548094749450684, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.394766661825533e-05, "perf/tokens_per_sec": 26936.729849824234, "train/loss_prose": 3.4312081933021545, "train/loss_code": 1.6172737677892048, "train/loss_math": 2.66182279586792} +{"step": 1518, "train/loss": 2.8196056485176086, "train/lm_loss": 2.8196056485176086, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3936276119430096e-05, "perf/tokens_per_sec": 27147.984798276928, "train/loss_math": 2.321524143218994, "train/loss_prose": 3.459855794906616, "train/loss_code": 1.7528504133224487} +{"step": 1519, "train/loss": 2.384540170431137, "train/lm_loss": 2.384540170431137, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3924876391293915e-05, "perf/tokens_per_sec": 26271.082297315068, "train/loss_code": 1.7007507880528767, "train/loss_math": 2.2196404933929443, "train/loss_prose": 3.6575732231140137} +{"step": 1520, "train/loss": 2.3425740003585815, "train/lm_loss": 2.3425740003585815, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.391346743940288e-05, "perf/tokens_per_sec": 26810.66064904765, "train/loss_code": 1.7251171271006267, "train/loss_prose": 3.7833409309387207, "train/loss_math": 1.9995193481445312} +{"step": 1521, "train/loss": 2.8575921654701233, "train/lm_loss": 2.8575921654701233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.390204926931758e-05, "perf/tokens_per_sec": 26805.975350173274, "train/loss_prose": 3.710914194583893, "train/loss_code": 1.7139975428581238, "train/loss_math": 2.294542908668518} +{"step": 1522, "train/loss": 2.8816896080970764, "train/lm_loss": 2.8816896080970764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.389062188660309e-05, "perf/tokens_per_sec": 26095.462095217263, "train/loss_math": 2.4697184562683105, "train/loss_prose": 3.8250667254130044, "train/loss_code": 2.212960163752238} +{"step": 1523, "train/loss": 2.5737545490264893, "train/lm_loss": 2.5737545490264893, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.387918529682898e-05, "perf/tokens_per_sec": 26268.190147671776, "train/loss_prose": 3.2445878187815347, "train/loss_code": 2.057467500368754, "train/loss_math": 2.341935157775879} +{"step": 1524, "train/loss": 2.7505224347114563, "train/lm_loss": 2.7505224347114563, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.386773950556931e-05, "perf/tokens_per_sec": 25930.42982354193, "train/loss_prose": 3.862185001373291, "train/loss_math": 2.345626425743103, "train/loss_code": 2.5516774654388428} +{"step": 1525, "train/loss": 2.8624835908412933, "train/lm_loss": 2.8624835908412933, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3856284518402594e-05, "perf/tokens_per_sec": 25890.37525355581, "train/loss_math": 2.471116224924723, "train/loss_prose": 4.054716269175212, "train/loss_code": 1.6611857414245605} +{"step": 1526, "train/loss": 2.890972137451172, "train/lm_loss": 2.890972137451172, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.384482034091189e-05, "perf/tokens_per_sec": 26213.12006249695, "train/loss_code": 1.8939971923828125, "train/loss_prose": 3.676061987876892, "train/loss_math": 2.3177675008773804} +{"step": 1527, "train/loss": 2.967186838388443, "train/lm_loss": 2.967186838388443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3833346978684675e-05, "perf/tokens_per_sec": 25773.85027049247, "train/loss_prose": 3.4313011169433594, "train/loss_code": 1.6347825527191162, "train/loss_math": 2.4731035232543945} +{"step": 1528, "train/loss": 2.561418056488037, "train/lm_loss": 2.561418056488037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.382186443731293e-05, "perf/tokens_per_sec": 26826.108629390896, "train/loss_code": 1.837132140994072, "train/loss_prose": 3.691246191660563, "train/loss_math": 2.0690770149230957} +{"step": 1529, "train/loss": 2.616615355014801, "train/lm_loss": 2.616615355014801, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.381037272239311e-05, "perf/tokens_per_sec": 26834.195559984568, "train/loss_math": 2.122098982334137, "train/loss_prose": 3.519461989402771, "train/loss_code": 1.30543851852417} +{"step": 1530, "train/loss": 2.2632384300231934, "train/lm_loss": 2.2632384300231934, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.379887183952614e-05, "perf/tokens_per_sec": 26185.312311763057, "train/loss_math": 2.4166651725769044, "train/loss_prose": 3.2266438007354736, "train/loss_code": 1.3979696035385132} +{"step": 1531, "train/loss": 2.46035897731781, "train/lm_loss": 2.46035897731781, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3787361794317405e-05, "perf/tokens_per_sec": 26703.72655673186, "train/loss_math": 2.5271363258361816, "train/loss_code": 1.3635522425174713, "train/loss_prose": 3.9005086421966553} +{"step": 1532, "train/loss": 2.5608886182308197, "train/lm_loss": 2.5608886182308197, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.377584259237676e-05, "perf/tokens_per_sec": 25963.50450133219, "train/loss_prose": 3.707561492919922, "train/loss_code": 1.1885685324668884, "train/loss_math": 2.0911571979522705} +{"step": 1533, "train/loss": 2.721530854701996, "train/lm_loss": 2.721530854701996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.376431423931853e-05, "perf/tokens_per_sec": 26860.669276663873, "train/loss_prose": 3.7326388359069824, "train/loss_math": 2.4862140417099, "train/loss_code": 1.8673007885615032} +{"step": 1534, "train/loss": 2.2336732149124146, "train/lm_loss": 2.2336732149124146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.375277674076149e-05, "perf/tokens_per_sec": 26235.937680774317, "train/loss_math": 2.3052493731180825, "train/loss_prose": 3.651088237762451, "train/loss_code": 0.3868009150028229} +{"step": 1535, "train/loss": 2.3143686056137085, "train/lm_loss": 2.3143686056137085, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.374123010232888e-05, "perf/tokens_per_sec": 26181.720789176106, "train/loss_prose": 3.6283962726593018, "train/loss_math": 2.4816466172536216, "train/loss_code": 1.2710721095403035} +{"step": 1536, "train/loss": 2.081213414669037, "train/lm_loss": 2.081213414669037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.372967432964838e-05, "perf/tokens_per_sec": 26093.044410152976, "train/loss_prose": 4.100649356842041, "train/loss_code": 1.6008391618728637, "train/loss_math": 2.2724310159683228} +{"step": 1537, "train/loss": 2.710068464279175, "train/lm_loss": 2.710068464279175, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.371810942835215e-05, "perf/tokens_per_sec": 25922.800142440687, "train/loss_prose": 3.538321113586426, "train/loss_code": 1.0641263723373413, "train/loss_math": 1.8606880903244019} +{"step": 1538, "train/loss": 2.1137249767780304, "train/lm_loss": 2.1137249767780304, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3706535404076784e-05, "perf/tokens_per_sec": 25834.19550830445, "train/loss_math": 2.9330339431762695, "train/loss_code": 1.5569120645523071, "train/loss_prose": 3.096102714538574} +{"step": 1539, "train/loss": 2.6510919332504272, "train/lm_loss": 2.6510919332504272, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.36949522624633e-05, "perf/tokens_per_sec": 26653.348967601505, "train/loss_math": 2.2087999284267426, "train/loss_prose": 3.617051442464193, "train/loss_code": 1.5223809480667114} +{"step": 1540, "train/loss": 2.576692968606949, "train/lm_loss": 2.576692968606949, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.368336000915719e-05, "perf/tokens_per_sec": 25832.486304016697, "train/loss_prose": 3.8542980353037515, "train/loss_code": 1.5320353905359905, "train/loss_math": 2.2272716760635376} +{"step": 1541, "train/loss": 2.998282939195633, "train/lm_loss": 2.998282939195633, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.367175864980839e-05, "perf/tokens_per_sec": 26082.34855877986, "train/loss_code": 1.6507356961568196, "train/loss_prose": 3.8068111419677733} +{"step": 1542, "train/loss": 2.628419578075409, "train/lm_loss": 2.628419578075409, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.366014819007124e-05, "perf/tokens_per_sec": 26157.28471309747, "train/loss_code": 1.9374683141708373, "train/loss_prose": 4.452320098876953, "train/loss_math": 2.435375452041626} +{"step": 1543, "train/loss": 2.1033808887004852, "train/lm_loss": 2.1033808887004852, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3648528635604556e-05, "perf/tokens_per_sec": 26099.54543300053, "train/loss_prose": 3.048650026321411, "train/loss_code": 1.7275126695632934, "train/loss_math": 2.5704163312911987} +{"step": 1544, "train/loss": 2.4633870720863342, "train/lm_loss": 2.4633870720863342, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.363689999207156e-05, "perf/tokens_per_sec": 25885.030991458476, "train/loss_code": 1.5767864286899567, "train/loss_prose": 4.047687888145447, "train/loss_math": 2.6522876024246216} +{"step": 1545, "train/loss": 2.9995137453079224, "train/lm_loss": 2.9995137453079224, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3625262265139906e-05, "perf/tokens_per_sec": 26033.69431845906, "train/loss_prose": 3.5990793108940125, "train/loss_math": 2.3999478816986084} +{"step": 1546, "train/loss": 2.270264148712158, "train/lm_loss": 2.270264148712158, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.361361546048169e-05, "perf/tokens_per_sec": 26341.53359184418, "train/loss_math": 2.3422521114349366, "train/loss_code": 1.5652359127998352, "train/loss_prose": 3.320380449295044} +{"step": 1547, "train/loss": 2.7032755613327026, "train/lm_loss": 2.7032755613327026, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3601959583773415e-05, "perf/tokens_per_sec": 27028.90476093082, "train/loss_math": 2.4686904430389403, "train/loss_code": 1.679862380027771, "train/loss_prose": 3.8014447689056396} +{"step": 1548, "train/loss": 1.9596625864505768, "train/lm_loss": 1.9596625864505768, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3590294640696025e-05, "perf/tokens_per_sec": 26226.005135329047, "train/loss_prose": 3.661306142807007, "train/loss_math": 2.2283066511154175, "train/loss_code": 1.5118762969970703} +{"step": 1549, "train/loss": 2.8387382328510284, "train/lm_loss": 2.8387382328510284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.357862063693486e-05, "perf/tokens_per_sec": 26344.80550086181, "train/loss_prose": 3.716977536678314, "train/loss_math": 2.341097831726074, "train/loss_code": 1.5798998475074768} +{"step": 1550, "train/loss": 2.490076571702957, "train/lm_loss": 2.490076571702957, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.356693757817969e-05, "perf/tokens_per_sec": 25981.684412382983, "train/loss_math": 2.4753419160842896, "train/loss_prose": 3.64080540339152, "train/loss_code": 1.3491708040237427} +{"step": 1551, "train/loss": 2.406635284423828, "train/lm_loss": 2.406635284423828, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.355524547012471e-05, "perf/tokens_per_sec": 25946.682309302516, "train/loss_code": 1.3273304998874664, "train/loss_math": 3.0674123764038086, "train/loss_prose": 3.6254493395487466} +{"step": 1552, "train/loss": 2.3450052440166473, "train/lm_loss": 2.3450052440166473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3543544318468485e-05, "perf/tokens_per_sec": 25863.01407873978, "train/loss_math": 2.4517057623182024, "train/loss_code": 1.5981014966964722} +{"step": 1553, "train/loss": 3.0177854895591736, "train/lm_loss": 3.0177854895591736, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3531834128914025e-05, "perf/tokens_per_sec": 25847.06064025372, "train/loss_prose": 3.748687446117401, "train/loss_math": 2.5560195446014404, "train/loss_code": 1.479475736618042} +{"step": 1554, "train/loss": 2.76015567779541, "train/lm_loss": 2.76015567779541, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.352011490716875e-05, "perf/tokens_per_sec": 26154.696286552913, "train/loss_math": 2.529864013195038, "train/loss_prose": 3.459221522013346, "train/loss_code": 1.5841243267059326} +{"step": 1555, "train/loss": 2.1100158989429474, "train/lm_loss": 2.1100158989429474, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.350838665894446e-05, "perf/tokens_per_sec": 26396.90177296955, "train/loss_math": 2.4811967849731444, "train/loss_code": 1.491381009419759} +{"step": 1556, "train/loss": 3.1639583706855774, "train/lm_loss": 3.1639583706855774, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.349664938995734e-05, "perf/tokens_per_sec": 25995.365562990064, "train/loss_math": 2.2244255542755127, "train/loss_prose": 4.1470935344696045, "train/loss_code": 2.1662891705830893} +{"step": 1557, "train/loss": 2.1219389140605927, "train/lm_loss": 2.1219389140605927, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.348490310592801e-05, "perf/tokens_per_sec": 26028.132995985154, "train/loss_code": 1.3421016484498978, "train/loss_math": 2.2318941354751587, "train/loss_prose": 3.5716580152511597} +{"step": 1558, "train/loss": 2.5588854253292084, "train/lm_loss": 2.5588854253292084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.347314781258147e-05, "perf/tokens_per_sec": 26026.674035392305, "train/loss_math": 2.4519787788391114, "train/loss_prose": 3.439214587211609, "train/loss_code": 1.3327596187591553} +{"step": 1559, "train/loss": 2.8954216837882996, "train/lm_loss": 2.8954216837882996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3461383515647106e-05, "perf/tokens_per_sec": 26317.80629763245, "train/loss_prose": 3.560892184575399, "train/loss_code": 2.526244878768921, "train/loss_math": 2.45098078250885} +{"step": 1560, "train/loss": 1.8977736830711365, "train/lm_loss": 1.8977736830711365, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.344961022085867e-05, "perf/tokens_per_sec": 25932.77832136059, "train/loss_prose": 3.592466354370117, "train/loss_code": 1.332876205444336} +{"step": 1561, "train/loss": 2.637417495250702, "train/lm_loss": 2.637417495250702, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.343782793395435e-05, "perf/tokens_per_sec": 25886.66914384991, "train/loss_prose": 3.7267795403798423, "train/loss_math": 2.207229495048523, "train/loss_code": 1.8348474105199177} +{"step": 1562, "train/loss": 2.513170838356018, "train/lm_loss": 2.513170838356018, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3426036660676686e-05, "perf/tokens_per_sec": 26142.398307883104, "train/loss_prose": 3.4699074029922485, "train/loss_math": 2.3121543884277345, "train/loss_code": 1.6047791242599487} +{"step": 1563, "train/loss": 1.9041962325572968, "train/lm_loss": 1.9041962325572968, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3414236406772584e-05, "perf/tokens_per_sec": 26936.476443730873, "train/loss_code": 1.4626548886299133, "train/loss_math": 2.298130750656128, "train/loss_prose": 3.324033737182617} +{"step": 1564, "train/loss": 2.4189442694187164, "train/lm_loss": 2.4189442694187164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3402427177993366e-05, "perf/tokens_per_sec": 25761.405557788163, "train/loss_math": 2.4509037494659425, "train/loss_prose": 3.255042791366577, "train/loss_code": 1.9209964275360107} +{"step": 1565, "train/loss": 2.649228513240814, "train/lm_loss": 2.649228513240814, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.339060898009469e-05, "perf/tokens_per_sec": 26214.000006103422, "train/loss_prose": 3.3282373547554016, "train/loss_math": 2.2854623794555664, "train/loss_code": 1.0244909524917603} +{"step": 1566, "train/loss": 2.99159699678421, "train/lm_loss": 2.99159699678421, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.337878181883661e-05, "perf/tokens_per_sec": 25921.196524300176, "train/loss_prose": 3.7685575008392336, "train/loss_code": 1.3169155716896057, "train/loss_math": 2.456157684326172} +{"step": 1567, "train/loss": 2.5741822123527527, "train/lm_loss": 2.5741822123527527, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.336694569998354e-05, "perf/tokens_per_sec": 25725.722750067758, "train/loss_prose": 3.965616146723429, "train/loss_code": 1.7393216133117675} +{"step": 1568, "train/loss": 1.9143853187561035, "train/lm_loss": 1.9143853187561035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3355100629304254e-05, "perf/tokens_per_sec": 25467.2779808653, "train/loss_code": 1.5460428476333619, "train/loss_math": 2.528289715449015} +{"step": 1569, "train/loss": 2.389588952064514, "train/lm_loss": 2.389588952064514, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.334324661257191e-05, "perf/tokens_per_sec": 25869.75156115653, "train/loss_code": 1.9401548306147258, "train/loss_math": 2.2093211809794107, "train/loss_prose": 3.334141969680786} +{"step": 1570, "train/loss": 2.5173842012882233, "train/lm_loss": 2.5173842012882233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3331383655564006e-05, "perf/tokens_per_sec": 26366.35575820732, "train/loss_math": 2.4193804264068604, "train/loss_prose": 3.392402410507202, "train/loss_code": 1.2573658227920532} +{"step": 1571, "train/loss": 2.690000295639038, "train/lm_loss": 2.690000295639038, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.331951176406239e-05, "perf/tokens_per_sec": 26296.335297665148, "train/loss_prose": 3.446729302406311, "train/loss_code": 1.4913206100463867, "train/loss_math": 2.375222086906433} +{"step": 1572, "train/loss": 2.675257086753845, "train/lm_loss": 2.675257086753845, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.330763094385329e-05, "perf/tokens_per_sec": 26048.615128022793, "train/loss_code": 1.6885632872581482, "train/loss_prose": 3.4397024512290955, "train/loss_math": 2.1330599784851074} +{"step": 1573, "train/loss": 2.4712078273296356, "train/lm_loss": 2.4712078273296356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.329574120072728e-05, "perf/tokens_per_sec": 25954.718165570608, "train/loss_math": 2.5437777042388916, "train/loss_code": 1.2368873953819275, "train/loss_prose": 3.378805637359619} +{"step": 1574, "train/loss": 2.959171950817108, "train/lm_loss": 2.959171950817108, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3283842540479264e-05, "perf/tokens_per_sec": 26098.435272957075, "train/loss_prose": 3.871201276779175, "train/loss_math": 2.359277327855428, "train/loss_code": 2.4909698963165283} +{"step": 1575, "train/loss": 2.614157199859619, "train/lm_loss": 2.614157199859619, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3271934968908514e-05, "perf/tokens_per_sec": 25929.334003456264, "train/loss_prose": 3.579547882080078, "train/loss_math": 2.299903313318888, "train/loss_code": 1.637451410293579} +{"step": 1576, "train/loss": 2.561888247728348, "train/lm_loss": 2.561888247728348, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.326001849181862e-05, "perf/tokens_per_sec": 26323.814781899866, "train/loss_math": 2.474421501159668, "train/loss_code": 1.4743300080299377, "train/loss_prose": 3.3743934631347656} +{"step": 1577, "train/loss": 2.21612012386322, "train/lm_loss": 2.21612012386322, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.324809311501754e-05, "perf/tokens_per_sec": 26157.722805201134, "train/loss_prose": 2.8431636095046997, "train/loss_code": 1.7077066898345947, "train/loss_math": 2.3065043290456138} +{"step": 1578, "train/loss": 2.8218705654144287, "train/lm_loss": 2.8218705654144287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.323615884431756e-05, "perf/tokens_per_sec": 25381.642320000235, "train/loss_math": 2.299196243286133, "train/loss_code": 1.534911572933197, "train/loss_prose": 3.726686954498291} +{"step": 1579, "train/loss": 2.849599540233612, "train/lm_loss": 2.849599540233612, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3224215685535294e-05, "perf/tokens_per_sec": 27162.406790969617, "train/loss_code": 1.9302138090133667, "train/loss_math": 2.430946171283722, "train/loss_prose": 3.7142661412556968} +{"step": 1580, "train/loss": 2.04943785071373, "train/lm_loss": 2.04943785071373, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.321226364449169e-05, "perf/tokens_per_sec": 23882.290804552962, "train/loss_math": 2.475047747294108, "train/loss_code": 1.561584711074829, "train/loss_prose": 2.724020004272461} +{"step": 1581, "train/loss": 2.8585237562656403, "train/lm_loss": 2.8585237562656403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.320030272701203e-05, "perf/tokens_per_sec": 25083.14026955091, "train/loss_prose": 3.6902856826782227, "train/loss_code": 1.899642785390218, "train/loss_math": 2.4081180095672607} +{"step": 1582, "train/loss": 3.032815933227539, "train/lm_loss": 3.032815933227539, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.318833293892592e-05, "perf/tokens_per_sec": 25433.5719547581, "train/loss_prose": 3.395333194732666, "train/loss_math": 2.4286204179128013} +{"step": 1583, "train/loss": 1.9960315227508545, "train/lm_loss": 1.9960315227508545, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.31763542860673e-05, "perf/tokens_per_sec": 24799.91625128656, "train/loss_math": 2.3429996490478517, "train/loss_code": 1.41775115331014} +{"step": 1584, "train/loss": 2.3768419325351715, "train/lm_loss": 2.3768419325351715, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.31643667742744e-05, "perf/tokens_per_sec": 24884.079838237765, "train/loss_math": 2.2990947564442954, "train/loss_prose": 3.2052172422409058, "train/loss_code": 1.902338703473409} +{"step": 1585, "train/loss": 2.5261858701705933, "train/lm_loss": 2.5261858701705933, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3152370409389795e-05, "perf/tokens_per_sec": 24818.044190323202, "train/loss_code": 1.7091410160064697, "train/loss_math": 2.4012701511383057, "train/loss_prose": 3.939126968383789} +{"step": 1586, "train/loss": 2.3097869157791138, "train/lm_loss": 2.3097869157791138, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.314036519726038e-05, "perf/tokens_per_sec": 25149.197040628413, "train/loss_code": 1.5358407497406006, "train/loss_math": 2.3448980331420897, "train/loss_prose": 3.682122230529785} +{"step": 1587, "train/loss": 2.8125943541526794, "train/lm_loss": 2.8125943541526794, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3128351143737335e-05, "perf/tokens_per_sec": 24742.626027770137, "train/loss_math": 2.4129749139149985, "train/loss_code": 1.605531394481659, "train/loss_prose": 4.01692263285319} +{"step": 1588, "train/loss": 2.274529755115509, "train/lm_loss": 2.274529755115509, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.311632825467617e-05, "perf/tokens_per_sec": 25439.82382121502, "train/loss_code": 2.099410812060038, "train/loss_prose": 2.959421396255493, "train/loss_math": 2.2346460819244385} +{"step": 1589, "train/loss": 2.139141619205475, "train/lm_loss": 2.139141619205475, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3104296535936695e-05, "perf/tokens_per_sec": 25544.22274493944, "train/loss_math": 2.1489882866541543, "train/loss_prose": 4.178276062011719, "train/loss_code": 1.6219729483127594} +{"step": 1590, "train/loss": 2.17657533288002, "train/lm_loss": 2.17657533288002, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.309225599338301e-05, "perf/tokens_per_sec": 24758.350867990004, "train/loss_code": 1.1329758167266846, "train/loss_math": 2.372133255004883, "train/loss_prose": 3.285984992980957} +{"step": 1591, "train/loss": 2.092737227678299, "train/lm_loss": 2.092737227678299, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3080206632883554e-05, "perf/tokens_per_sec": 24707.895489696097, "train/loss_code": 1.323101595044136, "train/loss_math": 2.314585566520691, "train/loss_prose": 3.4101600646972656} +{"step": 1592, "train/loss": 2.42109951376915, "train/lm_loss": 2.42109951376915, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.306814846031102e-05, "perf/tokens_per_sec": 24805.17993854968, "train/loss_code": 1.797087589899699, "train/loss_math": 2.5061217149098716, "train/loss_prose": 3.2295838594436646} +{"step": 1593, "train/loss": 2.233930915594101, "train/lm_loss": 2.233930915594101, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.305608148154242e-05, "perf/tokens_per_sec": 24606.54181496828, "train/loss_math": 2.450732707977295, "train/loss_code": 1.8402570962905884, "train/loss_prose": 3.7686963081359863} +{"step": 1594, "train/loss": 2.367619603872299, "train/lm_loss": 2.367619603872299, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.304400570245906e-05, "perf/tokens_per_sec": 25375.718862072023, "train/loss_code": 1.891900897026062, "train/loss_math": 2.214989423751831, "train/loss_prose": 3.6332309246063232} +{"step": 1595, "train/loss": 2.2370797097682953, "train/lm_loss": 2.2370797097682953, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.303192112894652e-05, "perf/tokens_per_sec": 24952.895870975986, "train/loss_code": 1.1645554701487224, "train/loss_prose": 3.5080891847610474, "train/loss_math": 2.462264140446981} +{"step": 1596, "train/loss": 2.5132853388786316, "train/lm_loss": 2.5132853388786316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.301982776689467e-05, "perf/tokens_per_sec": 24880.18778167831, "train/loss_math": 2.3139588832855225, "train/loss_prose": 3.6339787244796753, "train/loss_code": 1.2685306072235107} +{"step": 1597, "train/loss": 2.823174297809601, "train/lm_loss": 2.823174297809601, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3007725622197674e-05, "perf/tokens_per_sec": 24809.29934712633, "train/loss_math": 2.1636440753936768, "train/loss_prose": 3.5885691046714783, "train/loss_code": 1.9519148468971252} +{"step": 1598, "train/loss": 2.3170666694641113, "train/lm_loss": 2.3170666694641113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.299561470075397e-05, "perf/tokens_per_sec": 24944.888776602154, "train/loss_math": 2.377440174420675, "train/loss_code": 1.4292049209276836, "train/loss_prose": 3.558298945426941} +{"step": 1599, "train/loss": 2.73928701877594, "train/lm_loss": 2.73928701877594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2983495008466276e-05, "perf/tokens_per_sec": 24943.440077937954, "train/loss_code": 2.1354719003041587, "train/loss_prose": 3.748814900716146, "train/loss_math": 2.1307179927825928} +{"step": 1600, "train/loss": 2.17126926779747, "train/lm_loss": 2.17126926779747, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.297136655124159e-05, "perf/tokens_per_sec": 25674.052397583215, "train/loss_code": 1.4331265538930893, "train/loss_math": 2.367336630821228, "train/loss_prose": 3.451487421989441} +{"step": 1600, "eval/loss": 2.2382657815369784, "eval/lm_loss": 2.2382657815369784, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 9.377055316019078, "eval/loss_code": 1.608110904965771, "eval/ppl_code": 4.993369363083724, "eval/loss_prose": 3.4843135915304484, "eval/ppl_prose": 32.60004248297152, "eval/loss_math": 2.21001254201345, "eval/ppl_math": 9.115830723194877} +{"step": 1601, "train/loss": 2.3544577062129974, "train/lm_loss": 2.3544577062129974, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2959229334991156e-05, "perf/tokens_per_sec": 25531.315755527616, "train/loss_code": 1.4134226441383362, "train/loss_prose": 3.4000488917032876, "train/loss_math": 2.197623610496521} +{"step": 1602, "train/loss": 2.2306171357631683, "train/lm_loss": 2.2306171357631683, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2947083365630514e-05, "perf/tokens_per_sec": 24441.691161657836, "train/loss_code": 1.8178705275058746, "train/loss_math": 2.3614673614501953, "train/loss_prose": 3.4890530109405518} +{"step": 1603, "train/loss": 2.474234849214554, "train/lm_loss": 2.474234849214554, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.293492864907947e-05, "perf/tokens_per_sec": 26715.353645242903, "train/loss_code": 2.0212216079235077, "train/loss_prose": 3.480372190475464, "train/loss_math": 2.374123692512512} +{"step": 1604, "train/loss": 2.2804808616638184, "train/lm_loss": 2.2804808616638184, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.292276519126207e-05, "perf/tokens_per_sec": 25883.04208512241, "train/loss_code": 1.4306561748186748, "train/loss_math": 2.380624771118164, "train/loss_prose": 3.405001997947693} +{"step": 1605, "train/loss": 2.374565362930298, "train/lm_loss": 2.374565362930298, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.291059299810665e-05, "perf/tokens_per_sec": 27082.164861963458, "train/loss_prose": 3.472921053568522, "train/loss_math": 2.1971789598464966, "train/loss_code": 1.3944672743479412} +{"step": 1606, "train/loss": 2.0276105999946594, "train/lm_loss": 2.0276105999946594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.289841207554578e-05, "perf/tokens_per_sec": 27189.747557572908, "train/loss_code": 1.2643038034439087, "train/loss_prose": 3.723623514175415, "train/loss_math": 2.4521186351776123} +{"step": 1607, "train/loss": 2.724947899580002, "train/lm_loss": 2.724947899580002, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2886222429516296e-05, "perf/tokens_per_sec": 24249.083149017257, "train/loss_prose": 3.3187427520751953, "train/loss_math": 2.3455286026000977, "train/loss_code": 1.4880247116088867} +{"step": 1608, "train/loss": 1.992042601108551, "train/lm_loss": 1.992042601108551, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.287402406595929e-05, "perf/tokens_per_sec": 25504.556389548692, "train/loss_math": 2.279792904853821, "train/loss_code": 1.7042922675609589} +{"step": 1609, "train/loss": 2.3880305886268616, "train/lm_loss": 2.3880305886268616, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2861816990820084e-05, "perf/tokens_per_sec": 26696.838131704368, "train/loss_math": 2.516841093699137, "train/loss_code": 1.4254429737726848, "train/loss_prose": 3.6386961936950684} +{"step": 1610, "train/loss": 2.4705777168273926, "train/lm_loss": 2.4705777168273926, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2849601210048274e-05, "perf/tokens_per_sec": 26276.506794777084, "train/loss_code": 1.5764018595218658, "train/loss_math": 2.1558632850646973, "train/loss_prose": 3.7677164872487388} +{"step": 1611, "train/loss": 2.1997691690921783, "train/lm_loss": 2.1997691690921783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.283737672959766e-05, "perf/tokens_per_sec": 24889.920206770457, "train/loss_code": 1.7479480504989624, "train/loss_math": 2.5631914138793945, "train/loss_prose": 2.916787624359131} +{"step": 1612, "train/loss": 2.442884922027588, "train/lm_loss": 2.442884922027588, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2825143555426326e-05, "perf/tokens_per_sec": 25693.0970348026, "train/loss_code": 1.6041567921638489, "train/loss_math": 2.481487989425659, "train/loss_prose": 4.081737756729126} +{"step": 1613, "train/loss": 2.5576274394989014, "train/lm_loss": 2.5576274394989014, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2812901693496564e-05, "perf/tokens_per_sec": 23765.044070797685, "train/loss_code": 1.7205488234758377, "train/loss_math": 1.5332964658737183, "train/loss_prose": 4.015175978342692} +{"step": 1614, "train/loss": 2.38369944691658, "train/lm_loss": 2.38369944691658, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.280065114977492e-05, "perf/tokens_per_sec": 24288.645726622002, "train/loss_code": 1.4134257237116497, "train/loss_prose": 3.3561158974965415, "train/loss_math": 2.3804848194122314} +{"step": 1615, "train/loss": 2.328337699174881, "train/lm_loss": 2.328337699174881, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.278839193023214e-05, "perf/tokens_per_sec": 25914.002241472674, "train/loss_code": 1.4757259488105774, "train/loss_math": 2.2682918310165405, "train/loss_prose": 3.301041007041931} +{"step": 1616, "train/loss": 2.7948079705238342, "train/lm_loss": 2.7948079705238342, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.277612404084322e-05, "perf/tokens_per_sec": 26472.51673266869, "train/loss_prose": 3.5918875535329184, "train/loss_math": 2.5356925328572593, "train/loss_code": 1.987862229347229} +{"step": 1617, "train/loss": 2.711487114429474, "train/lm_loss": 2.711487114429474, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.276384748758741e-05, "perf/tokens_per_sec": 26390.535562976776, "train/loss_math": 2.4833409786224365, "train/loss_code": 1.8746539950370789, "train/loss_prose": 3.2439770102500916} +{"step": 1618, "train/loss": 2.785919964313507, "train/lm_loss": 2.785919964313507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2751562276448124e-05, "perf/tokens_per_sec": 26316.072815504238, "train/loss_code": 1.4070613384246826, "train/loss_prose": 3.981100877126058, "train/loss_math": 2.5099779764811196} +{"step": 1619, "train/loss": 3.203022837638855, "train/lm_loss": 3.203022837638855, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.273926841341302e-05, "perf/tokens_per_sec": 26264.696316340647, "train/loss_math": 2.390809655189514, "train/loss_prose": 3.8064044952392577, "train/loss_code": 1.8105406761169434} +{"step": 1620, "train/loss": 2.480074465274811, "train/lm_loss": 2.480074465274811, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2726965904474e-05, "perf/tokens_per_sec": 24861.465280511242, "train/loss_math": 2.176664650440216, "train/loss_prose": 3.0283451080322266, "train/loss_code": 2.0489022731781006} +{"step": 1621, "train/loss": 1.853965938091278, "train/lm_loss": 1.853965938091278, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.271465475562716e-05, "perf/tokens_per_sec": 25897.90777681803, "train/loss_code": 1.649040122826894, "train/loss_math": 2.4687434434890747} +{"step": 1622, "train/loss": 2.2816615998744965, "train/lm_loss": 2.2816615998744965, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2702334972872776e-05, "perf/tokens_per_sec": 26131.22474545438, "train/loss_prose": 3.149242719014486, "train/loss_code": 1.6887246072292328, "train/loss_math": 2.050666332244873} +{"step": 1623, "train/loss": 1.9961987137794495, "train/lm_loss": 1.9961987137794495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2690006562215384e-05, "perf/tokens_per_sec": 26228.727826785263, "train/loss_math": 2.2449986139933267, "train/loss_prose": 3.265676975250244, "train/loss_code": 1.4922291338443756} +{"step": 1624, "train/loss": 2.6702512204647064, "train/lm_loss": 2.6702512204647064, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.267766952966369e-05, "perf/tokens_per_sec": 25788.087530058812, "train/loss_prose": 3.549364964167277, "train/loss_math": 2.5759161710739136, "train/loss_code": 1.854027509689331} +{"step": 1625, "train/loss": 2.2532348036766052, "train/lm_loss": 2.2532348036766052, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2665323881230624e-05, "perf/tokens_per_sec": 25969.39153463947, "train/loss_code": 1.3311256567637126, "train/loss_math": 2.292687694231669, "train/loss_prose": 3.577218532562256} +{"step": 1626, "train/loss": 2.5013891458511353, "train/lm_loss": 2.5013891458511353, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.265296962293329e-05, "perf/tokens_per_sec": 26788.29474209443, "train/loss_code": 1.732087214787801, "train/loss_prose": 3.5561869939168296, "train/loss_math": 2.073144793510437} +{"step": 1627, "train/loss": 2.7018088698387146, "train/lm_loss": 2.7018088698387146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.264060676079302e-05, "perf/tokens_per_sec": 25311.414067242244, "train/loss_math": 2.25783771276474, "train/loss_prose": 3.7296269734700522, "train/loss_code": 1.3942400217056274} +{"step": 1628, "train/loss": 2.0671142041683197, "train/lm_loss": 2.0671142041683197, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2628235300835314e-05, "perf/tokens_per_sec": 26683.403486264466, "train/loss_math": 2.4806507428487143, "train/loss_prose": 3.4217145442962646, "train/loss_code": 1.4183116853237152} +{"step": 1629, "train/loss": 2.687751144170761, "train/lm_loss": 2.687751144170761, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.261585524908987e-05, "perf/tokens_per_sec": 26215.960092839996, "train/loss_code": 1.933600664138794, "train/loss_math": 2.1447757482528687, "train/loss_prose": 3.803885539372762} +{"step": 1630, "train/loss": 2.0962537825107574, "train/lm_loss": 2.0962537825107574, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.260346661159058e-05, "perf/tokens_per_sec": 25988.95564447201, "train/loss_code": 1.3666462302207947, "train/loss_prose": 3.2979871034622192, "train/loss_math": 2.3537352085113525} +{"step": 1631, "train/loss": 2.365007668733597, "train/lm_loss": 2.365007668733597, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.259106939437551e-05, "perf/tokens_per_sec": 26183.476521564193, "train/loss_code": 1.2350874543190002, "train/loss_math": 2.1947007179260254, "train/loss_prose": 3.2885943253835044} +{"step": 1632, "train/loss": 2.43924343585968, "train/lm_loss": 2.43924343585968, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.257866360348692e-05, "perf/tokens_per_sec": 26053.39498036123, "train/loss_prose": 3.143977165222168, "train/loss_math": 2.391936206817627, "train/loss_code": 1.266311764717102} +{"step": 1633, "train/loss": 2.7372212409973145, "train/lm_loss": 2.7372212409973145, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.256624924497123e-05, "perf/tokens_per_sec": 24872.191265422178, "train/loss_prose": 3.21754994392395, "train/loss_math": 2.1221728324890137, "train/loss_code": 1.5656743049621582} +{"step": 1634, "train/loss": 2.268517255783081, "train/lm_loss": 2.268517255783081, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2553826324879064e-05, "perf/tokens_per_sec": 26303.421283579988, "train/loss_prose": 3.1970874071121216, "train/loss_math": 2.16447643438975, "train/loss_code": 1.7535115480422974} +{"step": 1635, "train/loss": 2.312249332666397, "train/lm_loss": 2.312249332666397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.254139484926519e-05, "perf/tokens_per_sec": 25888.38552590139, "train/loss_code": 1.5381397406260173, "train/loss_math": 2.3971540133158364, "train/loss_prose": 3.346056342124939} +{"step": 1636, "train/loss": 2.60277396440506, "train/lm_loss": 2.60277396440506, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.252895482418856e-05, "perf/tokens_per_sec": 25405.889810356453, "train/loss_prose": 3.6526378790537515, "train/loss_math": 2.2115213871002197, "train/loss_code": 1.0181920528411865} +{"step": 1637, "train/loss": 2.572795867919922, "train/lm_loss": 2.572795867919922, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2516506255712296e-05, "perf/tokens_per_sec": 26575.75336028055, "train/loss_prose": 3.3572638034820557, "train/loss_math": 2.377020279566447, "train/loss_code": 1.6897570490837097} +{"step": 1638, "train/loss": 2.513566642999649, "train/lm_loss": 2.513566642999649, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.250404914990367e-05, "perf/tokens_per_sec": 26093.282195376076, "train/loss_code": 1.3066814541816711, "train/loss_prose": 3.604302088419596, "train/loss_math": 2.227421601613363} +{"step": 1639, "train/loss": 2.914005994796753, "train/lm_loss": 2.914005994796753, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.249158351283414e-05, "perf/tokens_per_sec": 26545.65359357955, "train/loss_prose": 3.541950798034668, "train/loss_math": 2.4232170581817627, "train/loss_code": 1.5895385146141052} +{"step": 1640, "train/loss": 2.6462835371494293, "train/lm_loss": 2.6462835371494293, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.247910935057929e-05, "perf/tokens_per_sec": 27275.824804677886, "train/loss_math": 2.3663430213928223, "train/loss_prose": 4.0319297313690186, "train/loss_code": 1.8205180168151855} +{"step": 1641, "train/loss": 2.2096011638641357, "train/lm_loss": 2.2096011638641357, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.246662666921888e-05, "perf/tokens_per_sec": 26249.366578709738, "train/loss_math": 2.2112103700637817, "train/loss_code": 1.176234245300293, "train/loss_prose": 3.2397494316101074} +{"step": 1642, "train/loss": 2.1690730154514313, "train/lm_loss": 2.1690730154514313, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.245413547483682e-05, "perf/tokens_per_sec": 26756.378336201593, "train/loss_code": 1.3388027250766754, "train/loss_prose": 3.7308117151260376, "train/loss_math": 2.2678747177124023} +{"step": 1643, "train/loss": 2.464896261692047, "train/lm_loss": 2.464896261692047, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.244163577352116e-05, "perf/tokens_per_sec": 26118.631154382354, "train/loss_code": 1.5037894248962402, "train/loss_prose": 3.4152727127075195, "train/loss_math": 2.480991840362549} +{"step": 1644, "train/loss": 2.4769004583358765, "train/lm_loss": 2.4769004583358765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.242912757136412e-05, "perf/tokens_per_sec": 26040.560503838664, "train/loss_code": 1.6996413469314575, "train/loss_math": 1.97005695104599, "train/loss_prose": 3.411777973175049} +{"step": 1645, "train/loss": 2.6296299397945404, "train/lm_loss": 2.6296299397945404, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.241661087446202e-05, "perf/tokens_per_sec": 26732.854872792344, "train/loss_code": 1.3744616508483887, "train/loss_math": 2.268489181995392, "train/loss_prose": 3.4377843737602234} +{"step": 1646, "train/loss": 2.456896185874939, "train/lm_loss": 2.456896185874939, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2404085688915364e-05, "perf/tokens_per_sec": 25362.569675997347, "train/loss_prose": 3.5414340496063232, "train/loss_math": 2.1932674050331116, "train/loss_code": 1.899616539478302} +{"step": 1647, "train/loss": 2.169951409101486, "train/lm_loss": 2.169951409101486, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2391552020828775e-05, "perf/tokens_per_sec": 26079.814531333966, "train/loss_math": 2.111394852399826, "train/loss_prose": 3.275370717048645, "train/loss_code": 1.181645154953003} +{"step": 1648, "train/loss": 1.7613191604614258, "train/lm_loss": 1.7613191604614258, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2379009876311e-05, "perf/tokens_per_sec": 27230.99155167112, "train/loss_code": 1.4547473669052124, "train/loss_math": 2.2722721894582114} +{"step": 1649, "train/loss": 2.3574644327163696, "train/lm_loss": 2.3574644327163696, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2366459261474933e-05, "perf/tokens_per_sec": 25984.7889271891, "train/loss_math": 2.408250331878662, "train/loss_prose": 3.3139816522598267, "train/loss_code": 1.6690001487731934} +{"step": 1650, "train/loss": 2.999600350856781, "train/lm_loss": 2.999600350856781, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.23539001824376e-05, "perf/tokens_per_sec": 26437.07556321556, "train/loss_code": 2.4242522716522217, "train/loss_math": 2.322261333465576, "train/loss_prose": 3.6514413356781006} +{"step": 1651, "train/loss": 2.6615084409713745, "train/lm_loss": 2.6615084409713745, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.234133264532012e-05, "perf/tokens_per_sec": 26000.086542506986, "train/loss_prose": 3.5963364243507385, "train/loss_code": 1.7266803681850433} +{"step": 1652, "train/loss": 2.5491302013397217, "train/lm_loss": 2.5491302013397217, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2328756656247795e-05, "perf/tokens_per_sec": 26146.138216418014, "train/loss_code": 1.660493552684784, "train/loss_prose": 3.5281168619791665, "train/loss_math": 2.1625680923461914} +{"step": 1653, "train/loss": 1.818696677684784, "train/lm_loss": 1.818696677684784, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2316172221349973e-05, "perf/tokens_per_sec": 25973.985274240807, "train/loss_code": 1.2948549538850784, "train/loss_math": 2.342538356781006} +{"step": 1654, "train/loss": 2.0883069336414337, "train/lm_loss": 2.0883069336414337, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.230357934676017e-05, "perf/tokens_per_sec": 26100.25915718917, "train/loss_code": 1.0015418132146199, "train/loss_prose": 3.2150912284851074, "train/loss_math": 2.4238827228546143} +{"step": 1655, "train/loss": 2.675930142402649, "train/lm_loss": 2.675930142402649, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2290978038616e-05, "perf/tokens_per_sec": 26291.626839700966, "train/loss_prose": 3.793432633082072, "train/loss_math": 2.048911929130554, "train/loss_code": 1.8314945697784424} +{"step": 1656, "train/loss": 2.8046914637088776, "train/lm_loss": 2.8046914637088776, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.22783683030592e-05, "perf/tokens_per_sec": 26321.112640818257, "train/loss_prose": 3.3726881742477417, "train/loss_math": 2.492638111114502, "train/loss_code": 1.4688639640808105} +{"step": 1657, "train/loss": 1.9888756275177002, "train/lm_loss": 1.9888756275177002, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.226575014623557e-05, "perf/tokens_per_sec": 26016.111309825774, "train/loss_prose": 3.148674964904785, "train/loss_code": 1.449609100818634, "train/loss_math": 2.3656094074249268} +{"step": 1658, "train/loss": 2.771596848964691, "train/lm_loss": 2.771596848964691, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.225312357429508e-05, "perf/tokens_per_sec": 26291.14401627064, "train/loss_prose": 3.5919149518013, "train/loss_code": 1.9512783586978912} +{"step": 1659, "train/loss": 1.8978047370910645, "train/lm_loss": 1.8978047370910645, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.224048859339175e-05, "perf/tokens_per_sec": 26152.50647956877, "train/loss_prose": 3.789397954940796, "train/loss_code": 1.3363965988159179, "train/loss_math": 2.3555281162261963} +{"step": 1660, "train/loss": 2.6095307171344757, "train/lm_loss": 2.6095307171344757, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2227845209683716e-05, "perf/tokens_per_sec": 25749.937699065173, "train/loss_math": 2.5023523966471353, "train/loss_code": 1.5763606230417888, "train/loss_prose": 4.320053577423096} +{"step": 1661, "train/loss": 2.6399320662021637, "train/lm_loss": 2.6399320662021637, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.221519342933321e-05, "perf/tokens_per_sec": 26494.358237911278, "train/loss_prose": 3.325761079788208, "train/loss_math": 2.3942525386810303, "train/loss_code": 1.5651626586914062} +{"step": 1662, "train/loss": 2.4583045542240143, "train/lm_loss": 2.4583045542240143, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.220253325850657e-05, "perf/tokens_per_sec": 25929.412273247977, "train/loss_code": 1.5530455509821575, "train/loss_prose": 3.8806668519973755, "train/loss_math": 2.4153220653533936} +{"step": 1663, "train/loss": 2.877279222011566, "train/lm_loss": 2.877279222011566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.218986470337419e-05, "perf/tokens_per_sec": 25682.841674091527, "train/loss_math": 2.413165867328644, "train/loss_prose": 3.3413926362991333} +{"step": 1664, "train/loss": 1.7054109275341034, "train/lm_loss": 1.7054109275341034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2177187770110576e-05, "perf/tokens_per_sec": 26051.22210276512, "train/loss_code": 1.5212258100509644, "train/loss_math": 2.2579660415649414} +{"step": 1665, "train/loss": 2.3316179513931274, "train/lm_loss": 2.3316179513931274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2164502464894316e-05, "perf/tokens_per_sec": 25867.063936400868, "train/loss_code": 1.3086681962013245, "train/loss_prose": 3.354567766189575} +{"step": 1666, "train/loss": 1.9316615164279938, "train/lm_loss": 1.9316615164279938, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.215180879390808e-05, "perf/tokens_per_sec": 26044.192364657854, "train/loss_math": 2.446930170059204, "train/loss_code": 1.5718919038772583, "train/loss_prose": 3.5750107765197754} +{"step": 1667, "train/loss": 1.8083410859107971, "train/lm_loss": 1.8083410859107971, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.213910676333859e-05, "perf/tokens_per_sec": 25944.331292605355, "train/loss_code": 1.5857242941856384, "train/loss_prose": 3.3666586875915527} +{"step": 1668, "train/loss": 2.567527413368225, "train/lm_loss": 2.567527413368225, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.212639637937668e-05, "perf/tokens_per_sec": 25881.131462988043, "train/loss_prose": 4.867523431777954, "train/loss_code": 1.7768023490905762, "train/loss_math": 1.9211617708206177} +{"step": 1669, "train/loss": 2.2471576631069183, "train/lm_loss": 2.2471576631069183, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.211367764821722e-05, "perf/tokens_per_sec": 26648.51180423277, "train/loss_code": 1.9589306831359863, "train/loss_math": 2.3929054737091064, "train/loss_prose": 3.396796703338623} +{"step": 1670, "train/loss": 2.6246445178985596, "train/lm_loss": 2.6246445178985596, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.210095057605917e-05, "perf/tokens_per_sec": 26135.080731848684, "train/loss_prose": 3.3844884037971497, "train/loss_math": 2.3016421794891357, "train/loss_code": 1.7191867431004841} +{"step": 1671, "train/loss": 2.461034417152405, "train/lm_loss": 2.461034417152405, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.208821516910557e-05, "perf/tokens_per_sec": 25931.878013584905, "train/loss_code": 1.1768697500228882, "train/loss_prose": 3.4407424330711365, "train/loss_math": 2.394695520401001} +{"step": 1672, "train/loss": 2.389074385166168, "train/lm_loss": 2.389074385166168, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.207547143356347e-05, "perf/tokens_per_sec": 25736.320822609785, "train/loss_code": 1.7532532811164856, "train/loss_math": 2.3482306003570557, "train/loss_prose": 3.1065837144851685} +{"step": 1673, "train/loss": 2.206845462322235, "train/lm_loss": 2.206845462322235, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.206271937564405e-05, "perf/tokens_per_sec": 26145.382191176246, "train/loss_math": 2.3400778770446777, "train/loss_code": 1.2176645398139954, "train/loss_prose": 3.51904559135437} +{"step": 1674, "train/loss": 2.5444201231002808, "train/lm_loss": 2.5444201231002808, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2049959001562464e-05, "perf/tokens_per_sec": 26055.844840660775, "train/loss_math": 2.2013673186302185, "train/loss_prose": 3.413487434387207, "train/loss_code": 2.361458420753479} +{"step": 1675, "train/loss": 1.929220050573349, "train/lm_loss": 1.929220050573349, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2037190317538e-05, "perf/tokens_per_sec": 25974.731419534415, "train/loss_code": 1.3154573142528534, "train/loss_math": 2.3875274658203125, "train/loss_prose": 3.0093483924865723} +{"step": 1676, "train/loss": 2.84365713596344, "train/lm_loss": 2.84365713596344, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.202441332979394e-05, "perf/tokens_per_sec": 25379.35508765399, "train/loss_prose": 3.419541915257772, "train/loss_math": 2.4981261253356934} +{"step": 1677, "train/loss": 1.8920934796333313, "train/lm_loss": 1.8920934796333313, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.201162804455763e-05, "perf/tokens_per_sec": 26443.70846435998, "train/loss_math": 2.0808132886886597, "train/loss_code": 1.056090608239174, "train/loss_prose": 3.3753795623779297} +{"step": 1678, "train/loss": 2.664925217628479, "train/lm_loss": 2.664925217628479, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.199883446806048e-05, "perf/tokens_per_sec": 25955.698488575177, "train/loss_prose": 3.8335959116617837, "train/loss_math": 2.37982185681661, "train/loss_code": 1.3395739197731018} +{"step": 1679, "train/loss": 2.644337236881256, "train/lm_loss": 2.644337236881256, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.198603260653792e-05, "perf/tokens_per_sec": 26070.316312129922, "train/loss_code": 1.877718448638916, "train/loss_math": 2.3512288331985474, "train/loss_prose": 3.2906880378723145} +{"step": 1680, "train/loss": 2.166335940361023, "train/lm_loss": 2.166335940361023, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1973222466229404e-05, "perf/tokens_per_sec": 26085.160321709303, "train/loss_code": 1.4368210236231487, "train/loss_math": 2.340790331363678, "train/loss_prose": 3.657062530517578} +{"step": 1681, "train/loss": 2.526519477367401, "train/lm_loss": 2.526519477367401, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1960404053378454e-05, "perf/tokens_per_sec": 26969.459262933822, "train/loss_prose": 3.684020519256592, "train/loss_code": 1.458389441172282, "train/loss_math": 2.3924630880355835} +{"step": 1682, "train/loss": 2.44466769695282, "train/lm_loss": 2.44466769695282, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.194757737423261e-05, "perf/tokens_per_sec": 26233.333665197202, "train/loss_math": 2.236338257789612, "train/loss_code": 1.3194589217503865, "train/loss_prose": 3.708762248357137} +{"step": 1683, "train/loss": 2.201164871454239, "train/lm_loss": 2.201164871454239, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.193474243504343e-05, "perf/tokens_per_sec": 26959.767472588865, "train/loss_code": 1.4643190801143646, "train/loss_prose": 3.2536226908365884, "train/loss_math": 1.9911748170852661} +{"step": 1684, "train/loss": 2.5763877630233765, "train/lm_loss": 2.5763877630233765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.192189924206652e-05, "perf/tokens_per_sec": 26689.165458550695, "train/loss_prose": 3.1447013219197593, "train/loss_math": 2.2603634198506675, "train/loss_code": 2.197953462600708} +{"step": 1685, "train/loss": 2.2168857157230377, "train/lm_loss": 2.2168857157230377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1909047801561484e-05, "perf/tokens_per_sec": 27250.90403579762, "train/loss_prose": 3.134660840034485, "train/loss_code": 1.4197728633880615, "train/loss_math": 2.402148485183716} +{"step": 1686, "train/loss": 2.5559509694576263, "train/lm_loss": 2.5559509694576263, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.189618811979197e-05, "perf/tokens_per_sec": 24738.279371890592, "train/loss_code": 1.8034455478191376, "train/loss_prose": 3.7015079657236734, "train/loss_math": 2.1293015480041504} +{"step": 1687, "train/loss": 2.0268620550632477, "train/lm_loss": 2.0268620550632477, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.188332020302561e-05, "perf/tokens_per_sec": 26360.36828691592, "train/loss_code": 1.5798139810562133, "train/loss_math": 2.558395743370056, "train/loss_prose": 3.1990344524383545} +{"step": 1688, "train/loss": 2.1546111404895782, "train/lm_loss": 2.1546111404895782, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1870444057534095e-05, "perf/tokens_per_sec": 26127.01228800156, "train/loss_code": 1.4999306201934814, "train/loss_math": 2.057854950428009, "train/loss_prose": 3.560728430747986} +{"step": 1689, "train/loss": 2.2944062650203705, "train/lm_loss": 2.2944062650203705, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.185755968959308e-05, "perf/tokens_per_sec": 25665.652557919962, "train/loss_prose": 3.2766669591267905, "train/loss_code": 1.2986985445022583, "train/loss_math": 2.314576506614685} +{"step": 1690, "train/loss": 2.574201285839081, "train/lm_loss": 2.574201285839081, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.184466710548227e-05, "perf/tokens_per_sec": 25904.350974963963, "train/loss_code": 2.2033921082814536, "train/loss_math": 2.4048781394958496, "train/loss_prose": 3.0578925609588623} +{"step": 1691, "train/loss": 2.357679784297943, "train/lm_loss": 2.357679784297943, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.183176631148534e-05, "perf/tokens_per_sec": 25910.71922249152, "train/loss_prose": 3.4373064041137695, "train/loss_math": 2.2654015123844147, "train/loss_code": 1.462609589099884} +{"step": 1692, "train/loss": 2.514669716358185, "train/lm_loss": 2.514669716358185, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.181885731388999e-05, "perf/tokens_per_sec": 26029.276619986333, "train/loss_prose": 3.8512015342712402, "train/loss_code": 1.3095167875289917, "train/loss_math": 2.2210874795913695} +{"step": 1693, "train/loss": 2.5091338753700256, "train/lm_loss": 2.5091338753700256, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.180594011898791e-05, "perf/tokens_per_sec": 26195.373819064524, "train/loss_prose": 3.4004430770874023, "train/loss_code": 1.3818052212397258, "train/loss_math": 2.3258824348449707} +{"step": 1694, "train/loss": 2.427006244659424, "train/lm_loss": 2.427006244659424, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.179301473307476e-05, "perf/tokens_per_sec": 26362.9166753623, "train/loss_math": 2.5197505950927734, "train/loss_code": 1.6523982286453247, "train/loss_prose": 3.512500524520874} +{"step": 1695, "train/loss": 2.1279843151569366, "train/lm_loss": 2.1279843151569366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.178008116245024e-05, "perf/tokens_per_sec": 25987.854872306085, "train/loss_code": 1.8989969968795777, "train/loss_prose": 3.2071001529693604, "train/loss_math": 2.1608946323394775} +{"step": 1696, "train/loss": 2.5854617953300476, "train/lm_loss": 2.5854617953300476, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1767139413418e-05, "perf/tokens_per_sec": 26875.418166231517, "train/loss_math": 2.4487962325414023, "train/loss_code": 2.6245803833007812, "train/loss_prose": 3.36633563041687} +{"step": 1697, "train/loss": 2.882832109928131, "train/lm_loss": 2.882832109928131, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1754189492285714e-05, "perf/tokens_per_sec": 26257.99237014575, "train/loss_prose": 3.7280879616737366, "train/loss_math": 2.1094915866851807, "train/loss_code": 1.821830153465271} +{"step": 1698, "train/loss": 2.4822827875614166, "train/lm_loss": 2.4822827875614166, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.174123140536499e-05, "perf/tokens_per_sec": 26675.531432493157, "train/loss_math": 2.383941411972046, "train/loss_code": 1.4707811673482258, "train/loss_prose": 3.5593454043070474} +{"step": 1699, "train/loss": 2.4923474192619324, "train/lm_loss": 2.4923474192619324, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.172826515897146e-05, "perf/tokens_per_sec": 26192.138920015794, "train/loss_math": 2.2517593801021576, "train/loss_prose": 3.1815507411956787, "train/loss_code": 1.3870903253555298} +{"step": 1700, "train/loss": 2.7539008259773254, "train/lm_loss": 2.7539008259773254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1715290759424705e-05, "perf/tokens_per_sec": 25903.06207886472, "train/loss_prose": 3.5399657487869263, "train/loss_code": 1.5787684917449951, "train/loss_math": 2.0975250005722046} +{"step": 1701, "train/loss": 2.8037731051445007, "train/lm_loss": 2.8037731051445007, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.170230821304829e-05, "perf/tokens_per_sec": 26097.761293292155, "train/loss_math": 2.0753329594930015, "train/loss_prose": 3.7219589948654175, "train/loss_code": 1.3163503408432007} +{"step": 1702, "train/loss": 2.66578871011734, "train/lm_loss": 2.66578871011734, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1689317526169766e-05, "perf/tokens_per_sec": 26009.88804762359, "train/loss_prose": 3.4741620421409607, "train/loss_code": 1.3623587489128113, "train/loss_math": 2.3524723052978516} +{"step": 1703, "train/loss": 2.591208577156067, "train/lm_loss": 2.591208577156067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1676318705120616e-05, "perf/tokens_per_sec": 25551.25114929451, "train/loss_prose": 3.2330901622772217, "train/loss_math": 2.2268885374069214, "train/loss_code": 2.1228435039520264} +{"step": 1704, "train/loss": 2.068816602230072, "train/lm_loss": 2.068816602230072, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.166331175623631e-05, "perf/tokens_per_sec": 26068.37794276749, "train/loss_code": 1.8018681208292644, "train/loss_prose": 3.6567258834838867, "train/loss_math": 2.0825984477996826} +{"step": 1705, "train/loss": 1.7733739614486694, "train/lm_loss": 1.7733739614486694, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.165029668585629e-05, "perf/tokens_per_sec": 25967.546643691712, "train/loss_prose": 2.63805890083313, "train/loss_math": 2.156745751698812, "train/loss_code": 1.2696737945079803} +{"step": 1706, "train/loss": 2.991073250770569, "train/lm_loss": 2.991073250770569, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.163727350032394e-05, "perf/tokens_per_sec": 25984.317307532332, "train/loss_prose": 3.8212236762046814, "train/loss_math": 2.500615437825521, "train/loss_code": 1.1418448686599731} +{"step": 1707, "train/loss": 2.2277429699897766, "train/lm_loss": 2.2277429699897766, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.162424220598658e-05, "perf/tokens_per_sec": 25958.44378700013, "train/loss_code": 1.5353306531906128, "train/loss_math": 2.226415514945984, "train/loss_prose": 3.2683523893356323} +{"step": 1708, "train/loss": 2.239477038383484, "train/lm_loss": 2.239477038383484, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.161120280919554e-05, "perf/tokens_per_sec": 26169.198077051966, "train/loss_code": 1.673503190279007, "train/loss_math": 2.1476470232009888, "train/loss_prose": 3.46325421333313} +{"step": 1709, "train/loss": 2.204556792974472, "train/lm_loss": 2.204556792974472, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1598155316306044e-05, "perf/tokens_per_sec": 26202.44545411563, "train/loss_code": 1.3867865800857544, "train/loss_prose": 3.322471857070923, "train/loss_math": 2.2770503759384155} +{"step": 1710, "train/loss": 2.9401773810386658, "train/lm_loss": 2.9401773810386658, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.158509973367728e-05, "perf/tokens_per_sec": 26329.866363416226, "train/loss_math": 2.459092676639557, "train/loss_prose": 3.421261966228485} +{"step": 1711, "train/loss": 2.748128741979599, "train/lm_loss": 2.748128741979599, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.157203606767238e-05, "perf/tokens_per_sec": 26338.625910475585, "train/loss_code": 1.728404128551483, "train/loss_prose": 4.447669982910156} +{"step": 1712, "train/loss": 2.089357852935791, "train/lm_loss": 2.089357852935791, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.155896432465842e-05, "perf/tokens_per_sec": 25663.5055360696, "train/loss_code": 1.5764833986759186, "train/loss_prose": 3.577826976776123, "train/loss_math": 2.277033885320028} +{"step": 1713, "train/loss": 2.554775357246399, "train/lm_loss": 2.554775357246399, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.154588451100642e-05, "perf/tokens_per_sec": 26120.735862310135, "train/loss_math": 2.2283153533935547, "train/loss_code": 1.5798535346984863, "train/loss_prose": 3.3675816655158997} +{"step": 1714, "train/loss": 2.3267744183540344, "train/lm_loss": 2.3267744183540344, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1532796633091296e-05, "perf/tokens_per_sec": 26120.85500686476, "train/loss_prose": 3.1889703273773193, "train/loss_math": 2.090200400352478, "train/loss_code": 1.7852526903152466} +{"step": 1715, "train/loss": 2.4885982275009155, "train/lm_loss": 2.4885982275009155, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1519700697291944e-05, "perf/tokens_per_sec": 25848.188407345275, "train/loss_code": 1.6609803438186646, "train/loss_math": 2.4592071374257407, "train/loss_prose": 3.77411150932312} +{"step": 1716, "train/loss": 2.261030673980713, "train/lm_loss": 2.261030673980713, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.150659670999116e-05, "perf/tokens_per_sec": 26299.11287120225, "train/loss_code": 1.6259020566940308, "train/loss_prose": 3.426236391067505, "train/loss_math": 2.366082549095154} +{"step": 1717, "train/loss": 2.359922617673874, "train/lm_loss": 2.359922617673874, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.149348467757566e-05, "perf/tokens_per_sec": 25880.000819489385, "train/loss_prose": 3.399373173713684, "train/loss_code": 1.9120444655418396, "train/loss_math": 2.064136505126953} +{"step": 1718, "train/loss": 2.3936188519001007, "train/lm_loss": 2.3936188519001007, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.148036460643608e-05, "perf/tokens_per_sec": 26511.366463020375, "train/loss_prose": 3.6606620152791343, "train/loss_code": 1.4647671282291412, "train/loss_math": 2.307896614074707} +{"step": 1719, "train/loss": 2.700453132390976, "train/lm_loss": 2.700453132390976, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.146723650296701e-05, "perf/tokens_per_sec": 25942.764184820197, "train/loss_prose": 3.578230917453766, "train/loss_code": 1.2849962711334229, "train/loss_math": 2.3603543043136597} +{"step": 1720, "train/loss": 3.1305973529815674, "train/lm_loss": 3.1305973529815674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.145410037356692e-05, "perf/tokens_per_sec": 25873.29696385542, "train/loss_prose": 3.55633544921875, "train/loss_math": 2.421034018198649} +{"step": 1721, "train/loss": 2.483995020389557, "train/lm_loss": 2.483995020389557, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1440956224638184e-05, "perf/tokens_per_sec": 26130.032402703673, "train/loss_code": 1.7075658639272053, "train/loss_math": 2.246588706970215, "train/loss_prose": 3.4186949729919434} +{"step": 1722, "train/loss": 2.3729456961154938, "train/lm_loss": 2.3729456961154938, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1427804062587116e-05, "perf/tokens_per_sec": 26076.726851036703, "train/loss_code": 1.8023871580759685, "train/loss_prose": 3.22984516620636, "train/loss_math": 2.3722381591796875} +{"step": 1723, "train/loss": 2.384616255760193, "train/lm_loss": 2.384616255760193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1414643893823914e-05, "perf/tokens_per_sec": 26150.63548050256, "train/loss_math": 2.453326185544332, "train/loss_prose": 3.2900962829589844, "train/loss_code": 1.0668765306472778} +{"step": 1724, "train/loss": 2.477666914463043, "train/lm_loss": 2.477666914463043, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.140147572476268e-05, "perf/tokens_per_sec": 25938.69059043432, "train/loss_prose": 3.853788375854492, "train/loss_code": 1.8917950689792633, "train/loss_math": 2.27328884601593} +{"step": 1725, "train/loss": 2.5250861644744873, "train/lm_loss": 2.5250861644744873, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.138829956182144e-05, "perf/tokens_per_sec": 25675.89419298914, "train/loss_code": 1.547523299853007, "train/loss_prose": 3.2103201150894165, "train/loss_math": 2.7168383598327637} +{"step": 1726, "train/loss": 2.631521552801132, "train/lm_loss": 2.631521552801132, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.137511541142207e-05, "perf/tokens_per_sec": 25644.65784571292, "train/loss_code": 1.4729719161987305, "train/loss_math": 2.4274845918019614, "train/loss_prose": 3.607924779256185} +{"step": 1727, "train/loss": 2.313115268945694, "train/lm_loss": 2.313115268945694, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.136192327999037e-05, "perf/tokens_per_sec": 25495.283312928048, "train/loss_code": 1.558341097831726, "train/loss_prose": 3.571072260538737} +{"step": 1728, "train/loss": 2.7022687196731567, "train/lm_loss": 2.7022687196731567, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.134872317395604e-05, "perf/tokens_per_sec": 25456.523074780216, "train/loss_math": 2.3380011320114136, "train/loss_prose": 3.5423268477121987, "train/loss_code": 1.6391644477844238} +{"step": 1729, "train/loss": 2.6236909329891205, "train/lm_loss": 2.6236909329891205, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.133551509975264e-05, "perf/tokens_per_sec": 26176.335352187143, "train/loss_math": 2.4621872901916504, "train/loss_code": 1.6543700297673543, "train/loss_prose": 3.70068097114563} +{"step": 1730, "train/loss": 2.702609062194824, "train/lm_loss": 2.702609062194824, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1322299063817624e-05, "perf/tokens_per_sec": 25586.870445748125, "train/loss_code": 1.8050351738929749, "train/loss_prose": 3.537489573160807, "train/loss_math": 2.4661107858022056} +{"step": 1731, "train/loss": 2.402475357055664, "train/lm_loss": 2.402475357055664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.130907507259233e-05, "perf/tokens_per_sec": 24034.310266002245, "train/loss_code": 2.263549566268921, "train/loss_math": 2.2828078985214235, "train/loss_prose": 3.2786638736724854} +{"step": 1732, "train/loss": 2.005432903766632, "train/lm_loss": 2.005432903766632, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1295843132521973e-05, "perf/tokens_per_sec": 25128.92815675422, "train/loss_math": 2.268359343210856, "train/loss_code": 1.5487378537654877, "train/loss_prose": 3.043433427810669} +{"step": 1733, "train/loss": 3.126273810863495, "train/lm_loss": 3.126273810863495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.128260325005564e-05, "perf/tokens_per_sec": 25917.40338829635, "train/loss_prose": 3.800597333908081, "train/loss_code": 1.7404370307922363, "train/loss_math": 2.526329517364502} +{"step": 1734, "train/loss": 2.3985603749752045, "train/lm_loss": 2.3985603749752045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1269355431646274e-05, "perf/tokens_per_sec": 25354.74657457422, "train/loss_code": 1.4765416383743286, "train/loss_math": 2.2629352807998657, "train/loss_prose": 3.4109957218170166} +{"step": 1735, "train/loss": 2.781590700149536, "train/lm_loss": 2.781590700149536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.125609968375072e-05, "perf/tokens_per_sec": 26786.248141085707, "train/loss_math": 2.2888824343681335, "train/loss_prose": 3.274298906326294} +{"step": 1736, "train/loss": 2.04313924908638, "train/lm_loss": 2.04313924908638, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1242836012829665e-05, "perf/tokens_per_sec": 25977.08794929137, "train/loss_code": 1.522492265701294, "train/loss_prose": 3.424265742301941, "train/loss_math": 1.8841220140457153} +{"step": 1737, "train/loss": 2.8275970816612244, "train/lm_loss": 2.8275970816612244, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1229564425347654e-05, "perf/tokens_per_sec": 25760.82612809436, "train/loss_code": 2.057330369949341, "train/loss_prose": 3.9359078407287598, "train/loss_math": 2.3205312490463257} +{"step": 1738, "train/loss": 2.568361312150955, "train/lm_loss": 2.568361312150955, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.121628492777311e-05, "perf/tokens_per_sec": 26768.009622845533, "train/loss_prose": 4.044568061828613, "train/loss_math": 2.3759709199269614, "train/loss_code": 1.7766135533650715} +{"step": 1739, "train/loss": 2.458353191614151, "train/lm_loss": 2.458353191614151, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1202997526578276e-05, "perf/tokens_per_sec": 27307.64760468144, "train/loss_prose": 3.489316701889038, "train/loss_math": 1.9595876336097717, "train/loss_code": 1.36052405834198} +{"step": 1740, "train/loss": 2.5953714549541473, "train/lm_loss": 2.5953714549541473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.118970222823929e-05, "perf/tokens_per_sec": 26447.453657335067, "train/loss_prose": 3.237931489944458, "train/loss_math": 2.1779142220815024, "train/loss_code": 1.2775026559829712} +{"step": 1741, "train/loss": 2.0109120905399323, "train/lm_loss": 2.0109120905399323, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1176399039236116e-05, "perf/tokens_per_sec": 26250.329176744905, "train/loss_math": 1.9620654344558717, "train/loss_code": 1.6360602378845215, "train/loss_prose": 3.004849433898926} +{"step": 1742, "train/loss": 2.212472230195999, "train/lm_loss": 2.212472230195999, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.116308796605255e-05, "perf/tokens_per_sec": 26180.803113680107, "train/loss_prose": 3.520660400390625, "train/loss_math": 2.2576069831848145, "train/loss_code": 0.8741943637530009} +{"step": 1743, "train/loss": 2.363215297460556, "train/lm_loss": 2.363215297460556, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1149769015176275e-05, "perf/tokens_per_sec": 26303.461555774327, "train/loss_code": 1.1952123641967773, "train/loss_math": 2.4169812202453613, "train/loss_prose": 3.423686146736145} +{"step": 1744, "train/loss": 2.704296499490738, "train/lm_loss": 2.704296499490738, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.113644219309877e-05, "perf/tokens_per_sec": 26162.781840637293, "train/loss_math": 2.319952885309855, "train/loss_prose": 3.400313436985016, "train/loss_code": 1.073258876800537} +{"step": 1745, "train/loss": 2.804305672645569, "train/lm_loss": 2.804305672645569, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.112310750631536e-05, "perf/tokens_per_sec": 26460.488681817133, "train/loss_code": 2.101742386817932, "train/loss_prose": 3.4897250533103943, "train/loss_math": 2.136029839515686} +{"step": 1746, "train/loss": 1.9681456089019775, "train/lm_loss": 1.9681456089019775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.110976496132522e-05, "perf/tokens_per_sec": 26715.104387188472, "train/loss_prose": 3.294069766998291, "train/loss_math": 2.1561049818992615, "train/loss_code": 1.2112037986516953} +{"step": 1747, "train/loss": 2.623614728450775, "train/lm_loss": 2.623614728450775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.109641456463135e-05, "perf/tokens_per_sec": 26316.274371460564, "train/loss_code": 1.6861662069956462, "train/loss_prose": 3.4270108938217163, "train/loss_math": 2.2223758697509766} +{"step": 1748, "train/loss": 2.777156352996826, "train/lm_loss": 2.777156352996826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.108305632274055e-05, "perf/tokens_per_sec": 26272.167017627566, "train/loss_prose": 3.506097733974457, "train/loss_math": 2.2904807329177856, "train/loss_code": 1.805949628353119} +{"step": 1749, "train/loss": 2.213103622198105, "train/lm_loss": 2.213103622198105, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1069690242163484e-05, "perf/tokens_per_sec": 26125.224391230495, "train/loss_math": 2.272752857208252, "train/loss_code": 1.2571206092834473, "train/loss_prose": 3.82682466506958} +{"step": 1750, "train/loss": 2.139402151107788, "train/lm_loss": 2.139402151107788, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1056316329414616e-05, "perf/tokens_per_sec": 26205.722873133134, "train/loss_code": 2.0167378187179565, "train/loss_math": 2.213000702857971} +{"step": 1751, "train/loss": 2.5347803235054016, "train/lm_loss": 2.5347803235054016, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.104293459101222e-05, "perf/tokens_per_sec": 23915.469980149173, "train/loss_code": 1.3293572664260864, "train/loss_prose": 3.740203082561493} +{"step": 1752, "train/loss": 2.2588155269622803, "train/lm_loss": 2.2588155269622803, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.102954503347839e-05, "perf/tokens_per_sec": 26007.68304900775, "train/loss_math": 2.2681624412536623, "train/loss_code": 1.8255958557128906, "train/loss_prose": 3.078519105911255} +{"step": 1753, "train/loss": 2.7740582823753357, "train/lm_loss": 2.7740582823753357, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.101614766333904e-05, "perf/tokens_per_sec": 26223.803371875598, "train/loss_math": 2.379136085510254, "train/loss_code": 1.192650318145752, "train/loss_prose": 3.8277573585510254} +{"step": 1754, "train/loss": 2.5196517407894135, "train/lm_loss": 2.5196517407894135, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.100274248712389e-05, "perf/tokens_per_sec": 26015.20520671494, "train/loss_prose": 3.5492021242777505, "train/loss_code": 1.8153908550739288, "train/loss_math": 2.2480454444885254} +{"step": 1755, "train/loss": 2.067286729812622, "train/lm_loss": 2.067286729812622, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.098932951136645e-05, "perf/tokens_per_sec": 25668.413542507096, "train/loss_prose": 3.2551910877227783, "train/loss_math": 2.1664775609970093, "train/loss_code": 1.5390640497207642} +{"step": 1756, "train/loss": 2.0889499187469482, "train/lm_loss": 2.0889499187469482, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0975908742604055e-05, "perf/tokens_per_sec": 25427.097357811417, "train/loss_code": 1.1024013310670853, "train/loss_prose": 3.7996914386749268, "train/loss_math": 2.351305603981018} +{"step": 1757, "train/loss": 2.407699078321457, "train/lm_loss": 2.407699078321457, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.096248018737781e-05, "perf/tokens_per_sec": 25787.313361772645, "train/loss_math": 2.397839069366455, "train/loss_code": 1.3642139832178752, "train/loss_prose": 3.987716794013977} +{"step": 1758, "train/loss": 2.6215617954730988, "train/lm_loss": 2.6215617954730988, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.094904385223264e-05, "perf/tokens_per_sec": 25895.44849208359, "train/loss_math": 1.9514906406402588, "train/loss_prose": 3.5680935978889465, "train/loss_code": 1.3985691666603088} +{"step": 1759, "train/loss": 2.1046315729618073, "train/lm_loss": 2.1046315729618073, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.093559974371725e-05, "perf/tokens_per_sec": 25490.74385761872, "train/loss_code": 1.673308539390564, "train/loss_math": 2.11857533454895, "train/loss_prose": 3.17596697807312} +{"step": 1760, "train/loss": 2.485605776309967, "train/lm_loss": 2.485605776309967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.092214786838413e-05, "perf/tokens_per_sec": 25082.078271863767, "train/loss_code": 1.2338414192199707, "train/loss_prose": 3.977466662724813, "train/loss_math": 2.125460982322693} +{"step": 1761, "train/loss": 1.909933477640152, "train/lm_loss": 1.909933477640152, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.090868823278956e-05, "perf/tokens_per_sec": 25915.56587627109, "train/loss_math": 2.262689620256424, "train/loss_code": 1.5571774691343307} +{"step": 1762, "train/loss": 2.173718959093094, "train/lm_loss": 2.173718959093094, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0895220843493606e-05, "perf/tokens_per_sec": 26027.817531186654, "train/loss_prose": 3.710410714149475, "train/loss_code": 1.5338459014892578, "train/loss_math": 2.2997007369995117} +{"step": 1763, "train/loss": 2.5127245783805847, "train/lm_loss": 2.5127245783805847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.088174570706011e-05, "perf/tokens_per_sec": 24916.48878459401, "train/loss_math": 2.623650312423706, "train/loss_code": 1.7989134192466736, "train/loss_prose": 3.3857176303863525} +{"step": 1764, "train/loss": 1.6949146389961243, "train/lm_loss": 1.6949146389961243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.086826283005669e-05, "perf/tokens_per_sec": 26749.087883411703, "train/loss_code": 1.3368185758590698, "train/loss_math": 2.291741371154785} +{"step": 1765, "train/loss": 2.51470148563385, "train/lm_loss": 2.51470148563385, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.085477221905474e-05, "perf/tokens_per_sec": 25079.405365398623, "train/loss_code": 1.4631501038869221, "train/loss_math": 2.132561683654785, "train/loss_prose": 3.3989001512527466} +{"step": 1766, "train/loss": 2.4732767939567566, "train/lm_loss": 2.4732767939567566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0841273880629416e-05, "perf/tokens_per_sec": 26448.797300293125, "train/loss_prose": 3.4989094734191895, "train/loss_code": 1.668296257654826, "train/loss_math": 2.142298936843872} +{"step": 1767, "train/loss": 1.965634435415268, "train/lm_loss": 1.965634435415268, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.082776782135964e-05, "perf/tokens_per_sec": 26418.04988405494, "train/loss_math": 2.174144357442856, "train/loss_code": 1.7571243047714233} +{"step": 1768, "train/loss": 2.4351431727409363, "train/lm_loss": 2.4351431727409363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0814254047828116e-05, "perf/tokens_per_sec": 24867.439016411477, "train/loss_code": 1.9287395000457763, "train/loss_prose": 3.279149055480957} +{"step": 1769, "train/loss": 2.4876279830932617, "train/lm_loss": 2.4876279830932617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.080073256662127e-05, "perf/tokens_per_sec": 25878.441473304934, "train/loss_math": 2.215102583169937, "train/loss_prose": 3.407879670461019, "train/loss_code": 0.8169746994972229} +{"step": 1770, "train/loss": 2.129494071006775, "train/lm_loss": 2.129494071006775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.078720338432933e-05, "perf/tokens_per_sec": 26160.590679571436, "train/loss_math": 2.5063859621683755, "train/loss_code": 1.5345653593540192, "train/loss_prose": 3.378533363342285} +{"step": 1771, "train/loss": 2.5951567590236664, "train/lm_loss": 2.5951567590236664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.077366650754624e-05, "perf/tokens_per_sec": 26109.97761941454, "train/loss_prose": 3.134167969226837, "train/loss_math": 2.1875497102737427, "train/loss_code": 1.9247414469718933} +{"step": 1772, "train/loss": 2.1443091332912445, "train/lm_loss": 2.1443091332912445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0760121942869725e-05, "perf/tokens_per_sec": 26908.50441143072, "train/loss_math": 2.4016387462615967, "train/loss_prose": 3.652259588241577, "train/loss_code": 1.7397871017456055} +{"step": 1773, "train/loss": 2.590255558490753, "train/lm_loss": 2.590255558490753, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.074656969690122e-05, "perf/tokens_per_sec": 24366.32766861542, "train/loss_math": 2.1794714530309043, "train/loss_code": 2.3857207894325256, "train/loss_prose": 3.1373958587646484} +{"step": 1774, "train/loss": 2.037076860666275, "train/lm_loss": 2.037076860666275, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.073300977624594e-05, "perf/tokens_per_sec": 25619.07395438935, "train/loss_math": 2.2580984234809875, "train/loss_prose": 3.425994396209717, "train/loss_code": 1.2794088125228882} +{"step": 1775, "train/loss": 2.529324471950531, "train/lm_loss": 2.529324471950531, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.071944218751282e-05, "perf/tokens_per_sec": 25661.167224554847, "train/loss_prose": 3.22310471534729, "train/loss_math": 2.537705103556315, "train/loss_code": 2.058423558870951} +{"step": 1776, "train/loss": 2.7178280353546143, "train/lm_loss": 2.7178280353546143, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.070586693731454e-05, "perf/tokens_per_sec": 25484.429138507, "train/loss_prose": 3.6796998182932534, "train/loss_code": 1.8966928720474243, "train/loss_math": 2.506723642349243} +{"step": 1777, "train/loss": 2.547096997499466, "train/lm_loss": 2.547096997499466, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0692284032267516e-05, "perf/tokens_per_sec": 26389.48158252075, "train/loss_prose": 3.4759281277656555, "train/loss_math": 1.7615797519683838, "train/loss_code": 1.5704941749572754} +{"step": 1778, "train/loss": 2.5015558004379272, "train/lm_loss": 2.5015558004379272, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.067869347899188e-05, "perf/tokens_per_sec": 25499.59729328174, "train/loss_code": 1.8532111247380574, "train/loss_prose": 3.4881744384765625, "train/loss_math": 2.492154598236084} +{"step": 1779, "train/loss": 1.814675122499466, "train/lm_loss": 1.814675122499466, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.066509528411152e-05, "perf/tokens_per_sec": 25574.37717283379, "train/loss_code": 1.2206046283245087, "train/loss_math": 2.1207077503204346, "train/loss_prose": 3.2728588581085205} +{"step": 1780, "train/loss": 2.4484075009822845, "train/lm_loss": 2.4484075009822845, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.065148945425401e-05, "perf/tokens_per_sec": 25983.609910145635, "train/loss_prose": 3.479432741800944, "train/loss_code": 1.6973002552986145, "train/loss_math": 2.359760284423828} +{"step": 1781, "train/loss": 2.2623053193092346, "train/lm_loss": 2.2623053193092346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.063787599605068e-05, "perf/tokens_per_sec": 24317.800091440153, "train/loss_prose": 3.3382481733957925, "train/loss_math": 1.9517462253570557, "train/loss_code": 1.1142297387123108} +{"step": 1782, "train/loss": 2.566572904586792, "train/lm_loss": 2.566572904586792, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.062425491613656e-05, "perf/tokens_per_sec": 26133.530250019776, "train/loss_code": 1.7527421861886978, "train/loss_math": 2.585197925567627, "train/loss_prose": 3.645471970240275} +{"step": 1783, "train/loss": 2.4536964297294617, "train/lm_loss": 2.4536964297294617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0610626221150394e-05, "perf/tokens_per_sec": 26791.80392866277, "train/loss_math": 2.4358646392822267, "train/loss_code": 2.1625200510025024, "train/loss_prose": 3.125208616256714} +{"step": 1784, "train/loss": 2.3898446559906006, "train/lm_loss": 2.3898446559906006, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.059698991773466e-05, "perf/tokens_per_sec": 25873.998367425622, "train/loss_prose": 3.664582371711731, "train/loss_code": 1.4533901413281758, "train/loss_math": 2.4764742056528726} +{"step": 1785, "train/loss": 2.6887535750865936, "train/lm_loss": 2.6887535750865936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0583346012535506e-05, "perf/tokens_per_sec": 25849.471848236266, "train/loss_math": 2.236721992492676, "train/loss_prose": 3.746342579523722, "train/loss_code": 1.3241130113601685} +{"step": 1786, "train/loss": 2.521104574203491, "train/lm_loss": 2.521104574203491, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.056969451220282e-05, "perf/tokens_per_sec": 26536.46889895985, "train/loss_prose": 3.585349162419637, "train/loss_math": 2.2463162740071616, "train/loss_code": 1.336920142173767} +{"step": 1787, "train/loss": 2.038950562477112, "train/lm_loss": 2.038950562477112, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.055603542339016e-05, "perf/tokens_per_sec": 25891.11660284262, "train/loss_code": 1.3662432034810383, "train/loss_math": 2.3343430161476135, "train/loss_prose": 2.8755030632019043} +{"step": 1788, "train/loss": 2.019592523574829, "train/lm_loss": 2.019592523574829, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0542368752754825e-05, "perf/tokens_per_sec": 25827.088257755702, "train/loss_code": 1.3708301782608032, "train/loss_prose": 3.404058814048767, "train/loss_math": 2.494471311569214} +{"step": 1789, "train/loss": 2.8373967707157135, "train/lm_loss": 2.8373967707157135, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.052869450695776e-05, "perf/tokens_per_sec": 25845.03868360798, "train/loss_math": 2.2981042861938477, "train/loss_code": 1.667296826839447, "train/loss_prose": 3.413295125961304} +{"step": 1790, "train/loss": 1.9518311023712158, "train/lm_loss": 1.9518311023712158, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0515012692663646e-05, "perf/tokens_per_sec": 25381.942315477194, "train/loss_prose": 3.270203113555908, "train/loss_math": 2.2477548122406006, "train/loss_code": 1.365297555923462} +{"step": 1791, "train/loss": 2.6039879620075226, "train/lm_loss": 2.6039879620075226, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.050132331654082e-05, "perf/tokens_per_sec": 25032.046777354422, "train/loss_math": 2.335354232788086, "train/loss_code": 1.23183012008667, "train/loss_prose": 3.9616504907608032} +{"step": 1792, "train/loss": 2.49336439371109, "train/lm_loss": 2.49336439371109, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.048762638526132e-05, "perf/tokens_per_sec": 25262.804551456815, "train/loss_prose": 3.4997125466664634, "train/loss_code": 1.3262454271316528, "train/loss_math": 2.2650957902272544} +{"step": 1793, "train/loss": 2.700348138809204, "train/lm_loss": 2.700348138809204, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.047392190550087e-05, "perf/tokens_per_sec": 25701.28415244957, "train/loss_code": 2.372305750846863, "train/loss_math": 2.178372383117676, "train/loss_prose": 3.44101881980896} +{"step": 1794, "train/loss": 2.9042540192604065, "train/lm_loss": 2.9042540192604065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.046020988393885e-05, "perf/tokens_per_sec": 26256.38714252528, "train/loss_prose": 3.548889696598053, "train/loss_math": 2.2596183121204376} +{"step": 1795, "train/loss": 2.5328336656093597, "train/lm_loss": 2.5328336656093597, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.044649032725836e-05, "perf/tokens_per_sec": 26196.25257161329, "train/loss_prose": 3.562978823979696, "train/loss_math": 2.3445587952931723, "train/loss_code": 1.270027995109558} +{"step": 1796, "train/loss": 2.4901923537254333, "train/lm_loss": 2.4901923537254333, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.043276324214613e-05, "perf/tokens_per_sec": 26083.576155360915, "train/loss_code": 1.7636876702308655, "train/loss_math": 2.3557727932929993, "train/loss_prose": 3.485535979270935} +{"step": 1797, "train/loss": 2.5392589569091797, "train/lm_loss": 2.5392589569091797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.041902863529256e-05, "perf/tokens_per_sec": 25921.118304100186, "train/loss_math": 2.05591282248497, "train/loss_prose": 3.555052399635315, "train/loss_code": 2.490158200263977} +{"step": 1798, "train/loss": 2.5651814937591553, "train/lm_loss": 2.5651814937591553, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.040528651339176e-05, "perf/tokens_per_sec": 26052.407263794004, "train/loss_prose": 3.038693348566691, "train/loss_math": 2.3973583579063416, "train/loss_code": 1.8159382343292236} +{"step": 1799, "train/loss": 2.7174757719039917, "train/lm_loss": 2.7174757719039917, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.039153688314145e-05, "perf/tokens_per_sec": 25930.077585556322, "train/loss_code": 1.7516026496887207, "train/loss_prose": 3.312848389148712, "train/loss_math": 2.4926035404205322} +{"step": 1800, "train/loss": 3.2989426255226135, "train/lm_loss": 3.2989426255226135, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.037777975124306e-05, "perf/tokens_per_sec": 25991.31475600808, "train/loss_code": 2.2544533014297485, "train/loss_math": 2.597078561782837, "train/loss_prose": 3.8571109771728516} +{"step": 1800, "eval/loss": 2.22566059573956, "eval/lm_loss": 2.22566059573956, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 9.259597634716465, "eval/loss_code": 1.6007206242650611, "eval/ppl_code": 4.956602986107957, "eval/loss_prose": 3.48743702654253, "eval/ppl_prose": 32.702025783252445, "eval/loss_math": 2.177308349777333, "eval/ppl_math": 8.822527113437284} +{"step": 1801, "train/loss": 2.7361448407173157, "train/lm_loss": 2.7361448407173157, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.036401512440161e-05, "perf/tokens_per_sec": 26398.605356284006, "train/loss_math": 2.4184274673461914, "train/loss_code": 1.5340327024459839, "train/loss_prose": 3.8552699089050293} +{"step": 1802, "train/loss": 2.923318862915039, "train/lm_loss": 2.923318862915039, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.035024300932584e-05, "perf/tokens_per_sec": 24163.986080987946, "train/loss_prose": 3.5527018308639526, "train/loss_math": 2.293935865163803} +{"step": 1803, "train/loss": 2.6623531877994537, "train/lm_loss": 2.6623531877994537, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.033646341272811e-05, "perf/tokens_per_sec": 26169.237939286297, "train/loss_math": 2.150377333164215, "train/loss_code": 1.963070809841156, "train/loss_prose": 3.2679824233055115} +{"step": 1804, "train/loss": 2.6625587046146393, "train/lm_loss": 2.6625587046146393, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0322676341324415e-05, "perf/tokens_per_sec": 25880.468659990118, "train/loss_math": 2.3139021396636963, "train/loss_prose": 3.4353528022766113, "train/loss_code": 0.6173524260520935} +{"step": 1805, "train/loss": 2.394455760717392, "train/lm_loss": 2.394455760717392, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.030888180183441e-05, "perf/tokens_per_sec": 26040.244737723628, "train/loss_math": 2.4018760919570923, "train/loss_code": 1.6797743439674377, "train/loss_prose": 3.0942968130111694} +{"step": 1806, "train/loss": 2.0545820593833923, "train/lm_loss": 2.0545820593833923, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0295079800981395e-05, "perf/tokens_per_sec": 26793.47529304612, "train/loss_prose": 3.4255549907684326, "train/loss_code": 1.1999933570623398, "train/loss_math": 2.3927865028381348} +{"step": 1807, "train/loss": 2.4169912934303284, "train/lm_loss": 2.4169912934303284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.028127034549229e-05, "perf/tokens_per_sec": 26220.161265903153, "train/loss_prose": 3.201003313064575, "train/loss_code": 1.678626537322998, "train/loss_math": 2.013573557138443} +{"step": 1808, "train/loss": 2.699060708284378, "train/lm_loss": 2.699060708284378, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0267453442097664e-05, "perf/tokens_per_sec": 26488.312496627263, "train/loss_math": 2.567118525505066, "train/loss_prose": 3.631738543510437, "train/loss_code": 0.9656474888324738} +{"step": 1809, "train/loss": 2.817505359649658, "train/lm_loss": 2.817505359649658, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.02536290975317e-05, "perf/tokens_per_sec": 25973.239171813228, "train/loss_math": 2.229515790939331, "train/loss_prose": 3.4054946899414062} +{"step": 1810, "train/loss": 3.1682965755462646, "train/lm_loss": 3.1682965755462646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.02397973185322e-05, "perf/tokens_per_sec": 26582.78509248342, "train/loss_math": 2.3467607498168945, "train/loss_prose": 3.892283022403717, "train/loss_code": 2.73695707321167} +{"step": 1811, "train/loss": 3.17412132024765, "train/lm_loss": 3.17412132024765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.022595811184064e-05, "perf/tokens_per_sec": 26155.094472677007, "train/loss_prose": 3.874182367324829, "train/loss_math": 2.0241423845291138, "train/loss_code": 1.9737725257873535} +{"step": 1812, "train/loss": 2.2854265570640564, "train/lm_loss": 2.2854265570640564, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.021211148420205e-05, "perf/tokens_per_sec": 26035.588076882857, "train/loss_code": 1.6435926258563995, "train/loss_math": 2.1091991662979126, "train/loss_prose": 3.7453218698501587} +{"step": 1813, "train/loss": 2.402858078479767, "train/lm_loss": 2.402858078479767, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.019825744236514e-05, "perf/tokens_per_sec": 25669.44906488085, "train/loss_code": 1.5851839184761047, "train/loss_prose": 3.389423211415609, "train/loss_math": 2.149521589279175} +{"step": 1814, "train/loss": 2.43569353222847, "train/lm_loss": 2.43569353222847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.018439599308217e-05, "perf/tokens_per_sec": 25831.12687119221, "train/loss_math": 2.0993184248606362, "train/loss_code": 1.3943498730659485, "train/loss_prose": 3.466297705968221} +{"step": 1815, "train/loss": 2.452742338180542, "train/lm_loss": 2.452742338180542, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.017052714310906e-05, "perf/tokens_per_sec": 25676.393058054797, "train/loss_prose": 3.4558791319529214, "train/loss_math": 2.8256375789642334, "train/loss_code": 1.607166051864624} +{"step": 1816, "train/loss": 2.573189914226532, "train/lm_loss": 2.573189914226532, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.015665089920531e-05, "perf/tokens_per_sec": 26014.10220985273, "train/loss_prose": 3.6373724937438965, "train/loss_math": 2.2788476943969727, "train/loss_code": 1.41842919588089} +{"step": 1817, "train/loss": 2.0590606927871704, "train/lm_loss": 2.0590606927871704, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.014276726813404e-05, "perf/tokens_per_sec": 26519.30565198935, "train/loss_code": 1.2114931742350261, "train/loss_math": 2.2373289664586387, "train/loss_prose": 3.06300950050354} +{"step": 1818, "train/loss": 2.16576611995697, "train/lm_loss": 2.16576611995697, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.012887625666195e-05, "perf/tokens_per_sec": 26327.203324792506, "train/loss_prose": 3.643609046936035, "train/loss_math": 2.1935778856277466, "train/loss_code": 1.636069377263387} +{"step": 1819, "train/loss": 2.2347868978977203, "train/lm_loss": 2.2347868978977203, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.011497787155938e-05, "perf/tokens_per_sec": 25106.3068516773, "train/loss_math": 2.4031585216522218, "train/loss_code": 1.0153003633022308, "train/loss_prose": 3.831902265548706} +{"step": 1820, "train/loss": 2.3074785470962524, "train/lm_loss": 2.3074785470962524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0101072119600196e-05, "perf/tokens_per_sec": 25137.899819292536, "train/loss_math": 2.18696665763855, "train/loss_code": 1.4547271430492401, "train/loss_prose": 3.4846510092417398} +{"step": 1821, "train/loss": 2.502673089504242, "train/lm_loss": 2.502673089504242, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0087159007561916e-05, "perf/tokens_per_sec": 25249.438106071964, "train/loss_math": 2.6306581497192383, "train/loss_code": 1.494811773300171, "train/loss_prose": 3.8784701824188232} +{"step": 1822, "train/loss": 2.1083116233348846, "train/lm_loss": 2.1083116233348846, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.007323854222562e-05, "perf/tokens_per_sec": 25559.69034154731, "train/loss_code": 1.5588586807250977, "train/loss_prose": 3.250149726867676, "train/loss_math": 2.5719006061553955} +{"step": 1823, "train/loss": 2.4476691484451294, "train/lm_loss": 2.4476691484451294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.005931073037596e-05, "perf/tokens_per_sec": 25504.442800878263, "train/loss_code": 1.753288745880127, "train/loss_math": 2.489925265312195, "train/loss_prose": 3.794173836708069} +{"step": 1824, "train/loss": 2.308199852705002, "train/lm_loss": 2.308199852705002, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0045375578801214e-05, "perf/tokens_per_sec": 25999.18155527138, "train/loss_math": 2.2587509155273438, "train/loss_code": 1.3867346048355103, "train/loss_prose": 3.32856285572052} +{"step": 1825, "train/loss": 2.5701574087142944, "train/lm_loss": 2.5701574087142944, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.003143309429317e-05, "perf/tokens_per_sec": 26848.369916469364, "train/loss_math": 2.384589958190918, "train/loss_prose": 3.3816956281661987, "train/loss_code": 1.8749181032180786} +{"step": 1826, "train/loss": 1.9867632687091827, "train/lm_loss": 1.9867632687091827, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.001748328364724e-05, "perf/tokens_per_sec": 24708.215305403344, "train/loss_math": 2.337526798248291, "train/loss_prose": 3.1071945428848267, "train/loss_code": 1.468438160419464} +{"step": 1827, "train/loss": 3.4281508326530457, "train/lm_loss": 3.4281508326530457, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.000352615366239e-05, "perf/tokens_per_sec": 25694.05769391833, "train/loss_prose": 3.5608493259974887, "train/loss_code": 2.499260425567627} +{"step": 1828, "train/loss": 3.0100515484809875, "train/lm_loss": 3.0100515484809875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.998956171114116e-05, "perf/tokens_per_sec": 25633.140686430295, "train/loss_math": 2.5867974758148193, "train/loss_prose": 3.264004039764404} +{"step": 1829, "train/loss": 2.310371220111847, "train/lm_loss": 2.310371220111847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.997558996288965e-05, "perf/tokens_per_sec": 25148.16625435851, "train/loss_code": 1.1455848415692647, "train/loss_math": 2.350263237953186, "train/loss_prose": 3.997713327407837} +{"step": 1830, "train/loss": 2.930719256401062, "train/lm_loss": 2.930719256401062, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9961610915717515e-05, "perf/tokens_per_sec": 26184.87329503627, "train/loss_prose": 3.7613487243652344, "train/loss_code": 1.5463364124298096} +{"step": 1831, "train/loss": 2.5525336265563965, "train/lm_loss": 2.5525336265563965, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9947624576437975e-05, "perf/tokens_per_sec": 25317.90449860441, "train/loss_prose": 3.03057599067688, "train/loss_code": 1.7178505063056946, "train/loss_math": 2.431132435798645} +{"step": 1832, "train/loss": 3.055371403694153, "train/lm_loss": 3.055371403694153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9933630951867805e-05, "perf/tokens_per_sec": 25331.82911649044, "train/loss_math": 2.086824059486389, "train/loss_prose": 3.3782203594843545} +{"step": 1833, "train/loss": 2.288767457008362, "train/lm_loss": 2.288767457008362, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9919630048827314e-05, "perf/tokens_per_sec": 23534.09961383509, "train/loss_prose": 3.322336435317993, "train/loss_code": 1.2497413754463196, "train/loss_math": 2.2914957404136658} +{"step": 1834, "train/loss": 2.1294513046741486, "train/lm_loss": 2.1294513046741486, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.99056218741404e-05, "perf/tokens_per_sec": 25552.5432767891, "train/loss_math": 2.1396950483322144, "train/loss_code": 1.538057565689087, "train/loss_prose": 3.862657308578491} +{"step": 1835, "train/loss": 2.868697464466095, "train/lm_loss": 2.868697464466095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.989160643463445e-05, "perf/tokens_per_sec": 24033.839545619878, "train/loss_math": 2.3707834243774415, "train/loss_prose": 3.6985538800557456} +{"step": 1836, "train/loss": 2.626252770423889, "train/lm_loss": 2.626252770423889, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.987758373714044e-05, "perf/tokens_per_sec": 26011.108832640406, "train/loss_prose": 3.696943998336792, "train/loss_code": 1.35187824567159, "train/loss_math": 2.1666100025177} +{"step": 1837, "train/loss": 3.019303470849991, "train/lm_loss": 3.019303470849991, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9863553788492834e-05, "perf/tokens_per_sec": 26198.52961386611, "train/loss_code": 1.8026152849197388, "train/loss_math": 2.5031219720840454, "train/loss_prose": 3.8857381343841553} +{"step": 1838, "train/loss": 2.4476780891418457, "train/lm_loss": 2.4476780891418457, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.984951659552968e-05, "perf/tokens_per_sec": 26471.74171519086, "train/loss_code": 1.341914415359497, "train/loss_prose": 3.5344876448313394, "train/loss_math": 2.476109266281128} +{"step": 1839, "train/loss": 2.524433195590973, "train/lm_loss": 2.524433195590973, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.983547216509254e-05, "perf/tokens_per_sec": 26304.46840071289, "train/loss_prose": 4.002233028411865, "train/loss_code": 1.511507272720337, "train/loss_math": 2.1427366733551025} +{"step": 1840, "train/loss": 2.5129595398902893, "train/lm_loss": 2.5129595398902893, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.982142050402649e-05, "perf/tokens_per_sec": 25065.720227665002, "train/loss_prose": 3.3584108352661133, "train/loss_code": 1.7673401435216267, "train/loss_math": 2.3632116317749023} +{"step": 1841, "train/loss": 2.489879786968231, "train/lm_loss": 2.489879786968231, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.980736161918013e-05, "perf/tokens_per_sec": 24535.309669956157, "train/loss_prose": 3.462047576904297, "train/loss_code": 1.3004823525746663, "train/loss_math": 2.1694016456604004} +{"step": 1842, "train/loss": 2.512272924184799, "train/lm_loss": 2.512272924184799, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.97932955174056e-05, "perf/tokens_per_sec": 23432.441198855377, "train/loss_math": 1.951329231262207, "train/loss_prose": 3.4471792380015054} +{"step": 1843, "train/loss": 2.586172938346863, "train/lm_loss": 2.586172938346863, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.977922220555855e-05, "perf/tokens_per_sec": 24371.927502184695, "train/loss_prose": 3.423218568166097, "train/loss_math": 2.3006338278452554, "train/loss_code": 1.7589130997657776} +{"step": 1844, "train/loss": 2.80969899892807, "train/lm_loss": 2.80969899892807, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.976514169049814e-05, "perf/tokens_per_sec": 24091.50520327187, "train/loss_math": 2.550536572933197, "train/loss_code": 1.5726189613342285, "train/loss_prose": 3.5676092306772866} +{"step": 1845, "train/loss": 2.0115458220243454, "train/lm_loss": 2.0115458220243454, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9751053979087035e-05, "perf/tokens_per_sec": 25516.792024679293, "train/loss_code": 1.171927958726883, "train/loss_math": 2.2888489961624146, "train/loss_prose": 3.4134777784347534} +{"step": 1846, "train/loss": 2.5078386664390564, "train/lm_loss": 2.5078386664390564, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.973695907819142e-05, "perf/tokens_per_sec": 26305.51560121882, "train/loss_math": 2.1387446522712708, "train/loss_code": 1.574566086133321, "train/loss_prose": 3.687173922856649} +{"step": 1847, "train/loss": 2.8332454562187195, "train/lm_loss": 2.8332454562187195, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9722856994680966e-05, "perf/tokens_per_sec": 27067.998518969012, "train/loss_math": 2.347258281707764, "train/loss_prose": 3.6432243982950845} +{"step": 1848, "train/loss": 2.6150554716587067, "train/lm_loss": 2.6150554716587067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9708747735428886e-05, "perf/tokens_per_sec": 25569.467002039026, "train/loss_math": 2.075601577758789, "train/loss_prose": 3.655505895614624, "train/loss_code": 1.8635609149932861} +{"step": 1849, "train/loss": 2.9266282320022583, "train/lm_loss": 2.9266282320022583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.969463130731183e-05, "perf/tokens_per_sec": 26351.836795297742, "train/loss_prose": 3.4691454966863, "train/loss_code": 1.299076795578003} +{"step": 1850, "train/loss": 2.9438533782958984, "train/lm_loss": 2.9438533782958984, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.968050771720999e-05, "perf/tokens_per_sec": 26615.855275572252, "train/loss_prose": 3.313467264175415, "train/loss_math": 2.1368114948272705, "train/loss_code": 2.7098681926727295} +{"step": 1851, "train/loss": 2.274038851261139, "train/lm_loss": 2.274038851261139, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.966637697200703e-05, "perf/tokens_per_sec": 26855.21083220524, "train/loss_math": 2.412104061671666, "train/loss_code": 1.3075820207595825} +{"step": 1852, "train/loss": 2.458141654729843, "train/lm_loss": 2.458141654729843, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.965223907859011e-05, "perf/tokens_per_sec": 27329.367815049718, "train/loss_prose": 3.4269184271494546, "train/loss_math": 2.2819511890411377, "train/loss_code": 1.2692621350288391} +{"step": 1853, "train/loss": 2.6508179903030396, "train/lm_loss": 2.6508179903030396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.963809404384985e-05, "perf/tokens_per_sec": 25299.747124303994, "train/loss_math": 2.46614933013916, "train/loss_prose": 3.586481809616089, "train/loss_code": 0.9641583859920502} +{"step": 1854, "train/loss": 3.023262143135071, "train/lm_loss": 3.023262143135071, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.962394187468039e-05, "perf/tokens_per_sec": 25982.470295367584, "train/loss_code": 1.9475295543670654, "train/loss_prose": 3.5660120010375977, "train/loss_math": 2.4609780311584473} +{"step": 1855, "train/loss": 2.169805556535721, "train/lm_loss": 2.169805556535721, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.960978257797931e-05, "perf/tokens_per_sec": 26017.293278309924, "train/loss_code": 1.71254829565684, "train/loss_prose": 3.0232657194137573, "train/loss_math": 2.0580894947052} +{"step": 1856, "train/loss": 2.707939565181732, "train/lm_loss": 2.707939565181732, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9595616160647674e-05, "perf/tokens_per_sec": 26074.114769474538, "train/loss_math": 2.214658808708191, "train/loss_prose": 3.5300740400950112} +{"step": 1857, "train/loss": 2.512168675661087, "train/lm_loss": 2.512168675661087, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.958144262959004e-05, "perf/tokens_per_sec": 25899.89896851887, "train/loss_prose": 3.5410939852396646, "train/loss_math": 2.3767717679341636, "train/loss_code": 1.1718761026859283} +{"step": 1858, "train/loss": 2.4466127157211304, "train/lm_loss": 2.4466127157211304, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9567261991714404e-05, "perf/tokens_per_sec": 25966.87940726141, "train/loss_math": 2.276904265085856, "train/loss_prose": 4.2012481689453125, "train/loss_code": 1.7102277278900146} +{"step": 1859, "train/loss": 2.407476097345352, "train/lm_loss": 2.407476097345352, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.955307425393224e-05, "perf/tokens_per_sec": 25490.2900009941, "train/loss_code": 1.3877451022466023, "train/loss_math": 2.3485605716705322, "train/loss_prose": 3.466484228769938} +{"step": 1860, "train/loss": 2.7580707669258118, "train/lm_loss": 2.7580707669258118, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.953887942315847e-05, "perf/tokens_per_sec": 26199.608350998125, "train/loss_math": 2.0875293612480164, "train/loss_prose": 3.757731278737386, "train/loss_code": 2.205437699953715} +{"step": 1861, "train/loss": 2.706043630838394, "train/lm_loss": 2.706043630838394, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.95246775063115e-05, "perf/tokens_per_sec": 26485.576480382333, "train/loss_prose": 3.6851227283477783, "train/loss_math": 2.3248794078826904, "train/loss_code": 1.8091719150543213} +{"step": 1862, "train/loss": 2.1208358705043793, "train/lm_loss": 2.1208358705043793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.951046851031315e-05, "perf/tokens_per_sec": 25846.827321417826, "train/loss_math": 2.0643273293972015, "train/loss_code": 1.628475268681844, "train/loss_prose": 3.8239521980285645} +{"step": 1863, "train/loss": 2.0957027971744537, "train/lm_loss": 2.0957027971744537, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9496252442088733e-05, "perf/tokens_per_sec": 26220.601466716525, "train/loss_code": 1.4967593848705292, "train/loss_prose": 3.055495500564575, "train/loss_math": 2.333796977996826} +{"step": 1864, "train/loss": 1.7873357236385345, "train/lm_loss": 1.7873357236385345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.948202930856697e-05, "perf/tokens_per_sec": 24243.539617603936, "train/loss_code": 1.2597979456186295, "train/loss_math": 1.925917148590088, "train/loss_prose": 3.4817428588867188} +{"step": 1865, "train/loss": 2.3958632946014404, "train/lm_loss": 2.3958632946014404, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.946779911668006e-05, "perf/tokens_per_sec": 25789.674720372048, "train/loss_prose": 3.476559638977051, "train/loss_math": 2.4580159187316895, "train/loss_code": 1.2737317482630413} +{"step": 1866, "train/loss": 2.552548825740814, "train/lm_loss": 2.552548825740814, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9453561873363615e-05, "perf/tokens_per_sec": 25629.39314565155, "train/loss_prose": 3.8670642375946045, "train/loss_code": 2.3847160935401917, "train/loss_math": 2.338153839111328} +{"step": 1867, "train/loss": 2.108269214630127, "train/lm_loss": 2.108269214630127, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.943931758555669e-05, "perf/tokens_per_sec": 25668.490245002973, "train/loss_math": 2.3981138467788696, "train/loss_prose": 3.654714345932007, "train/loss_code": 1.2063276767730713} +{"step": 1868, "train/loss": 2.679762750864029, "train/lm_loss": 2.679762750864029, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9425066260201796e-05, "perf/tokens_per_sec": 27109.772033169484, "train/loss_math": 2.3083189328511557, "train/loss_prose": 3.8839027086893716, "train/loss_code": 1.4307184219360352} +{"step": 1869, "train/loss": 2.286712557077408, "train/lm_loss": 2.286712557077408, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.941080790424484e-05, "perf/tokens_per_sec": 25895.95592529005, "train/loss_code": 1.6390959819157918, "train/loss_math": 2.0862499872843423, "train/loss_prose": 3.5588310956954956} +{"step": 1870, "train/loss": 2.169801414012909, "train/lm_loss": 2.169801414012909, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9396542524635175e-05, "perf/tokens_per_sec": 26015.20520671494, "train/loss_code": 1.4570622742176056, "train/loss_math": 2.2065320014953613, "train/loss_prose": 3.5585490465164185} +{"step": 1871, "train/loss": 2.6694645285606384, "train/lm_loss": 2.6694645285606384, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.938227012832557e-05, "perf/tokens_per_sec": 26826.31807386194, "train/loss_code": 1.6659271717071533, "train/loss_prose": 3.1588975191116333, "train/loss_math": 2.3513995011647544} +{"step": 1872, "train/loss": 2.399461567401886, "train/lm_loss": 2.399461567401886, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.936799072227222e-05, "perf/tokens_per_sec": 25938.494776780793, "train/loss_code": 1.6223442554473877, "train/loss_prose": 3.2941884199778237, "train/loss_math": 2.223047614097595} +{"step": 1873, "train/loss": 2.326870322227478, "train/lm_loss": 2.326870322227478, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.935370431343475e-05, "perf/tokens_per_sec": 25955.659274233145, "train/loss_math": 2.425457406044006, "train/loss_code": 1.5281376838684082, "train/loss_prose": 3.431400775909424} +{"step": 1874, "train/loss": 2.6107768416404724, "train/lm_loss": 2.6107768416404724, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.933941090877615e-05, "perf/tokens_per_sec": 26720.506019936325, "train/loss_prose": 3.061295509338379, "train/loss_code": 2.066959321498871, "train/loss_math": 2.522802988688151} +{"step": 1875, "train/loss": 1.9599878787994385, "train/lm_loss": 1.9599878787994385, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.932511051526289e-05, "perf/tokens_per_sec": 25600.36774209892, "train/loss_code": 1.6028721233208973, "train/loss_math": 2.405703067779541, "train/loss_prose": 3.6569666862487793} +{"step": 1876, "train/loss": 2.554233819246292, "train/lm_loss": 2.554233819246292, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9310803139864775e-05, "perf/tokens_per_sec": 26205.52300703802, "train/loss_math": 2.223206361134847, "train/loss_prose": 3.5037384827931723, "train/loss_code": 1.6265179812908173} +{"step": 1877, "train/loss": 1.9484160244464874, "train/lm_loss": 1.9484160244464874, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9296488789555066e-05, "perf/tokens_per_sec": 25189.537588120067, "train/loss_code": 1.501641857624054, "train/loss_prose": 3.6143627166748047, "train/loss_math": 2.2323780059814453} +{"step": 1878, "train/loss": 2.3334679305553436, "train/lm_loss": 2.3334679305553436, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.928216747131039e-05, "perf/tokens_per_sec": 25640.25638063683, "train/loss_prose": 3.574572205543518, "train/loss_math": 2.3265349864959717, "train/loss_code": 1.5129976868629456} +{"step": 1879, "train/loss": 2.804246038198471, "train/lm_loss": 2.804246038198471, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.92678391921108e-05, "perf/tokens_per_sec": 25598.3079121426, "train/loss_code": 1.6836496591567993, "train/loss_math": 2.340580940246582, "train/loss_prose": 3.596376657485962} +{"step": 1880, "train/loss": 2.3784143328666687, "train/lm_loss": 2.3784143328666687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.925350395893971e-05, "perf/tokens_per_sec": 26840.148643687284, "train/loss_code": 1.6370567083358765, "train/loss_math": 2.372904618581136, "train/loss_prose": 3.4987157583236694} +{"step": 1881, "train/loss": 1.8351426422595978, "train/lm_loss": 1.8351426422595978, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.923916177878394e-05, "perf/tokens_per_sec": 25546.311866634747, "train/loss_code": 1.1356026768684386, "train/loss_math": 2.2029314041137695, "train/loss_prose": 3.400098204612732} +{"step": 1882, "train/loss": 2.4105397164821625, "train/lm_loss": 2.4105397164821625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.92248126586337e-05, "perf/tokens_per_sec": 25822.81802139793, "train/loss_math": 2.5067113637924194, "train/loss_code": 1.5542234182357788, "train/loss_prose": 3.202741781870524} +{"step": 1883, "train/loss": 2.1863380670547485, "train/lm_loss": 2.1863380670547485, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9210456605482576e-05, "perf/tokens_per_sec": 26924.191968756415, "train/loss_code": 1.1792278091112773, "train/loss_prose": 3.7471132278442383, "train/loss_math": 2.1529313723246255} +{"step": 1884, "train/loss": 2.2585326433181763, "train/lm_loss": 2.2585326433181763, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.919609362632753e-05, "perf/tokens_per_sec": 26272.729500462607, "train/loss_code": 1.6076946655909221, "train/loss_math": 2.129820942878723, "train/loss_prose": 4.725893497467041} +{"step": 1885, "train/loss": 2.509163498878479, "train/lm_loss": 2.509163498878479, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9181723728168916e-05, "perf/tokens_per_sec": 25939.47387461385, "train/loss_prose": 3.1943143208821616, "train/loss_math": 2.352544903755188, "train/loss_code": 1.9284250736236572} +{"step": 1886, "train/loss": 2.5684802532196045, "train/lm_loss": 2.5684802532196045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9167346918010425e-05, "perf/tokens_per_sec": 26686.01471935978, "train/loss_prose": 3.5711090564727783, "train/loss_math": 2.4314326643943787, "train/loss_code": 1.839945912361145} +{"step": 1887, "train/loss": 2.380971133708954, "train/lm_loss": 2.380971133708954, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.915296320285917e-05, "perf/tokens_per_sec": 26166.567438063354, "train/loss_math": 2.133821487426758, "train/loss_code": 1.3722666501998901, "train/loss_prose": 3.5544420878092446} +{"step": 1888, "train/loss": 3.0128904283046722, "train/lm_loss": 3.0128904283046722, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9138572589725576e-05, "perf/tokens_per_sec": 25722.987937952927, "train/loss_prose": 3.580416202545166, "train/loss_code": 1.0062882900238037, "train/loss_math": 2.5973774194717407} +{"step": 1889, "train/loss": 2.3501123189926147, "train/lm_loss": 2.3501123189926147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.912417508562345e-05, "perf/tokens_per_sec": 25615.827039664575, "train/loss_prose": 3.5631231466929116, "train/loss_math": 2.133077621459961, "train/loss_code": 0.856147974729538} +{"step": 1890, "train/loss": 2.7537210881710052, "train/lm_loss": 2.7537210881710052, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.910977069756998e-05, "perf/tokens_per_sec": 26260.36043918418, "train/loss_prose": 3.548230528831482, "train/loss_code": 1.6123661398887634, "train/loss_math": 2.306057095527649} +{"step": 1891, "train/loss": 1.8275049924850464, "train/lm_loss": 1.8275049924850464, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.909535943258567e-05, "perf/tokens_per_sec": 26882.146682501905, "train/loss_math": 2.2868404388427734, "train/loss_code": 1.3582563996315002, "train/loss_prose": 3.2550771236419678} +{"step": 1892, "train/loss": 2.5196208357810974, "train/lm_loss": 2.5196208357810974, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.908094129769442e-05, "perf/tokens_per_sec": 25844.299970665445, "train/loss_math": 2.2531487743059793, "train/loss_prose": 3.319037079811096} +{"step": 1893, "train/loss": 2.3481377959251404, "train/lm_loss": 2.3481377959251404, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.906651629992342e-05, "perf/tokens_per_sec": 25449.471280986225, "train/loss_math": 2.5344061851501465, "train/loss_code": 1.6222220659255981, "train/loss_prose": 3.6137006282806396} +{"step": 1894, "train/loss": 2.420686721801758, "train/lm_loss": 2.420686721801758, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.905208444630327e-05, "perf/tokens_per_sec": 27133.449550036166, "train/loss_prose": 3.350642760594686, "train/loss_math": 2.55925714969635, "train/loss_code": 1.3983504970868428} +{"step": 1895, "train/loss": 2.5240844786167145, "train/lm_loss": 2.5240844786167145, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.903764574386786e-05, "perf/tokens_per_sec": 25017.32030714164, "train/loss_math": 2.313010851542155, "train/loss_prose": 3.4867785771687827, "train/loss_code": 1.3966538310050964} +{"step": 1896, "train/loss": 2.1821862757205963, "train/lm_loss": 2.1821862757205963, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.902320019965445e-05, "perf/tokens_per_sec": 25743.378198279464, "train/loss_math": 2.362064480781555, "train/loss_prose": 3.117885112762451, "train/loss_code": 1.6304492950439453} +{"step": 1897, "train/loss": 2.759652316570282, "train/lm_loss": 2.759652316570282, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.900874782070362e-05, "perf/tokens_per_sec": 26098.871395996423, "train/loss_math": 2.160407384236654, "train/loss_prose": 3.3633021116256714, "train/loss_code": 2.142787218093872} +{"step": 1898, "train/loss": 2.3744634091854095, "train/lm_loss": 2.3744634091854095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.899428861405928e-05, "perf/tokens_per_sec": 26094.43155171916, "train/loss_math": 2.1644335746765138, "train/loss_prose": 3.564355731010437, "train/loss_code": 1.044828176498413} +{"step": 1899, "train/loss": 2.0231784284114838, "train/lm_loss": 2.0231784284114838, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.897982258676867e-05, "perf/tokens_per_sec": 26339.83736636417, "train/loss_code": 1.5084651410579681, "train/loss_math": 2.0185499787330627, "train/loss_prose": 3.0572333335876465} +{"step": 1900, "train/loss": 2.6358027160167694, "train/lm_loss": 2.6358027160167694, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8965349745882365e-05, "perf/tokens_per_sec": 26466.032458879516, "train/loss_math": 2.5883333683013916, "train/loss_prose": 3.733723819255829, "train/loss_code": 1.1877313653628032} +{"step": 1901, "train/loss": 2.344193935394287, "train/lm_loss": 2.344193935394287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.895087009845425e-05, "perf/tokens_per_sec": 25336.8352665476, "train/loss_math": 2.34527595837911, "train/loss_prose": 3.6394453048706055, "train/loss_code": 1.4796109596888225} +{"step": 1902, "train/loss": 2.3767260909080505, "train/lm_loss": 2.3767260909080505, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.893638365154152e-05, "perf/tokens_per_sec": 24461.49305091817, "train/loss_prose": 3.445622682571411, "train/loss_math": 2.160443353652954, "train/loss_code": 1.3203459978103638} +{"step": 1903, "train/loss": 2.437255769968033, "train/lm_loss": 2.437255769968033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8921890412204705e-05, "perf/tokens_per_sec": 25844.14445753209, "train/loss_prose": 3.322895050048828, "train/loss_math": 2.645514726638794, "train/loss_code": 1.1869840621948242} +{"step": 1904, "train/loss": 2.730901777744293, "train/lm_loss": 2.730901777744293, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8907390387507625e-05, "perf/tokens_per_sec": 26459.592112578874, "train/loss_math": 1.9894161224365234, "train/loss_prose": 3.3449562788009644, "train/loss_code": 2.4991402626037598} +{"step": 1905, "train/loss": 3.0900001525878906, "train/lm_loss": 3.0900001525878906, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8892883584517415e-05, "perf/tokens_per_sec": 25178.16720306244, "train/loss_math": 2.1041622161865234, "train/loss_prose": 3.4186127185821533} +{"step": 1906, "train/loss": 2.7953323125839233, "train/lm_loss": 2.7953323125839233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.887837001030452e-05, "perf/tokens_per_sec": 26320.265815260354, "train/loss_code": 1.6306874752044678, "train/loss_prose": 3.293358850479126, "train/loss_math": 2.132588028907776} +{"step": 1907, "train/loss": 2.422928273677826, "train/lm_loss": 2.422928273677826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8863849671942685e-05, "perf/tokens_per_sec": 25968.44942991413, "train/loss_code": 1.3668840527534485, "train/loss_math": 2.1824790239334106, "train/loss_prose": 5.7372636795043945} +{"step": 1908, "train/loss": 2.3005655705928802, "train/lm_loss": 2.3005655705928802, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8849322576508934e-05, "perf/tokens_per_sec": 25713.51688753319, "train/loss_prose": 3.5316616694132485, "train/loss_code": 0.9971923232078552, "train/loss_math": 1.9383849302927654} +{"step": 1909, "train/loss": 2.5688671469688416, "train/lm_loss": 2.5688671469688416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.883478873108361e-05, "perf/tokens_per_sec": 25986.911427521227, "train/loss_math": 2.3633879025777182, "train/loss_code": 2.3682138125101724, "train/loss_prose": 3.17806613445282} +{"step": 1910, "train/loss": 2.2874095141887665, "train/lm_loss": 2.2874095141887665, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8820248142750316e-05, "perf/tokens_per_sec": 26285.230868092425, "train/loss_code": 1.6311084429423015, "train/loss_math": 2.262878974278768, "train/loss_prose": 3.3086568117141724} +{"step": 1911, "train/loss": 3.1724935173988342, "train/lm_loss": 3.1724935173988342, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.880570081859597e-05, "perf/tokens_per_sec": 26708.04400797827, "train/loss_math": 2.431432008743286, "train/loss_prose": 3.5305917660395303, "train/loss_code": 1.7649645805358887} +{"step": 1912, "train/loss": 2.038965404033661, "train/lm_loss": 2.038965404033661, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.879114676571076e-05, "perf/tokens_per_sec": 26100.814302990526, "train/loss_code": 1.4948887586593629, "train/loss_math": 2.227255344390869, "train/loss_prose": 3.3050119876861572} +{"step": 1913, "train/loss": 1.92732572555542, "train/lm_loss": 1.92732572555542, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.877658599118815e-05, "perf/tokens_per_sec": 26627.694848633266, "train/loss_code": 1.5578434228897096, "train/loss_math": 2.3215038776397705, "train/loss_prose": 2.9863815307617188} +{"step": 1914, "train/loss": 1.9530005156993866, "train/lm_loss": 1.9530005156993866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8762018502124894e-05, "perf/tokens_per_sec": 25328.878656350254, "train/loss_code": 1.6765387773513794, "train/loss_math": 2.4137701193491616} +{"step": 1915, "train/loss": 2.7325985729694366, "train/lm_loss": 2.7325985729694366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8747444305621e-05, "perf/tokens_per_sec": 25322.569219725814, "train/loss_prose": 3.539740562438965, "train/loss_math": 2.3301392793655396, "train/loss_code": 1.9210093021392822} +{"step": 1916, "train/loss": 2.659912586212158, "train/lm_loss": 2.659912586212158, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.873286340877975e-05, "perf/tokens_per_sec": 26177.37237519561, "train/loss_math": 2.2901817162831626, "train/loss_prose": 4.044453740119934, "train/loss_code": 2.1066161394119263} +{"step": 1917, "train/loss": 2.4676892161369324, "train/lm_loss": 2.4676892161369324, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8718275818707715e-05, "perf/tokens_per_sec": 26225.404617721364, "train/loss_math": 2.2456445693969727, "train/loss_code": 1.6429083943367004, "train/loss_prose": 3.2395874659220376} +{"step": 1918, "train/loss": 2.6035734117031097, "train/lm_loss": 2.6035734117031097, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.870368154251469e-05, "perf/tokens_per_sec": 26795.397619901738, "train/loss_code": 1.6957419713338215, "train/loss_math": 2.4777549505233765, "train/loss_prose": 3.59528382619222} +{"step": 1919, "train/loss": 2.6148476600646973, "train/lm_loss": 2.6148476600646973, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.868908058731376e-05, "perf/tokens_per_sec": 25838.702676687386, "train/loss_prose": 3.2654170393943787, "train/loss_math": 2.220968723297119, "train/loss_code": 1.1942062377929688} +{"step": 1920, "train/loss": 2.2813107073307037, "train/lm_loss": 2.2813107073307037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.867447296022124e-05, "perf/tokens_per_sec": 26998.02492386122, "train/loss_prose": 3.273810863494873, "train/loss_code": 1.8078968822956085, "train/loss_math": 2.2356383204460144} +{"step": 1921, "train/loss": 2.684436559677124, "train/lm_loss": 2.684436559677124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.865985866835673e-05, "perf/tokens_per_sec": 25830.350116673482, "train/loss_math": 2.2333560943603517, "train/loss_prose": 3.436237176259359} +{"step": 1922, "train/loss": 2.3016045689582825, "train/lm_loss": 2.3016045689582825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8645237718843044e-05, "perf/tokens_per_sec": 26281.812914289734, "train/loss_prose": 3.3823790550231934, "train/loss_code": 1.8702765703201294, "train/loss_math": 2.0834858417510986} +{"step": 1923, "train/loss": 2.609010338783264, "train/lm_loss": 2.609010338783264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8630610118806254e-05, "perf/tokens_per_sec": 26420.121865662346, "train/loss_math": 2.127898156642914, "train/loss_code": 1.9208830992380779, "train/loss_prose": 3.6178789933522544} +{"step": 1924, "train/loss": 3.102226495742798, "train/lm_loss": 3.102226495742798, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.861597587537568e-05, "perf/tokens_per_sec": 25671.290199858046, "train/loss_math": 2.2313422362009683, "train/loss_prose": 3.624756956100464} +{"step": 1925, "train/loss": 2.6272488832473755, "train/lm_loss": 2.6272488832473755, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.860133499568387e-05, "perf/tokens_per_sec": 26187.587452422216, "train/loss_prose": 3.637995958328247, "train/loss_math": 2.209955851236979, "train/loss_code": 1.7370678186416626} +{"step": 1926, "train/loss": 2.493009388446808, "train/lm_loss": 2.493009388446808, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.858668748686662e-05, "perf/tokens_per_sec": 26072.254969776895, "train/loss_math": 2.1627758344014487, "train/loss_prose": 3.629979054133097, "train/loss_code": 1.2829049229621887} +{"step": 1927, "train/loss": 2.1994258165359497, "train/lm_loss": 2.1994258165359497, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8572033356062943e-05, "perf/tokens_per_sec": 25913.845888370506, "train/loss_prose": 3.251283884048462, "train/loss_math": 2.237552007039388, "train/loss_code": 1.4600608348846436} +{"step": 1928, "train/loss": 3.0123698711395264, "train/lm_loss": 3.0123698711395264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8557372610415074e-05, "perf/tokens_per_sec": 25804.472084245182, "train/loss_prose": 3.8895397186279297, "train/loss_code": 1.3640905618667603, "train/loss_math": 2.392236073811849} +{"step": 1929, "train/loss": 2.2125007808208466, "train/lm_loss": 2.2125007808208466, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.85427052570685e-05, "perf/tokens_per_sec": 27211.06504729503, "train/loss_math": 2.315013329188029, "train/loss_code": 1.1574066480000813, "train/loss_prose": 3.6413729190826416} +{"step": 1930, "train/loss": 2.231519967317581, "train/lm_loss": 2.231519967317581, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8528031303171895e-05, "perf/tokens_per_sec": 26258.795057561918, "train/loss_code": 0.6641972064971924, "train/loss_math": 2.2676879167556763, "train/loss_prose": 3.5818350315093994} +{"step": 1931, "train/loss": 2.733902096748352, "train/lm_loss": 2.733902096748352, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.851335075587718e-05, "perf/tokens_per_sec": 26229.328496596114, "train/loss_prose": 3.83791176478068, "train/loss_math": 2.2205843329429626, "train/loss_code": 1.4751437902450562} +{"step": 1932, "train/loss": 2.358890175819397, "train/lm_loss": 2.358890175819397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.849866362233947e-05, "perf/tokens_per_sec": 25787.661731749235, "train/loss_math": 2.2284730672836304, "train/loss_prose": 3.282150983810425, "train/loss_code": 1.1696240901947021} +{"step": 1933, "train/loss": 2.6586380302906036, "train/lm_loss": 2.6586380302906036, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8483969909717087e-05, "perf/tokens_per_sec": 25681.920244921876, "train/loss_prose": 3.6348975896835327, "train/loss_code": 1.6823783814907074} +{"step": 1934, "train/loss": 2.19436976313591, "train/lm_loss": 2.19436976313591, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8469269625171576e-05, "perf/tokens_per_sec": 25847.877289352487, "train/loss_math": 2.289724508921305, "train/loss_prose": 3.308825373649597, "train/loss_code": 1.3560446500778198} +{"step": 1935, "train/loss": 2.438058227300644, "train/lm_loss": 2.438058227300644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8454562775867684e-05, "perf/tokens_per_sec": 26021.706909739736, "train/loss_code": 1.6576364437739055, "train/loss_prose": 3.0589462518692017, "train/loss_math": 2.295771360397339} +{"step": 1936, "train/loss": 2.2505844235420227, "train/lm_loss": 2.2505844235420227, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.843984936897334e-05, "perf/tokens_per_sec": 25995.011558638264, "train/loss_code": 1.5817633271217346, "train/loss_math": 2.1325167417526245, "train/loss_prose": 3.4309176206588745} +{"step": 1937, "train/loss": 2.4988842606544495, "train/lm_loss": 2.4988842606544495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.842512941165968e-05, "perf/tokens_per_sec": 25864.922026753386, "train/loss_math": 2.1830067157745363, "train/loss_prose": 3.0253470738728843} +{"step": 1938, "train/loss": 1.9011364877223969, "train/lm_loss": 1.9011364877223969, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.841040291110103e-05, "perf/tokens_per_sec": 26223.803371875598, "train/loss_math": 2.1731759309768677, "train/loss_code": 1.6290970146656036} +{"step": 1939, "train/loss": 2.552281439304352, "train/lm_loss": 2.552281439304352, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8395669874474915e-05, "perf/tokens_per_sec": 25729.53705051594, "train/loss_math": 2.3358577887217202, "train/loss_prose": 3.5935541788736978, "train/loss_code": 1.3150078654289246} +{"step": 1940, "train/loss": 2.9171287417411804, "train/lm_loss": 2.9171287417411804, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8380930308962036e-05, "perf/tokens_per_sec": 26772.431329281597, "train/loss_code": 1.3059868812561035, "train/loss_prose": 3.4541760683059692} +{"step": 1941, "train/loss": 2.0949225425720215, "train/lm_loss": 2.0949225425720215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.836618422174628e-05, "perf/tokens_per_sec": 26082.62574790753, "train/loss_math": 2.145743429660797, "train/loss_prose": 3.0169625282287598, "train/loss_code": 1.0712402164936066} +{"step": 1942, "train/loss": 2.7409225702285767, "train/lm_loss": 2.7409225702285767, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.835143162001472e-05, "perf/tokens_per_sec": 26269.67631169713, "train/loss_code": 1.70012629032135, "train/loss_prose": 3.5896613597869873, "train/loss_math": 2.4683568477630615} +{"step": 1943, "train/loss": 2.8897268176078796, "train/lm_loss": 2.8897268176078796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8336672510957574e-05, "perf/tokens_per_sec": 26879.959545196383, "train/loss_prose": 3.5157291889190674, "train/loss_code": 2.02999347448349, "train/loss_math": 2.4974554777145386} +{"step": 1944, "train/loss": 2.2109776437282562, "train/lm_loss": 2.2109776437282562, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.832190690176825e-05, "perf/tokens_per_sec": 26241.427837815132, "train/loss_math": 2.3033804098765054, "train/loss_code": 1.3489043712615967, "train/loss_prose": 3.365483283996582} +{"step": 1945, "train/loss": 2.4641460180282593, "train/lm_loss": 2.4641460180282593, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.830713479964335e-05, "perf/tokens_per_sec": 26408.222556298515, "train/loss_code": 1.8938253323237102, "train/loss_math": 2.3467325369517007, "train/loss_prose": 3.4957470893859863} +{"step": 1946, "train/loss": 2.3633937537670135, "train/lm_loss": 2.3633937537670135, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.82923562117826e-05, "perf/tokens_per_sec": 26066.202565370833, "train/loss_code": 1.7025222480297089, "train/loss_math": 2.350781559944153, "train/loss_prose": 3.697748303413391} +{"step": 1947, "train/loss": 2.84625780582428, "train/lm_loss": 2.84625780582428, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.827757114538892e-05, "perf/tokens_per_sec": 25775.977122520842, "train/loss_prose": 3.4580318927764893, "train/loss_math": 2.017374634742737, "train/loss_code": 2.451593041419983} +{"step": 1948, "train/loss": 2.6832193732261658, "train/lm_loss": 2.6832193732261658, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.826277960766835e-05, "perf/tokens_per_sec": 24563.689763097278, "train/loss_prose": 3.4519523779551187, "train/loss_math": 2.4128357470035553, "train/loss_code": 1.4585548639297485} +{"step": 1949, "train/loss": 2.081167757511139, "train/lm_loss": 2.081167757511139, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.824798160583012e-05, "perf/tokens_per_sec": 25982.70606953376, "train/loss_math": 2.2084065973758698, "train/loss_code": 1.4523609479268391, "train/loss_prose": 3.458632230758667} +{"step": 1950, "train/loss": 2.100388079881668, "train/lm_loss": 2.100388079881668, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.823317714708661e-05, "perf/tokens_per_sec": 26867.936648738934, "train/loss_math": 2.0372931957244873, "train/loss_prose": 3.332269072532654, "train/loss_code": 0.9946970045566559} +{"step": 1951, "train/loss": 2.141287535429001, "train/lm_loss": 2.141287535429001, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.821836623865329e-05, "perf/tokens_per_sec": 25929.764493158294, "train/loss_math": 2.142833411693573, "train/loss_prose": 3.5595213174819946, "train/loss_code": 1.431397944688797} +{"step": 1952, "train/loss": 2.7273153364658356, "train/lm_loss": 2.7273153364658356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8203548887748865e-05, "perf/tokens_per_sec": 26459.5513608753, "train/loss_math": 2.506060004234314, "train/loss_code": 1.7441149950027466, "train/loss_prose": 3.3295432329177856} +{"step": 1953, "train/loss": 1.9711633026599884, "train/lm_loss": 1.9711633026599884, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8188725101595094e-05, "perf/tokens_per_sec": 27265.738519893985, "train/loss_prose": 3.2905850410461426, "train/loss_code": 1.2047010362148285, "train/loss_math": 2.5533058643341064} +{"step": 1954, "train/loss": 2.248240292072296, "train/lm_loss": 2.248240292072296, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8173894887416945e-05, "perf/tokens_per_sec": 25386.668179830005, "train/loss_prose": 3.8386893272399902, "train/loss_math": 2.2590649127960205, "train/loss_code": 1.4476034343242645} +{"step": 1955, "train/loss": 2.8367186188697815, "train/lm_loss": 2.8367186188697815, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8159058252442446e-05, "perf/tokens_per_sec": 25617.660490347826, "train/loss_code": 1.4726550579071045, "train/loss_prose": 3.5193416118621825, "train/loss_math": 2.1517298221588135} +{"step": 1956, "train/loss": 2.5835298001766205, "train/lm_loss": 2.5835298001766205, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8144215203902834e-05, "perf/tokens_per_sec": 26207.601763458577, "train/loss_math": 2.046708643436432, "train/loss_prose": 3.4049429297447205, "train/loss_code": 1.4775246977806091} +{"step": 1957, "train/loss": 2.3871973752975464, "train/lm_loss": 2.3871973752975464, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.81293657490324e-05, "perf/tokens_per_sec": 26374.24881023292, "train/loss_math": 2.0951164166132608, "train/loss_prose": 4.301537394523621, "train/loss_code": 1.4030513763427734} +{"step": 1958, "train/loss": 2.567011684179306, "train/lm_loss": 2.567011684179306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8114509895068586e-05, "perf/tokens_per_sec": 26089.755492482054, "train/loss_math": 2.377377927303314, "train/loss_prose": 3.3710867166519165, "train/loss_code": 1.1484956741333008} +{"step": 1959, "train/loss": 2.310571163892746, "train/lm_loss": 2.310571163892746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8099647649251986e-05, "perf/tokens_per_sec": 25248.436192836034, "train/loss_code": 1.4156324863433838, "train/loss_prose": 3.8879224061965942, "train/loss_math": 2.5230965614318848} +{"step": 1960, "train/loss": 2.4388265311717987, "train/lm_loss": 2.4388265311717987, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.808477901882624e-05, "perf/tokens_per_sec": 25906.38222308008, "train/loss_prose": 3.306077241897583, "train/loss_code": 1.7709455092748005, "train/loss_math": 2.139772057533264} +{"step": 1961, "train/loss": 2.8801297545433044, "train/lm_loss": 2.8801297545433044, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8069904011038165e-05, "perf/tokens_per_sec": 26904.627512148676, "train/loss_code": 2.0057228803634644, "train/loss_prose": 3.5799055695533752, "train/loss_math": 2.3549846410751343} +{"step": 1962, "train/loss": 2.585467576980591, "train/lm_loss": 2.585467576980591, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.805502263313765e-05, "perf/tokens_per_sec": 25994.775561093484, "train/loss_math": 2.349669408798218, "train/loss_prose": 2.978464206059774} +{"step": 1963, "train/loss": 2.5289251506328583, "train/lm_loss": 2.5289251506328583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.80401348923777e-05, "perf/tokens_per_sec": 26091.41966044447, "train/loss_code": 1.8735973834991455, "train/loss_math": 2.4555713534355164, "train/loss_prose": 3.3309597969055176} +{"step": 1964, "train/loss": 2.655889868736267, "train/lm_loss": 2.655889868736267, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.802524079601442e-05, "perf/tokens_per_sec": 26005.360329867945, "train/loss_code": 1.4769350588321686, "train/loss_prose": 3.8288283348083496, "train/loss_math": 2.268921136856079} +{"step": 1965, "train/loss": 2.7824047803878784, "train/lm_loss": 2.7824047803878784, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8010340351306997e-05, "perf/tokens_per_sec": 26341.85670718151, "train/loss_math": 2.659409999847412, "train/loss_prose": 3.475347638130188, "train/loss_code": 1.5195139050483704} +{"step": 1966, "train/loss": 2.0067966282367706, "train/lm_loss": 2.0067966282367706, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7995433565517735e-05, "perf/tokens_per_sec": 25630.846143632298, "train/loss_math": 2.2886863152186074, "train/loss_prose": 3.3576369285583496, "train/loss_code": 1.4576692879199982} +{"step": 1967, "train/loss": 2.315153628587723, "train/lm_loss": 2.315153628587723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.798052044591204e-05, "perf/tokens_per_sec": 25923.15218273698, "train/loss_math": 2.6068137486775718, "train/loss_code": 1.8082219958305359, "train/loss_prose": 3.467900037765503} +{"step": 1968, "train/loss": 2.4201310873031616, "train/lm_loss": 2.4201310873031616, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7965600999758356e-05, "perf/tokens_per_sec": 26240.546051898025, "train/loss_math": 2.253172516822815, "train/loss_code": 1.583838701248169, "train/loss_prose": 3.367729107538859} +{"step": 1969, "train/loss": 2.4699114859104156, "train/lm_loss": 2.4699114859104156, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.795067523432826e-05, "perf/tokens_per_sec": 26165.451569177534, "train/loss_math": 2.286774158477783, "train/loss_code": 1.7961629033088684, "train/loss_prose": 3.509935140609741} +{"step": 1970, "train/loss": 2.357120931148529, "train/lm_loss": 2.357120931148529, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7935743156896375e-05, "perf/tokens_per_sec": 26769.302612286236, "train/loss_code": 1.521571675936381, "train/loss_math": 2.426081339518229, "train/loss_prose": 3.507003664970398} +{"step": 1971, "train/loss": 2.800134599208832, "train/lm_loss": 2.800134599208832, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.792080477474043e-05, "perf/tokens_per_sec": 26571.84844960281, "train/loss_code": 1.8733946681022644, "train/loss_prose": 4.221276839574178, "train/loss_math": 2.2436671257019043} +{"step": 1972, "train/loss": 2.28894767165184, "train/lm_loss": 2.28894767165184, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.790586009514119e-05, "perf/tokens_per_sec": 26813.422395777245, "train/loss_prose": 3.4971550703048706, "train/loss_math": 2.2035099864006042, "train/loss_code": 1.2516155242919922} +{"step": 1973, "train/loss": 2.476535677909851, "train/lm_loss": 2.476535677909851, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.789090912538253e-05, "perf/tokens_per_sec": 25664.38730514412, "train/loss_code": 1.8714175820350647, "train/loss_math": 2.321713626384735, "train/loss_prose": 3.391297698020935} +{"step": 1974, "train/loss": 2.640230417251587, "train/lm_loss": 2.640230417251587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.787595187275136e-05, "perf/tokens_per_sec": 25983.80640555616, "train/loss_prose": 4.053193926811218, "train/loss_code": 1.412656009197235, "train/loss_math": 2.5475359559059143} +{"step": 1975, "train/loss": 1.7092110812664032, "train/lm_loss": 1.7092110812664032, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.786098834453766e-05, "perf/tokens_per_sec": 25721.524474561247, "train/loss_math": 2.1603577931722007, "train/loss_code": 1.4385228753089905} +{"step": 1976, "train/loss": 2.059026539325714, "train/lm_loss": 2.059026539325714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.784601854803449e-05, "perf/tokens_per_sec": 26635.869058435015, "train/loss_math": 2.2592031955718994, "train/loss_prose": 4.008285045623779, "train/loss_code": 1.1423715551694233} +{"step": 1977, "train/loss": 2.471740812063217, "train/lm_loss": 2.471740812063217, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.783104249053793e-05, "perf/tokens_per_sec": 26092.17256807861, "train/loss_code": 1.6275955041249592, "train/loss_prose": 3.3803802331288657, "train/loss_math": 2.375} +{"step": 1978, "train/loss": 2.3653838634490967, "train/lm_loss": 2.3653838634490967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.781606017934713e-05, "perf/tokens_per_sec": 25875.6740965661, "train/loss_code": 1.5287578105926514, "train/loss_prose": 3.202009856700897} +{"step": 1979, "train/loss": 1.5308406800031662, "train/lm_loss": 1.5308406800031662, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.780107162176429e-05, "perf/tokens_per_sec": 25166.254087350124, "train/loss_code": 1.0274339675903321, "train/loss_math": 2.112379312515259, "train/loss_prose": 2.884796619415283} +{"step": 1980, "train/loss": 2.5250276923179626, "train/lm_loss": 2.5250276923179626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.778607682509465e-05, "perf/tokens_per_sec": 25953.267423416128, "train/loss_prose": 4.04643440246582, "train/loss_code": 1.503894865512848, "train/loss_math": 2.045339584350586} +{"step": 1981, "train/loss": 2.1315779983997345, "train/lm_loss": 2.1315779983997345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.77710757966465e-05, "perf/tokens_per_sec": 24455.956428776823, "train/loss_prose": 3.1940356890360513, "train/loss_math": 2.012470245361328, "train/loss_code": 1.3645117729902267} +{"step": 1982, "train/loss": 2.540594309568405, "train/lm_loss": 2.540594309568405, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.775606854373115e-05, "perf/tokens_per_sec": 24975.023599977903, "train/loss_math": 2.143690586090088, "train/loss_code": 1.7687835693359375, "train/loss_prose": 3.4520386854807534} +{"step": 1983, "train/loss": 2.1493844091892242, "train/lm_loss": 2.1493844091892242, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7741055073662946e-05, "perf/tokens_per_sec": 26934.196003424026, "train/loss_code": 1.5711236794789631, "train/loss_prose": 3.111888885498047, "train/loss_math": 2.342453718185425} +{"step": 1984, "train/loss": 2.788290798664093, "train/lm_loss": 2.788290798664093, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7726035393759285e-05, "perf/tokens_per_sec": 25923.8562920153, "train/loss_code": 2.0255364974339805, "train/loss_prose": 3.2459434032440186} +{"step": 1985, "train/loss": 2.772479683160782, "train/lm_loss": 2.772479683160782, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.771100951134057e-05, "perf/tokens_per_sec": 25698.785183146374, "train/loss_math": 2.331283986568451, "train/loss_prose": 3.689798593521118, "train/loss_code": 1.785306692123413} +{"step": 1986, "train/loss": 2.550519347190857, "train/lm_loss": 2.550519347190857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.769597743373023e-05, "perf/tokens_per_sec": 26258.67465132097, "train/loss_math": 2.460987949371338, "train/loss_prose": 3.326208472251892, "train/loss_code": 1.446798324584961} +{"step": 1987, "train/loss": 3.314077913761139, "train/lm_loss": 3.314077913761139, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7680939168254733e-05, "perf/tokens_per_sec": 25303.47341273991, "train/loss_prose": 3.69212543964386, "train/loss_math": 2.1799346208572388} +{"step": 1988, "train/loss": 2.294211655855179, "train/lm_loss": 2.294211655855179, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7665894722243525e-05, "perf/tokens_per_sec": 25859.627191048105, "train/loss_code": 1.699307632446289, "train/loss_math": 1.9747976064682007, "train/loss_prose": 3.9411784410476685} +{"step": 1989, "train/loss": 2.4616870284080505, "train/lm_loss": 2.4616870284080505, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.765084410302909e-05, "perf/tokens_per_sec": 27104.810563068073, "train/loss_math": 2.228815048933029, "train/loss_code": 1.4336608052253723, "train/loss_prose": 3.95545756816864} +{"step": 1990, "train/loss": 2.2952686846256256, "train/lm_loss": 2.2952686846256256, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.763578731794695e-05, "perf/tokens_per_sec": 26020.682183269997, "train/loss_code": 1.5840518474578857, "train/loss_math": 2.2696354389190674, "train/loss_prose": 3.2521020571390786} +{"step": 1991, "train/loss": 2.5862444639205933, "train/lm_loss": 2.5862444639205933, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.762072437433555e-05, "perf/tokens_per_sec": 25526.194538422566, "train/loss_prose": 3.2409561475118003, "train/loss_math": 2.3737680912017822, "train/loss_code": 1.4720138311386108} +{"step": 1992, "train/loss": 2.610161066055298, "train/lm_loss": 2.610161066055298, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.760565527953641e-05, "perf/tokens_per_sec": 25874.076303503116, "train/loss_prose": 3.541454792022705, "train/loss_math": 2.1498266458511353, "train/loss_code": 1.9857568343480427} +{"step": 1993, "train/loss": 2.6825680136680603, "train/lm_loss": 2.6825680136680603, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.759058004089402e-05, "perf/tokens_per_sec": 25462.031470599133, "train/loss_math": 2.4593453407287598, "train/loss_prose": 3.6843668619791665, "train/loss_code": 1.5147041082382202} +{"step": 1994, "train/loss": 2.7946241199970245, "train/lm_loss": 2.7946241199970245, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.757549866575588e-05, "perf/tokens_per_sec": 25768.785908956597, "train/loss_math": 2.282397747039795, "train/loss_prose": 3.5038521885871887, "train/loss_code": 1.888394296169281} +{"step": 1995, "train/loss": 2.8572765588760376, "train/lm_loss": 2.8572765588760376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7560411161472456e-05, "perf/tokens_per_sec": 25817.4628123516, "train/loss_math": 2.1553921699523926, "train/loss_prose": 3.247879902521769, "train/loss_code": 1.215540885925293} +{"step": 1996, "train/loss": 2.1786939203739166, "train/lm_loss": 2.1786939203739166, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7545317535397214e-05, "perf/tokens_per_sec": 25607.846947977516, "train/loss_code": 1.3569764693578084, "train/loss_prose": 3.2865675687789917, "train/loss_math": 2.2618287404378257} +{"step": 1997, "train/loss": 2.2522524297237396, "train/lm_loss": 2.2522524297237396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7530217794886606e-05, "perf/tokens_per_sec": 25968.292419105434, "train/loss_code": 1.1019098162651062, "train/loss_prose": 3.756885051727295, "train/loss_math": 2.3397252559661865} +{"step": 1998, "train/loss": 2.3138160705566406, "train/lm_loss": 2.3138160705566406, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.751511194730007e-05, "perf/tokens_per_sec": 26026.9500424191, "train/loss_math": 2.3542288541793823, "train/loss_code": 1.4620361924171448, "train/loss_prose": 3.815312385559082} +{"step": 1999, "train/loss": 2.311407595872879, "train/lm_loss": 2.311407595872879, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7500000000000003e-05, "perf/tokens_per_sec": 26785.746980343978, "train/loss_code": 1.251254955927531, "train/loss_prose": 3.3944126764933267, "train/loss_math": 2.277128577232361} +{"step": 2000, "train/loss": 1.9319197237491608, "train/lm_loss": 1.9319197237491608, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.748488196035179e-05, "perf/tokens_per_sec": 26139.97191833555, "train/loss_code": 1.502331292629242, "train/loss_math": 2.4534424543380737, "train/loss_prose": 3.036816358566284} +{"step": 2000, "eval/loss": 2.213548381791483, "eval/lm_loss": 2.213548381791483, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 9.148119891097538, "eval/loss_code": 1.59095507499562, "eval/ppl_code": 4.908434613547083, "eval/loss_prose": 3.488052828270092, "eval/ppl_prose": 32.722169948994015, "eval/loss_math": 2.151683806144085, "eval/ppl_math": 8.59932581123533} +{"step": 2001, "train/loss": 2.8917328119277954, "train/lm_loss": 2.8917328119277954, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.746975783572377e-05, "perf/tokens_per_sec": 26058.571839843222, "train/loss_code": 1.2848397294680278, "train/loss_prose": 3.85586838722229} +{"step": 2002, "train/loss": 2.0186819434165955, "train/lm_loss": 2.0186819434165955, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7454627633487274e-05, "perf/tokens_per_sec": 24212.651818570033, "train/loss_math": 2.07207190990448, "train/loss_code": 1.3981170058250427, "train/loss_prose": 3.2064218521118164} +{"step": 2003, "train/loss": 2.5203645825386047, "train/lm_loss": 2.5203645825386047, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7439491361016564e-05, "perf/tokens_per_sec": 26307.046275028635, "train/loss_prose": 3.4735747575759888, "train/loss_code": 1.9909605979919434, "train/loss_math": 2.244961142539978} +{"step": 2004, "train/loss": 2.1246410608291626, "train/lm_loss": 2.1246410608291626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.742434902568889e-05, "perf/tokens_per_sec": 26010.321216287008, "train/loss_code": 1.3180393775304158, "train/loss_math": 2.2249438762664795, "train/loss_prose": 3.184088706970215} +{"step": 2005, "train/loss": 2.058508276939392, "train/lm_loss": 2.058508276939392, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7409200634884426e-05, "perf/tokens_per_sec": 26015.8749290161, "train/loss_math": 2.2771851420402527, "train/loss_code": 1.1680796146392822, "train/loss_prose": 3.8550870418548584} +{"step": 2006, "train/loss": 2.4528559744358063, "train/lm_loss": 2.4528559744358063, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.739404619598632e-05, "perf/tokens_per_sec": 26850.971571825332, "train/loss_code": 1.5036118030548096, "train/loss_math": 2.3284464478492737, "train/loss_prose": 3.6509194374084473} +{"step": 2007, "train/loss": 2.4780073165893555, "train/lm_loss": 2.4780073165893555, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7378885716380664e-05, "perf/tokens_per_sec": 26308.65770307437, "train/loss_math": 2.439297914505005, "train/loss_code": 1.7966334819793701, "train/loss_prose": 4.034302234649658} +{"step": 2008, "train/loss": 2.1165224611759186, "train/lm_loss": 2.1165224611759186, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7363719203456495e-05, "perf/tokens_per_sec": 25205.355054900556, "train/loss_prose": 3.493336319923401, "train/loss_code": 1.1050994396209717, "train/loss_math": 2.2100695768992105} +{"step": 2009, "train/loss": 2.1803872883319855, "train/lm_loss": 2.1803872883319855, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7348546664605777e-05, "perf/tokens_per_sec": 25647.682336554495, "train/loss_code": 1.5655112266540527, "train/loss_math": 2.219342887401581, "train/loss_prose": 3.8691933155059814} +{"step": 2010, "train/loss": 2.053498774766922, "train/lm_loss": 2.053498774766922, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7333368107223424e-05, "perf/tokens_per_sec": 26095.77920190054, "train/loss_code": 1.6171084344387054, "train/loss_math": 2.489889085292816} +{"step": 2011, "train/loss": 2.5811625123023987, "train/lm_loss": 2.5811625123023987, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.731818353870729e-05, "perf/tokens_per_sec": 26247.160891126925, "train/loss_prose": 3.4654258092244468, "train/loss_math": 2.4450879096984863, "train/loss_code": 1.45887953042984} +{"step": 2012, "train/loss": 2.637433737516403, "train/lm_loss": 2.637433737516403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.730299296645814e-05, "perf/tokens_per_sec": 26197.171330500114, "train/loss_code": 1.7773783206939697, "train/loss_prose": 3.4582114815711975, "train/loss_math": 1.9344896078109741} +{"step": 2013, "train/loss": 2.121510833501816, "train/lm_loss": 2.121510833501816, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7287796397879674e-05, "perf/tokens_per_sec": 25548.477317002806, "train/loss_math": 2.3824965953826904, "train/loss_code": 1.4474145968755086, "train/loss_prose": 3.09985613822937} +{"step": 2014, "train/loss": 2.7053416669368744, "train/lm_loss": 2.7053416669368744, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.727259384037852e-05, "perf/tokens_per_sec": 25063.891795524938, "train/loss_prose": 3.7853559255599976, "train/loss_code": 1.4353922009468079, "train/loss_math": 1.815262258052826} +{"step": 2015, "train/loss": 2.087797909975052, "train/lm_loss": 2.087797909975052, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.725738530136422e-05, "perf/tokens_per_sec": 25542.39979066372, "train/loss_code": 1.8002835512161255, "train/loss_prose": 3.6295166015625, "train/loss_math": 2.271165609359741} +{"step": 2016, "train/loss": 2.779109835624695, "train/lm_loss": 2.779109835624695, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.724217078824923e-05, "perf/tokens_per_sec": 26364.049450616905, "train/loss_math": 2.325040817260742, "train/loss_prose": 3.385505437850952, "train/loss_code": 1.7157347202301025} +{"step": 2017, "train/loss": 2.76921284198761, "train/lm_loss": 2.76921284198761, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.722695030844891e-05, "perf/tokens_per_sec": 26155.851059715875, "train/loss_code": 1.6642438769340515, "train/loss_math": 2.5478755633036294, "train/loss_prose": 3.727195898691813} +{"step": 2018, "train/loss": 2.1979698538780212, "train/lm_loss": 2.1979698538780212, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.721172386938155e-05, "perf/tokens_per_sec": 26366.517619505845, "train/loss_code": 1.508203700184822, "train/loss_prose": 3.3347055912017822, "train/loss_math": 2.4407660961151123} +{"step": 2019, "train/loss": 2.5304579734802246, "train/lm_loss": 2.5304579734802246, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.719649147846832e-05, "perf/tokens_per_sec": 27207.186654124027, "train/loss_prose": 3.4080674052238464, "train/loss_math": 1.8691595196723938, "train/loss_code": 1.4365370273590088} +{"step": 2020, "train/loss": 2.00361967086792, "train/lm_loss": 2.00361967086792, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.718125314313331e-05, "perf/tokens_per_sec": 26243.99337020963, "train/loss_code": 1.4022053718566894, "train/loss_prose": 3.4752469062805176, "train/loss_math": 2.067436695098877} +{"step": 2021, "train/loss": 2.78507199883461, "train/lm_loss": 2.78507199883461, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.71660088708035e-05, "perf/tokens_per_sec": 26313.775295802476, "train/loss_code": 1.6902042031288147, "train/loss_prose": 3.5573992133140564, "train/loss_math": 2.3352856636047363} +{"step": 2022, "train/loss": 2.4191856682300568, "train/lm_loss": 2.4191856682300568, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.715075866890876e-05, "perf/tokens_per_sec": 26174.660334727396, "train/loss_math": 2.1679351806640623, "train/loss_prose": 3.7720264196395874, "train/loss_code": 0.9697574377059937} +{"step": 2023, "train/loss": 2.5112743377685547, "train/lm_loss": 2.5112743377685547, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.713550254488185e-05, "perf/tokens_per_sec": 26148.486152459926, "train/loss_math": 2.4681841135025024, "train/loss_prose": 3.488036870956421, "train/loss_code": 1.6206921935081482} +{"step": 2024, "train/loss": 2.511209100484848, "train/lm_loss": 2.511209100484848, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.712024050615843e-05, "perf/tokens_per_sec": 26225.885029607387, "train/loss_math": 2.324523687362671, "train/loss_prose": 3.584510326385498, "train/loss_code": 1.8112784028053284} +{"step": 2025, "train/loss": 2.8102933764457703, "train/lm_loss": 2.8102933764457703, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.710497256017702e-05, "perf/tokens_per_sec": 25956.482800299756, "train/loss_prose": 3.7450254559516907, "train/loss_math": 2.0402764876683555, "train/loss_code": 1.3814152479171753} +{"step": 2026, "train/loss": 2.2777496576309204, "train/lm_loss": 2.2777496576309204, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.708969871437904e-05, "perf/tokens_per_sec": 26299.716769182254, "train/loss_prose": 4.213273763656616, "train/loss_code": 1.2743815630674362, "train/loss_math": 2.34896183013916} +{"step": 2027, "train/loss": 2.239124149084091, "train/lm_loss": 2.239124149084091, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7074418976208766e-05, "perf/tokens_per_sec": 26314.258950821983, "train/loss_prose": 3.2039546966552734, "train/loss_math": 2.162635087966919, "train/loss_code": 1.5346235930919647} +{"step": 2028, "train/loss": 2.4699968099594116, "train/lm_loss": 2.4699968099594116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.705913335311338e-05, "perf/tokens_per_sec": 26509.607389416444, "train/loss_code": 1.630560040473938, "train/loss_prose": 3.4469472567240396, "train/loss_math": 2.263725996017456} +{"step": 2029, "train/loss": 2.093545913696289, "train/lm_loss": 2.093545913696289, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.704384185254288e-05, "perf/tokens_per_sec": 26328.171640144177, "train/loss_code": 1.3727394342422485, "train/loss_prose": 3.5865012407302856, "train/loss_math": 2.0422033071517944} +{"step": 2030, "train/loss": 2.2840884625911713, "train/lm_loss": 2.2840884625911713, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.702854448195019e-05, "perf/tokens_per_sec": 26297.22083710906, "train/loss_code": 2.105810594558716, "train/loss_math": 2.139392375946045, "train/loss_prose": 3.4648704528808594} +{"step": 2031, "train/loss": 2.3391774892807007, "train/lm_loss": 2.3391774892807007, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.701324124879102e-05, "perf/tokens_per_sec": 25999.378286267744, "train/loss_math": 2.2375547885894775, "train/loss_code": 1.5777582724889119, "train/loss_prose": 3.6337400674819946} +{"step": 2032, "train/loss": 2.225797116756439, "train/lm_loss": 2.225797116756439, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.699793216052402e-05, "perf/tokens_per_sec": 26777.397061321954, "train/loss_code": 1.0983906835317612, "train/loss_prose": 3.353203535079956} +{"step": 2033, "train/loss": 2.3834866881370544, "train/lm_loss": 2.3834866881370544, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.698261722461063e-05, "perf/tokens_per_sec": 26285.67325649535, "train/loss_math": 2.1948230266571045, "train/loss_prose": 3.377836227416992, "train/loss_code": 1.9092505772908528} +{"step": 2034, "train/loss": 2.215451270341873, "train/lm_loss": 2.215451270341873, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.696729644851518e-05, "perf/tokens_per_sec": 26535.075349067094, "train/loss_prose": 3.3808062076568604, "train/loss_code": 1.7006876468658447, "train/loss_math": 2.3101852536201477} +{"step": 2035, "train/loss": 2.1509985625743866, "train/lm_loss": 2.1509985625743866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.695196983970481e-05, "perf/tokens_per_sec": 26123.158681910867, "train/loss_math": 2.3336361348629, "train/loss_prose": 3.235539197921753, "train/loss_code": 1.5459680954615276} +{"step": 2036, "train/loss": 2.663782089948654, "train/lm_loss": 2.663782089948654, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.693663740564953e-05, "perf/tokens_per_sec": 26261.96615268438, "train/loss_prose": 3.3023407459259033, "train/loss_math": 2.4992799758911133, "train/loss_code": 1.9526975750923157} +{"step": 2037, "train/loss": 2.6327351331710815, "train/lm_loss": 2.6327351331710815, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.69212991538222e-05, "perf/tokens_per_sec": 26237.540408868907, "train/loss_code": 1.883260428905487, "train/loss_math": 2.345613638559977, "train/loss_prose": 3.4195064703623452} +{"step": 2038, "train/loss": 2.5623831748962402, "train/lm_loss": 2.5623831748962402, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.690595509169848e-05, "perf/tokens_per_sec": 26054.777909383887, "train/loss_prose": 3.358284870783488, "train/loss_math": 2.346752961476644, "train/loss_code": 1.6919760704040527} +{"step": 2039, "train/loss": 2.6211595833301544, "train/lm_loss": 2.6211595833301544, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.689060522675689e-05, "perf/tokens_per_sec": 26722.667284704345, "train/loss_prose": 3.505833327770233, "train/loss_code": 1.4987114270528157, "train/loss_math": 2.4498093128204346} +{"step": 2040, "train/loss": 2.0806572139263153, "train/lm_loss": 2.0806572139263153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6875249566478745e-05, "perf/tokens_per_sec": 25982.62747766959, "train/loss_math": 2.3618621031443277, "train/loss_code": 1.0038630366325378, "train/loss_prose": 3.2740410566329956} +{"step": 2041, "train/loss": 2.579056203365326, "train/lm_loss": 2.579056203365326, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.685988811834823e-05, "perf/tokens_per_sec": 26231.89166360015, "train/loss_code": 1.0663526058197021, "train/loss_prose": 3.805308739344279, "train/loss_math": 2.3612727324167886} +{"step": 2042, "train/loss": 2.570296347141266, "train/lm_loss": 2.570296347141266, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.684452088985233e-05, "perf/tokens_per_sec": 25894.004367950878, "train/loss_prose": 3.7524683475494385, "train/loss_code": 1.6167682806650798, "train/loss_math": 2.227330207824707} +{"step": 2043, "train/loss": 2.2344036400318146, "train/lm_loss": 2.2344036400318146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.682914788848083e-05, "perf/tokens_per_sec": 26974.201890406657, "train/loss_math": 2.199146588643392, "train/loss_code": 1.5868677695592244, "train/loss_prose": 3.258593201637268} +{"step": 2044, "train/loss": 2.3934172689914703, "train/lm_loss": 2.3934172689914703, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.681376912172636e-05, "perf/tokens_per_sec": 26252.41504803556, "train/loss_code": 1.3892162243525188, "train/loss_math": 2.321551561355591, "train/loss_prose": 3.445528984069824} +{"step": 2045, "train/loss": 2.3310408890247345, "train/lm_loss": 2.3310408890247345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6798384597084325e-05, "perf/tokens_per_sec": 26222.68261612173, "train/loss_prose": 3.135855197906494, "train/loss_code": 1.5262264609336853} +{"step": 2046, "train/loss": 2.4746896624565125, "train/lm_loss": 2.4746896624565125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.678299432205296e-05, "perf/tokens_per_sec": 25999.378286267744, "train/loss_math": 2.181548833847046, "train/loss_prose": 3.496205965677897, "train/loss_code": 1.648600459098816} +{"step": 2047, "train/loss": 3.227330446243286, "train/lm_loss": 3.227330446243286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6767598304133324e-05, "perf/tokens_per_sec": 25934.579124467302, "train/loss_math": 2.3863730430603027, "train/loss_prose": 3.5076496601104736} +{"step": 2048, "train/loss": 2.2412995100021362, "train/lm_loss": 2.2412995100021362, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.675219655082921e-05, "perf/tokens_per_sec": 27133.70667576395, "train/loss_code": 1.6351050436496735, "train/loss_prose": 3.4889506101608276, "train/loss_math": 2.206037163734436} +{"step": 2049, "train/loss": 2.10776424407959, "train/lm_loss": 2.10776424407959, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.673678906964727e-05, "perf/tokens_per_sec": 26194.1756151409, "train/loss_code": 1.4040430903434753, "train/loss_prose": 3.6894806623458862, "train/loss_math": 2.4629368782043457} +{"step": 2050, "train/loss": 2.7400806546211243, "train/lm_loss": 2.7400806546211243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6721375868096924e-05, "perf/tokens_per_sec": 26559.688924618145, "train/loss_prose": 3.27805495262146, "train/loss_math": 2.2021063566207886} +{"step": 2051, "train/loss": 2.5965576171875, "train/lm_loss": 2.5965576171875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6705956953690364e-05, "perf/tokens_per_sec": 27113.66549246872, "train/loss_code": 1.7748021284739177, "train/loss_prose": 3.707470655441284, "train/loss_math": 2.162821412086487} +{"step": 2052, "train/loss": 2.5521869361400604, "train/lm_loss": 2.5521869361400604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.66905323339426e-05, "perf/tokens_per_sec": 26074.90625407898, "train/loss_code": 0.8782035708427429, "train/loss_math": 2.5822519779205324, "train/loss_prose": 3.3140156269073486} +{"step": 2053, "train/loss": 2.1927945017814636, "train/lm_loss": 2.1927945017814636, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.667510201637139e-05, "perf/tokens_per_sec": 26184.63383757173, "train/loss_code": 1.4602282842000325, "train/loss_math": 2.0651180744171143, "train/loss_prose": 3.483158230781555} +{"step": 2054, "train/loss": 2.073749452829361, "train/lm_loss": 2.073749452829361, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.665966600849728e-05, "perf/tokens_per_sec": 26010.99068714657, "train/loss_math": 1.9684043725331624, "train/loss_code": 2.013562262058258, "train/loss_prose": 2.630533456802368} +{"step": 2055, "train/loss": 2.032994508743286, "train/lm_loss": 2.032994508743286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.664422431784361e-05, "perf/tokens_per_sec": 26938.123668567612, "train/loss_code": 1.640234684944153, "train/loss_math": 2.2504643201828003, "train/loss_prose": 3.5618555545806885} +{"step": 2056, "train/loss": 2.8993906378746033, "train/lm_loss": 2.8993906378746033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.662877695193646e-05, "perf/tokens_per_sec": 26132.894208127727, "train/loss_prose": 3.5356560945510864, "train/loss_math": 2.485408147176107, "train/loss_code": 1.5962766408920288} +{"step": 2057, "train/loss": 2.265949100255966, "train/lm_loss": 2.265949100255966, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.66133239183047e-05, "perf/tokens_per_sec": 25999.142209429378, "train/loss_code": 1.7252168953418732, "train/loss_prose": 3.415316104888916, "train/loss_math": 2.1980464458465576} +{"step": 2058, "train/loss": 2.8776084780693054, "train/lm_loss": 2.8776084780693054, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.659786522447993e-05, "perf/tokens_per_sec": 27101.603682564924, "train/loss_prose": 3.3317753791809084, "train/loss_code": 1.7615807056427002, "train/loss_math": 2.300204873085022} +{"step": 2059, "train/loss": 2.469493120908737, "train/lm_loss": 2.469493120908737, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6582400877996546e-05, "perf/tokens_per_sec": 26142.597212246637, "train/loss_prose": 3.5137787659962973, "train/loss_math": 2.269519090652466, "train/loss_code": 1.558523178100586} +{"step": 2060, "train/loss": 2.008918344974518, "train/lm_loss": 2.008918344974518, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6566930886391674e-05, "perf/tokens_per_sec": 27238.115234910714, "train/loss_math": 2.2467265129089355, "train/loss_code": 1.097823699315389, "train/loss_prose": 3.790968656539917} +{"step": 2061, "train/loss": 2.3842891454696655, "train/lm_loss": 2.3842891454696655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.655145525720522e-05, "perf/tokens_per_sec": 26191.340302498564, "train/loss_code": 1.4404037594795227, "train/loss_math": 2.4242969751358032, "train/loss_prose": 3.2481589317321777} +{"step": 2062, "train/loss": 2.4948030412197113, "train/lm_loss": 2.4948030412197113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.653597399797979e-05, "perf/tokens_per_sec": 26648.759821182048, "train/loss_math": 2.2755969365437827, "train/loss_code": 1.8048677047093709, "train/loss_prose": 3.85851514339447} +{"step": 2063, "train/loss": 2.520217925310135, "train/lm_loss": 2.520217925310135, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6520487116260776e-05, "perf/tokens_per_sec": 26060.350625119456, "train/loss_math": 2.1438910484313967, "train/loss_code": 1.4289047718048096, "train/loss_prose": 4.006690979003906} +{"step": 2064, "train/loss": 2.4135647416114807, "train/lm_loss": 2.4135647416114807, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6504994619596294e-05, "perf/tokens_per_sec": 25888.73663208781, "train/loss_math": 2.4387123187383017, "train/loss_prose": 3.479442834854126, "train/loss_code": 1.1968014240264893} +{"step": 2065, "train/loss": 2.789231777191162, "train/lm_loss": 2.789231777191162, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6489496515537204e-05, "perf/tokens_per_sec": 25859.627191048105, "train/loss_math": 2.3397350311279297, "train/loss_prose": 3.579783320426941, "train/loss_code": 1.6576253175735474} +{"step": 2066, "train/loss": 2.359661102294922, "train/lm_loss": 2.359661102294922, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.647399281163708e-05, "perf/tokens_per_sec": 26094.51082140617, "train/loss_math": 2.2763781547546387, "train/loss_code": 1.5873900254567463, "train/loss_prose": 3.6429920196533203} +{"step": 2067, "train/loss": 2.1280400156974792, "train/lm_loss": 2.1280400156974792, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.645848351545225e-05, "perf/tokens_per_sec": 26377.447853476613, "train/loss_math": 2.1979166984558107, "train/loss_code": 1.4173545241355896, "train/loss_prose": 3.2000279426574707} +{"step": 2068, "train/loss": 2.9347134232521057, "train/lm_loss": 2.9347134232521057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.644296863454175e-05, "perf/tokens_per_sec": 25827.670672140932, "train/loss_code": 2.263743817806244, "train/loss_prose": 3.3330656051635743, "train/loss_math": 2.2848916053771973} +{"step": 2069, "train/loss": 2.6669468879699707, "train/lm_loss": 2.6669468879699707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.642744817646736e-05, "perf/tokens_per_sec": 26771.22147199207, "train/loss_code": 1.0585657358169556, "train/loss_prose": 3.4125946164131165, "train/loss_math": 2.2088769674301147} +{"step": 2070, "train/loss": 2.4752604365348816, "train/lm_loss": 2.4752604365348816, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6411922148793544e-05, "perf/tokens_per_sec": 26701.15335562073, "train/loss_math": 2.2382220327854156, "train/loss_prose": 3.1508424282073975, "train/loss_code": 1.396668553352356} +{"step": 2071, "train/loss": 2.5020915269851685, "train/lm_loss": 2.5020915269851685, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.639639055908751e-05, "perf/tokens_per_sec": 26165.92978704609, "train/loss_prose": 3.388360341389974, "train/loss_code": 1.7143035729726155, "train/loss_math": 2.3543702363967896} +{"step": 2072, "train/loss": 2.442596197128296, "train/lm_loss": 2.442596197128296, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6380853414919144e-05, "perf/tokens_per_sec": 25782.088941380567, "train/loss_code": 1.5139854550361633, "train/loss_prose": 3.7018814086914062, "train/loss_math": 2.379183292388916} +{"step": 2073, "train/loss": 2.452167809009552, "train/lm_loss": 2.452167809009552, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.63653107238611e-05, "perf/tokens_per_sec": 25976.773720619032, "train/loss_prose": 3.1688849925994873, "train/loss_math": 2.161989212036133, "train/loss_code": 1.3089117407798767} +{"step": 2074, "train/loss": 2.5908589959144592, "train/lm_loss": 2.5908589959144592, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.634976249348867e-05, "perf/tokens_per_sec": 26040.75786154942, "train/loss_math": 2.4145150979359946, "train/loss_prose": 3.440713802973429, "train/loss_code": 1.5805922746658325} +{"step": 2075, "train/loss": 2.5269275903701782, "train/lm_loss": 2.5269275903701782, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.633420873137988e-05, "perf/tokens_per_sec": 25997.057050204363, "train/loss_math": 2.321779155731201, "train/loss_prose": 3.3885347843170166, "train/loss_code": 1.8294557332992554} +{"step": 2076, "train/loss": 2.344818502664566, "train/lm_loss": 2.344818502664566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.631864944511545e-05, "perf/tokens_per_sec": 26605.138615686003, "train/loss_code": 1.2265273332595825, "train/loss_math": 2.21585480372111, "train/loss_prose": 3.219309409459432} +{"step": 2077, "train/loss": 2.0671347081661224, "train/lm_loss": 2.0671347081661224, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.630308464227877e-05, "perf/tokens_per_sec": 26035.588076882857, "train/loss_math": 2.154487282037735, "train/loss_code": 1.673734227816264, "train/loss_prose": 2.897925853729248} +{"step": 2078, "train/loss": 2.617675006389618, "train/lm_loss": 2.617675006389618, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.628751433045596e-05, "perf/tokens_per_sec": 26093.480353039275, "train/loss_code": 1.663456122080485, "train/loss_prose": 3.425966203212738, "train/loss_math": 2.2471673488616943} +{"step": 2079, "train/loss": 2.9723405838012695, "train/lm_loss": 2.9723405838012695, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.627193851723577e-05, "perf/tokens_per_sec": 25978.659206692202, "train/loss_prose": 3.4558995962142944, "train/loss_code": 1.5216639041900635} +{"step": 2080, "train/loss": 2.1531361639499664, "train/lm_loss": 2.1531361639499664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.625635721020969e-05, "perf/tokens_per_sec": 25947.740405833, "train/loss_prose": 2.7705297470092773, "train/loss_math": 2.4292667388916014, "train/loss_code": 1.1541130244731903} +{"step": 2081, "train/loss": 2.926027923822403, "train/lm_loss": 2.926027923822403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.624077041697185e-05, "perf/tokens_per_sec": 25823.089721640605, "train/loss_prose": 3.5999292373657226, "train/loss_math": 2.16678524017334, "train/loss_code": 1.6208953261375427} +{"step": 2082, "train/loss": 2.367361217737198, "train/lm_loss": 2.367361217737198, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.622517814511906e-05, "perf/tokens_per_sec": 25702.51459279606, "train/loss_math": 2.493280053138733, "train/loss_prose": 3.4568403561909995, "train/loss_code": 1.1939362287521362} +{"step": 2083, "train/loss": 2.107654392719269, "train/lm_loss": 2.107654392719269, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6209580402250815e-05, "perf/tokens_per_sec": 26405.90891890189, "train/loss_math": 2.307208442687988, "train/loss_code": 1.4796675443649292, "train/loss_prose": 2.3658573627471924} +{"step": 2084, "train/loss": 2.4122205078601837, "train/lm_loss": 2.4122205078601837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.619397719596924e-05, "perf/tokens_per_sec": 26026.398034219368, "train/loss_prose": 3.5359405676523843, "train/loss_code": 1.2174123227596283, "train/loss_math": 2.0850393772125244} +{"step": 2085, "train/loss": 2.3809849321842194, "train/lm_loss": 2.3809849321842194, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.617836853387918e-05, "perf/tokens_per_sec": 26016.781078780867, "train/loss_code": 1.5790551900863647, "train/loss_prose": 3.194053570429484, "train/loss_math": 2.3642765283584595} +{"step": 2086, "train/loss": 2.5491923391819, "train/lm_loss": 2.5491923391819, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6162754423588085e-05, "perf/tokens_per_sec": 25961.307359739116, "train/loss_code": 1.5863991181055705, "train/loss_prose": 3.126868152618408} +{"step": 2087, "train/loss": 2.487647235393524, "train/lm_loss": 2.487647235393524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.614713487270611e-05, "perf/tokens_per_sec": 26135.20000730209, "train/loss_prose": 3.048269510269165, "train/loss_math": 2.409294009208679, "train/loss_code": 2.0837310552597046} +{"step": 2088, "train/loss": 2.6526125073432922, "train/lm_loss": 2.6526125073432922, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.613150988884599e-05, "perf/tokens_per_sec": 25909.976750218684, "train/loss_code": 0.8169375061988831, "train/loss_prose": 3.4756990671157837, "train/loss_math": 2.1670552094777427} +{"step": 2089, "train/loss": 2.029558300971985, "train/lm_loss": 2.029558300971985, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.611587947962319e-05, "perf/tokens_per_sec": 26656.61607974082, "train/loss_prose": 3.5433919429779053, "train/loss_code": 1.2591746648152669, "train/loss_math": 2.2288875579833984} +{"step": 2090, "train/loss": 2.7045736610889435, "train/lm_loss": 2.7045736610889435, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.610024365265577e-05, "perf/tokens_per_sec": 25981.684412382983, "train/loss_prose": 3.321642303466797, "train/loss_code": 0.8714948296546936, "train/loss_math": 2.078441321849823} +{"step": 2091, "train/loss": 2.2957665622234344, "train/lm_loss": 2.2957665622234344, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.608460241556443e-05, "perf/tokens_per_sec": 26020.445719392043, "train/loss_code": 1.3022048473358154, "train/loss_math": 2.3583527207374573, "train/loss_prose": 3.1641560792922974} +{"step": 2092, "train/loss": 2.2054334580898285, "train/lm_loss": 2.2054334580898285, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.606895577597255e-05, "perf/tokens_per_sec": 26037.32426415203, "train/loss_code": 1.6050819635391236, "train/loss_prose": 3.206019083658854} +{"step": 2093, "train/loss": 2.1526730060577393, "train/lm_loss": 2.1526730060577393, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.605330374150607e-05, "perf/tokens_per_sec": 26006.305114713203, "train/loss_code": 1.5922739624977111, "train/loss_prose": 3.530648708343506, "train/loss_math": 2.198716640472412} +{"step": 2094, "train/loss": 2.7466264963150024, "train/lm_loss": 2.7466264963150024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.603764631979363e-05, "perf/tokens_per_sec": 25998.90613687817, "train/loss_prose": 3.4681586265563964, "train/loss_code": 1.5440730849901836} +{"step": 2095, "train/loss": 2.48422434926033, "train/lm_loss": 2.48422434926033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.602198351846647e-05, "perf/tokens_per_sec": 26033.260370954056, "train/loss_code": 1.8138544857501984, "train/loss_prose": 3.4005788962046304, "train/loss_math": 2.4166390895843506} +{"step": 2096, "train/loss": 2.1436767578125, "train/lm_loss": 2.1436767578125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6006315345158434e-05, "perf/tokens_per_sec": 25927.455670202668, "train/loss_prose": 3.734250783920288, "train/loss_code": 1.5428398370742797, "train/loss_math": 1.9667130708694458} +{"step": 2097, "train/loss": 2.6354259252548218, "train/lm_loss": 2.6354259252548218, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5990641807506e-05, "perf/tokens_per_sec": 25994.657563927976, "train/loss_code": 1.4853967428207397, "train/loss_math": 2.537324905395508, "train/loss_prose": 3.850855986277262} +{"step": 2098, "train/loss": 2.1636842787265778, "train/lm_loss": 2.1636842787265778, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.597496291314827e-05, "perf/tokens_per_sec": 25913.767712526944, "train/loss_code": 1.0449350029230118, "train/loss_prose": 3.626351277033488, "train/loss_math": 2.250680923461914} +{"step": 2099, "train/loss": 2.7296910881996155, "train/lm_loss": 2.7296910881996155, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5959278669726935e-05, "perf/tokens_per_sec": 26241.54808594096, "train/loss_code": 1.4819197058677673, "train/loss_prose": 3.8367531299591064, "train/loss_math": 2.454476753870646} +{"step": 2100, "train/loss": 2.56205016374588, "train/lm_loss": 2.56205016374588, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.594358908488632e-05, "perf/tokens_per_sec": 26152.98422436326, "train/loss_math": 2.3558157980442047, "train/loss_code": 1.2529549598693848, "train/loss_prose": 3.2733942667643228} +{"step": 2101, "train/loss": 2.9433789253234863, "train/lm_loss": 2.9433789253234863, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.592789416627332e-05, "perf/tokens_per_sec": 26388.427686248728, "train/loss_math": 2.1053408086299896, "train/loss_prose": 3.781416594982147} +{"step": 2102, "train/loss": 2.336938291788101, "train/lm_loss": 2.336938291788101, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5912193921537476e-05, "perf/tokens_per_sec": 26029.868189077068, "train/loss_code": 1.3101398348808289, "train/loss_math": 2.0410318970680237, "train/loss_prose": 3.561007340749105} +{"step": 2103, "train/loss": 1.645220547914505, "train/lm_loss": 1.645220547914505, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5896488358330856e-05, "perf/tokens_per_sec": 26181.68088890379, "train/loss_code": 1.377198874950409, "train/loss_math": 2.4492855072021484} +{"step": 2104, "train/loss": 2.1906804740428925, "train/lm_loss": 2.1906804740428925, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.588077748430819e-05, "perf/tokens_per_sec": 26544.956317917673, "train/loss_code": 1.3509781956672668, "train/loss_prose": 3.1448022524515786, "train/loss_math": 2.6871237754821777} +{"step": 2105, "train/loss": 1.8452490270137787, "train/lm_loss": 1.8452490270137787, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.586506130712676e-05, "perf/tokens_per_sec": 26067.468195322388, "train/loss_code": 1.4990002910296123, "train/loss_prose": 3.7107620239257812, "train/loss_math": 2.0572283267974854} +{"step": 2106, "train/loss": 2.50641006231308, "train/lm_loss": 2.50641006231308, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.584933983444644e-05, "perf/tokens_per_sec": 25995.522901352444, "train/loss_math": 2.284822463989258, "train/loss_code": 1.2396582663059235, "train/loss_prose": 3.5724989573160806} +{"step": 2107, "train/loss": 2.090162307024002, "train/lm_loss": 2.090162307024002, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5833613073929684e-05, "perf/tokens_per_sec": 26489.25185642038, "train/loss_code": 1.2172807157039642, "train/loss_prose": 3.1372700532277427, "train/loss_math": 2.4403653144836426} +{"step": 2108, "train/loss": 2.325180768966675, "train/lm_loss": 2.325180768966675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.581788103324152e-05, "perf/tokens_per_sec": 25109.682770797128, "train/loss_math": 2.336994727452596, "train/loss_code": 1.9057309925556183, "train/loss_prose": 3.9675381183624268} +{"step": 2109, "train/loss": 2.351637601852417, "train/lm_loss": 2.351637601852417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.580214372004956e-05, "perf/tokens_per_sec": 25169.64614841414, "train/loss_prose": 3.6014641523361206, "train/loss_math": 2.1408791840076447, "train/loss_code": 1.5233274698257446} +{"step": 2110, "train/loss": 2.0457933247089386, "train/lm_loss": 2.0457933247089386, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5786401142023975e-05, "perf/tokens_per_sec": 26678.638267210805, "train/loss_code": 1.3020690282185872, "train/loss_math": 2.2102755904197693, "train/loss_prose": 3.619036912918091} +{"step": 2111, "train/loss": 2.475204586982727, "train/lm_loss": 2.475204586982727, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.577065330683751e-05, "perf/tokens_per_sec": 26043.244825040514, "train/loss_math": 2.327263275782267, "train/loss_prose": 3.6688238382339478, "train/loss_code": 1.8273998101552327} +{"step": 2112, "train/loss": 2.286271721124649, "train/lm_loss": 2.286271721124649, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5754900222165465e-05, "perf/tokens_per_sec": 25621.786714152342, "train/loss_math": 2.3965632120768228, "train/loss_prose": 3.3990800380706787, "train/loss_code": 1.434108018875122} +{"step": 2113, "train/loss": 2.6660803854465485, "train/lm_loss": 2.6660803854465485, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.573914189568571e-05, "perf/tokens_per_sec": 25947.074186844522, "train/loss_prose": 3.5749418258666994, "train/loss_code": 1.151311198870341} +{"step": 2114, "train/loss": 2.43300324678421, "train/lm_loss": 2.43300324678421, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.572337833507865e-05, "perf/tokens_per_sec": 26125.025751138226, "train/loss_code": 1.9011052052179973, "train/loss_prose": 3.0967628955841064, "train/loss_math": 2.2352105379104614} +{"step": 2115, "train/loss": 2.5967174768447876, "train/lm_loss": 2.5967174768447876, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.570760954802726e-05, "perf/tokens_per_sec": 25768.863212492743, "train/loss_math": 2.3094124794006348, "train/loss_code": 1.7683419585227966, "train/loss_prose": 3.436272462209066} +{"step": 2116, "train/loss": 2.2284184992313385, "train/lm_loss": 2.2284184992313385, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5691835542217054e-05, "perf/tokens_per_sec": 26509.975548296203, "train/loss_code": 1.4778767029444377, "train/loss_math": 2.091773430506388, "train/loss_prose": 3.5591986179351807} +{"step": 2117, "train/loss": 2.3685094714164734, "train/lm_loss": 2.3685094714164734, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.567605632533608e-05, "perf/tokens_per_sec": 25428.075864457554, "train/loss_math": 2.135548084974289, "train/loss_prose": 3.47510826587677, "train/loss_code": 1.7278332114219666} +{"step": 2118, "train/loss": 2.925479084253311, "train/lm_loss": 2.925479084253311, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5660271905074974e-05, "perf/tokens_per_sec": 25903.68698216733, "train/loss_code": 1.8795466820398967, "train/loss_prose": 3.889737367630005, "train/loss_math": 2.206242561340332} +{"step": 2119, "train/loss": 3.0575475692749023, "train/lm_loss": 3.0575475692749023, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.564448228912682e-05, "perf/tokens_per_sec": 25895.95592529005, "train/loss_math": 2.1954103310902915, "train/loss_code": 1.7779741287231445, "train/loss_prose": 4.0240438580513} +{"step": 2120, "train/loss": 2.1870804131031036, "train/lm_loss": 2.1870804131031036, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.562868748518732e-05, "perf/tokens_per_sec": 26128.999126998853, "train/loss_code": 1.449369748433431, "train/loss_math": 2.232273896535238, "train/loss_prose": 3.2258559465408325} +{"step": 2121, "train/loss": 2.0620299577713013, "train/lm_loss": 2.0620299577713013, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.561288750095465e-05, "perf/tokens_per_sec": 26104.304331706488, "train/loss_prose": 2.7497072219848633, "train/loss_code": 1.527615447839101, "train/loss_math": 2.1379929780960083} +{"step": 2122, "train/loss": 2.1206488013267517, "train/lm_loss": 2.1206488013267517, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.559708234412954e-05, "perf/tokens_per_sec": 24746.296934777598, "train/loss_math": 2.319651464621226, "train/loss_code": 1.5236406326293945} +{"step": 2123, "train/loss": 2.3922597467899323, "train/lm_loss": 2.3922597467899323, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5581272022415244e-05, "perf/tokens_per_sec": 25115.959989590978, "train/loss_code": 1.5085071722666423, "train/loss_math": 2.237194776535034, "train/loss_prose": 3.3793890476226807} +{"step": 2124, "train/loss": 2.3853309750556946, "train/lm_loss": 2.3853309750556946, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.556545654351749e-05, "perf/tokens_per_sec": 24995.590360183673, "train/loss_code": 1.0197784900665283, "train/loss_prose": 3.3467322985331216, "train/loss_math": 2.334298054377238} +{"step": 2125, "train/loss": 2.0031692385673523, "train/lm_loss": 2.0031692385673523, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.554963591514457e-05, "perf/tokens_per_sec": 24617.930794142092, "train/loss_code": 1.5336908499399822, "train/loss_math": 2.2848562717437746} +{"step": 2126, "train/loss": 2.8679149746894836, "train/lm_loss": 2.8679149746894836, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.553381014500727e-05, "perf/tokens_per_sec": 24005.193969871714, "train/loss_prose": 3.1857462406158445, "train/loss_math": 2.3381961981455484} +{"step": 2127, "train/loss": 2.269160211086273, "train/lm_loss": 2.269160211086273, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.551797924081887e-05, "perf/tokens_per_sec": 25181.673207404467, "train/loss_code": 1.6692081987857819, "train/loss_prose": 3.158030351003011, "train/loss_math": 2.0023574829101562} +{"step": 2128, "train/loss": 2.6979952454566956, "train/lm_loss": 2.6979952454566956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5502143210295165e-05, "perf/tokens_per_sec": 25702.74531347714, "train/loss_math": 2.0491156578063965, "train/loss_code": 1.53268963098526, "train/loss_prose": 3.6050878763198853} +{"step": 2129, "train/loss": 3.29709392786026, "train/lm_loss": 3.29709392786026, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.548630206115443e-05, "perf/tokens_per_sec": 25816.88085824501, "train/loss_math": 2.223533511161804, "train/loss_prose": 3.6549474795659385} +{"step": 2130, "train/loss": 2.317465662956238, "train/lm_loss": 2.317465662956238, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.547045580111746e-05, "perf/tokens_per_sec": 25540.425217125, "train/loss_code": 1.63380628824234, "train/loss_prose": 3.71822988986969, "train/loss_math": 2.284020185470581} +{"step": 2131, "train/loss": 1.6557864546775818, "train/lm_loss": 1.6557864546775818, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.545460443790753e-05, "perf/tokens_per_sec": 24605.132148325105, "train/loss_code": 1.3499407172203064, "train/loss_math": 1.8497859239578247, "train/loss_prose": 3.2968616485595703} +{"step": 2132, "train/loss": 2.72169691324234, "train/lm_loss": 2.72169691324234, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.543874797925042e-05, "perf/tokens_per_sec": 25951.032813654154, "train/loss_math": 2.228571097056071, "train/loss_prose": 3.420507311820984, "train/loss_code": 1.4058318138122559} +{"step": 2133, "train/loss": 1.8317764401435852, "train/lm_loss": 1.8317764401435852, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.542288643287434e-05, "perf/tokens_per_sec": 25867.14183070896, "train/loss_prose": 2.979401469230652, "train/loss_math": 2.3359923362731934, "train/loss_code": 1.271883225440979} +{"step": 2134, "train/loss": 2.344506710767746, "train/lm_loss": 2.344506710767746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.540701980651003e-05, "perf/tokens_per_sec": 25244.02130035618, "train/loss_prose": 3.369140386581421, "train/loss_math": 2.1788880228996277, "train/loss_code": 1.6511104106903076} +{"step": 2135, "train/loss": 2.707498788833618, "train/lm_loss": 2.707498788833618, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.53911481078907e-05, "perf/tokens_per_sec": 26376.03046009482, "train/loss_prose": 3.1348427295684815, "train/loss_code": 1.7002106308937073, "train/loss_math": 2.585355043411255} +{"step": 2136, "train/loss": 2.3507862389087677, "train/lm_loss": 2.3507862389087677, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.537527134475201e-05, "perf/tokens_per_sec": 25905.0931248445, "train/loss_code": 1.3728867769241333, "train/loss_math": 2.2964377403259277, "train/loss_prose": 3.3649181524912515} +{"step": 2137, "train/loss": 2.2681021094322205, "train/lm_loss": 2.2681021094322205, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.535938952483211e-05, "perf/tokens_per_sec": 26941.376473318738, "train/loss_math": 2.2111310164133706, "train/loss_prose": 3.507922410964966, "train/loss_code": 1.4985260168711345} +{"step": 2138, "train/loss": 2.624132454395294, "train/lm_loss": 2.624132454395294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5343502655871594e-05, "perf/tokens_per_sec": 26320.95133651087, "train/loss_code": 1.663305441538493, "train/loss_prose": 3.445564925670624, "train/loss_math": 2.2208826541900635} +{"step": 2139, "train/loss": 2.402111291885376, "train/lm_loss": 2.402111291885376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.532761074561355e-05, "perf/tokens_per_sec": 25955.110285886083, "train/loss_math": 2.292074203491211, "train/loss_prose": 3.3998708724975586, "train/loss_code": 2.0645735263824463} +{"step": 2140, "train/loss": 2.6159197092056274, "train/lm_loss": 2.6159197092056274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.531171380180348e-05, "perf/tokens_per_sec": 26505.762785502255, "train/loss_math": 2.5103651682535806, "train/loss_code": 1.9679795503616333, "train/loss_prose": 3.1534341971079507} +{"step": 2141, "train/loss": 2.89944189786911, "train/lm_loss": 2.89944189786911, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.529581183218937e-05, "perf/tokens_per_sec": 25771.182534265583, "train/loss_prose": 3.501228630542755, "train/loss_code": 2.121549069881439, "train/loss_math": 2.4737608432769775} +{"step": 2142, "train/loss": 2.494214713573456, "train/lm_loss": 2.494214713573456, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.527990484452166e-05, "perf/tokens_per_sec": 25756.539158333457, "train/loss_prose": 3.2521767616271973, "train/loss_math": 2.234119176864624, "train/loss_code": 1.7474149465560913} +{"step": 2143, "train/loss": 2.2088517546653748, "train/lm_loss": 2.2088517546653748, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.52639928465532e-05, "perf/tokens_per_sec": 25477.664140137116, "train/loss_math": 2.335885834693909, "train/loss_code": 1.0801713466644287, "train/loss_prose": 3.831042528152466} +{"step": 2144, "train/loss": 2.8293759524822235, "train/lm_loss": 2.8293759524822235, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.524807584603932e-05, "perf/tokens_per_sec": 24666.88565131555, "train/loss_prose": 3.6342448592185974, "train/loss_math": 2.522369861602783, "train/loss_code": 1.5266444683074951} +{"step": 2145, "train/loss": 2.660525143146515, "train/lm_loss": 2.660525143146515, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.523215385073777e-05, "perf/tokens_per_sec": 27086.349328275493, "train/loss_code": 1.6378290057182312, "train/loss_prose": 3.842559258143107, "train/loss_math": 2.160288453102112} +{"step": 2146, "train/loss": 2.793051153421402, "train/lm_loss": 2.793051153421402, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.521622686840873e-05, "perf/tokens_per_sec": 25193.30536923578, "train/loss_code": 2.1137914657592773, "train/loss_prose": 3.544819176197052, "train/loss_math": 1.8237576484680176} +{"step": 2147, "train/loss": 2.0918189585208893, "train/lm_loss": 2.0918189585208893, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5200294906814824e-05, "perf/tokens_per_sec": 25032.11972379949, "train/loss_math": 2.189082384109497, "train/loss_prose": 3.0926491419474282, "train/loss_code": 1.3168802559375763} +{"step": 2148, "train/loss": 2.3012295365333557, "train/lm_loss": 2.3012295365333557, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.518435797372109e-05, "perf/tokens_per_sec": 26056.5956965339, "train/loss_math": 2.1484318375587463, "train/loss_prose": 3.7313698132832847, "train/loss_code": 0.9729544520378113} +{"step": 2149, "train/loss": 2.227373391389847, "train/lm_loss": 2.227373391389847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.516841607689501e-05, "perf/tokens_per_sec": 26169.99534481288, "train/loss_math": 2.3202348947525024, "train/loss_code": 1.5689537525177002, "train/loss_prose": 3.451350688934326} +{"step": 2150, "train/loss": 2.630259394645691, "train/lm_loss": 2.630259394645691, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5152469224106454e-05, "perf/tokens_per_sec": 25818.31639250151, "train/loss_math": 2.4360331296920776, "train/loss_code": 1.9347184499104817, "train/loss_prose": 3.455284357070923} +{"step": 2151, "train/loss": 1.8731623888015747, "train/lm_loss": 1.8731623888015747, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.513651742312774e-05, "perf/tokens_per_sec": 26050.70856647227, "train/loss_code": 1.5327528715133667, "train/loss_math": 2.2135719060897827} +{"step": 2152, "train/loss": 2.185280203819275, "train/lm_loss": 2.185280203819275, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.512056068173356e-05, "perf/tokens_per_sec": 26179.087632039304, "train/loss_code": 1.6602174043655396, "train/loss_math": 2.309656540552775, "train/loss_prose": 3.912402391433716} +{"step": 2153, "train/loss": 2.3602911233901978, "train/lm_loss": 2.3602911233901978, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5104599007701054e-05, "perf/tokens_per_sec": 26151.551042340565, "train/loss_code": 1.401841938495636, "train/loss_math": 2.2590829133987427, "train/loss_prose": 3.5211567878723145} +{"step": 2154, "train/loss": 2.981013238430023, "train/lm_loss": 2.981013238430023, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5088632408809755e-05, "perf/tokens_per_sec": 25865.66191907208, "train/loss_prose": 3.6868844985961915, "train/loss_code": 1.8045612573623657} +{"step": 2155, "train/loss": 2.231749176979065, "train/lm_loss": 2.231749176979065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.507266089284157e-05, "perf/tokens_per_sec": 25830.621975441176, "train/loss_code": 1.5853064854939778, "train/loss_prose": 3.2381385564804077, "train/loss_math": 2.207265615463257} +{"step": 2156, "train/loss": 1.8557699024677277, "train/lm_loss": 1.8557699024677277, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.505668446758083e-05, "perf/tokens_per_sec": 27160.81790414277, "train/loss_math": 2.2496081590652466, "train/loss_code": 1.4360258102416992, "train/loss_prose": 3.1668128967285156} +{"step": 2157, "train/loss": 1.8786565959453583, "train/lm_loss": 1.8786565959453583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.504070314081425e-05, "perf/tokens_per_sec": 24973.20837524639, "train/loss_math": 1.9939404726028442, "train/loss_code": 1.3251066207885742, "train/loss_prose": 3.747004985809326} +{"step": 2158, "train/loss": 2.433486759662628, "train/lm_loss": 2.433486759662628, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.502471692033094e-05, "perf/tokens_per_sec": 25927.220897680265, "train/loss_math": 1.9517216682434082, "train/loss_prose": 3.512906074523926, "train/loss_code": 1.7443634867668152} +{"step": 2159, "train/loss": 2.3886407017707825, "train/lm_loss": 2.3886407017707825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5008725813922386e-05, "perf/tokens_per_sec": 26190.222319788405, "train/loss_math": 2.2566285610198973, "train/loss_prose": 3.1618303060531616, "train/loss_code": 1.5023220777511597} +{"step": 2160, "train/loss": 2.573350578546524, "train/lm_loss": 2.573350578546524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.499272982938244e-05, "perf/tokens_per_sec": 24919.23551032604, "train/loss_prose": 3.645540237426758, "train/loss_math": 2.0951895316441855, "train/loss_code": 1.682307243347168} +{"step": 2161, "train/loss": 3.058344841003418, "train/lm_loss": 3.058344841003418, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4976728974507384e-05, "perf/tokens_per_sec": 26066.716712716196, "train/loss_prose": 3.5599545001983643, "train/loss_code": 2.2452006340026855, "train/loss_math": 2.176584482192993} +{"step": 2162, "train/loss": 2.1510183811187744, "train/lm_loss": 2.1510183811187744, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.496072325709582e-05, "perf/tokens_per_sec": 25740.52393002959, "train/loss_code": 1.8670836448669434, "train/loss_math": 2.214564561843872, "train/loss_prose": 3.443599224090576} +{"step": 2163, "train/loss": 2.88841050863266, "train/lm_loss": 2.88841050863266, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.494471268494875e-05, "perf/tokens_per_sec": 25653.350391895987, "train/loss_prose": 3.473339080810547, "train/loss_code": 1.6038492321968079, "train/loss_math": 2.532888650894165} +{"step": 2164, "train/loss": 2.1365383565425873, "train/lm_loss": 2.1365383565425873, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4928697265869515e-05, "perf/tokens_per_sec": 27294.45858534826, "train/loss_math": 2.1917203187942507, "train/loss_code": 1.4314714670181274, "train/loss_prose": 3.270761728286743} +{"step": 2165, "train/loss": 1.7288408875465393, "train/lm_loss": 1.7288408875465393, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.491267700766386e-05, "perf/tokens_per_sec": 25948.132315337156, "train/loss_math": 2.232705513636271, "train/loss_code": 1.426522183418274} +{"step": 2166, "train/loss": 2.1546590328216553, "train/lm_loss": 2.1546590328216553, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4896651918139845e-05, "perf/tokens_per_sec": 25517.360528293593, "train/loss_code": 1.2472626169522603, "train/loss_math": 2.2052207787831626, "train/loss_prose": 3.4399107694625854} +{"step": 2167, "train/loss": 2.58858984708786, "train/lm_loss": 2.58858984708786, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.488062200510791e-05, "perf/tokens_per_sec": 25955.855347128214, "train/loss_code": 1.8944578170776367, "train/loss_prose": 3.556525468826294, "train/loss_math": 2.177884817123413} +{"step": 2168, "train/loss": 2.209469437599182, "train/lm_loss": 2.209469437599182, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.486458727638085e-05, "perf/tokens_per_sec": 25952.28728403771, "train/loss_code": 1.094756305217743, "train/loss_prose": 3.459854245185852, "train/loss_math": 2.1416335999965668} +{"step": 2169, "train/loss": 2.336327835917473, "train/lm_loss": 2.336327835917473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.484854773977378e-05, "perf/tokens_per_sec": 26686.5121550377, "train/loss_prose": 3.1494861841201782, "train/loss_code": 1.2789826194445293, "train/loss_math": 2.255730628967285} +{"step": 2170, "train/loss": 2.2853665947914124, "train/lm_loss": 2.2853665947914124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.483250340310418e-05, "perf/tokens_per_sec": 26174.81985096389, "train/loss_math": 2.095874865849813, "train/loss_prose": 3.132250706354777, "train/loss_code": 1.299277663230896} +{"step": 2171, "train/loss": 2.5049734711647034, "train/lm_loss": 2.5049734711647034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.481645427419188e-05, "perf/tokens_per_sec": 25949.072946450553, "train/loss_math": 2.0957939326763153, "train/loss_prose": 3.5697530110677085, "train/loss_code": 0.947353720664978} +{"step": 2172, "train/loss": 2.476820707321167, "train/lm_loss": 2.476820707321167, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.480040036085901e-05, "perf/tokens_per_sec": 25642.973628433767, "train/loss_code": 1.9778424104054768, "train/loss_prose": 3.3870164155960083, "train/loss_math": 2.369001626968384} +{"step": 2173, "train/loss": 2.1313688457012177, "train/lm_loss": 2.1313688457012177, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4784341670930065e-05, "perf/tokens_per_sec": 25106.93059444545, "train/loss_code": 1.4320478558540344, "train/loss_prose": 3.7945622205734253, "train/loss_math": 2.3015873432159424} +{"step": 2174, "train/loss": 2.7101245522499084, "train/lm_loss": 2.7101245522499084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.476827821223184e-05, "perf/tokens_per_sec": 25984.04220397175, "train/loss_code": 1.479361891746521, "train/loss_prose": 3.3077404499053955, "train/loss_math": 2.1835713386535645} +{"step": 2175, "train/loss": 3.4030072689056396, "train/lm_loss": 3.4030072689056396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.475220999259349e-05, "perf/tokens_per_sec": 26020.99747512613, "train/loss_prose": 3.7311434348424277, "train/loss_math": 2.419116497039795, "train/loss_code": 2.418081521987915} +{"step": 2176, "train/loss": 2.640878975391388, "train/lm_loss": 2.640878975391388, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4736137019846465e-05, "perf/tokens_per_sec": 25536.66677666246, "train/loss_code": 1.4207661549250286, "train/loss_prose": 3.5555872917175293, "train/loss_math": 2.6423840522766113} +{"step": 2177, "train/loss": 2.623685300350189, "train/lm_loss": 2.623685300350189, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4720059301824525e-05, "perf/tokens_per_sec": 24507.34467603126, "train/loss_code": 1.8904811541239421, "train/loss_prose": 3.7520058949788413, "train/loss_math": 2.031010091304779} +{"step": 2178, "train/loss": 2.4853179156780243, "train/lm_loss": 2.4853179156780243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.470397684636377e-05, "perf/tokens_per_sec": 25063.745532477347, "train/loss_prose": 3.868412971496582, "train/loss_code": 1.783442735671997, "train/loss_math": 2.265129725138346} +{"step": 2179, "train/loss": 2.3910476863384247, "train/lm_loss": 2.3910476863384247, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4687889661302576e-05, "perf/tokens_per_sec": 25863.091948658814, "train/loss_prose": 3.646779775619507, "train/loss_math": 2.006629546483358, "train/loss_code": 1.9383110602696736} +{"step": 2180, "train/loss": 2.101458251476288, "train/lm_loss": 2.101458251476288, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.467179775448166e-05, "perf/tokens_per_sec": 26493.00996194107, "train/loss_prose": 3.027146339416504, "train/loss_math": 2.3720145225524902, "train/loss_code": 1.4321537415186565} +{"step": 2181, "train/loss": 2.0128364264965057, "train/lm_loss": 2.0128364264965057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4655701133744e-05, "perf/tokens_per_sec": 25591.48232714446, "train/loss_math": 2.4106647968292236, "train/loss_code": 1.144210547208786, "train/loss_prose": 3.352259635925293} +{"step": 2182, "train/loss": 2.4133816957473755, "train/lm_loss": 2.4133816957473755, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.463959980693492e-05, "perf/tokens_per_sec": 25574.643670153062, "train/loss_code": 1.219918668270111, "train/loss_prose": 3.606844663619995} +{"step": 2183, "train/loss": 2.4806405305862427, "train/lm_loss": 2.4806405305862427, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.462349378190199e-05, "perf/tokens_per_sec": 25734.123413886646, "train/loss_math": 2.2188768863677977, "train/loss_code": 1.5217171907424927, "train/loss_prose": 3.614511728286743} +{"step": 2184, "train/loss": 2.4336748719215393, "train/lm_loss": 2.4336748719215393, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.460738306649509e-05, "perf/tokens_per_sec": 26298.951836424785, "train/loss_math": 2.3391491174697876, "train/loss_prose": 3.496799945831299, "train/loss_code": 1.9377044439315796} +{"step": 2185, "train/loss": 1.8546710014343262, "train/lm_loss": 1.8546710014343262, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.459126766856641e-05, "perf/tokens_per_sec": 25629.24020807879, "train/loss_math": 2.247150421142578, "train/loss_code": 1.2841499000787735, "train/loss_prose": 2.959317445755005} +{"step": 2186, "train/loss": 2.5449737906455994, "train/lm_loss": 2.5449737906455994, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.457514759597038e-05, "perf/tokens_per_sec": 26623.65088348422, "train/loss_prose": 3.994464556376139, "train/loss_code": 1.5325256139039993, "train/loss_math": 2.24629545211792} +{"step": 2187, "train/loss": 3.164934068918228, "train/lm_loss": 3.164934068918228, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.455902285656373e-05, "perf/tokens_per_sec": 26063.948466417758, "train/loss_prose": 3.723685085773468, "train/loss_code": 4.102159023284912, "train/loss_math": 2.1075244347254434} +{"step": 2188, "train/loss": 2.2656895220279694, "train/lm_loss": 2.2656895220279694, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.454289345820546e-05, "perf/tokens_per_sec": 25689.754500974963, "train/loss_math": 2.2602479457855225, "train/loss_code": 1.2500542004903157, "train/loss_prose": 3.2849525610605874} +{"step": 2189, "train/loss": 2.1730703115463257, "train/lm_loss": 2.1730703115463257, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.452675940875686e-05, "perf/tokens_per_sec": 26346.381154372873, "train/loss_math": 2.2393965125083923, "train/loss_code": 1.5981280604998271, "train/loss_prose": 3.63259220123291} +{"step": 2190, "train/loss": 2.3677282631397247, "train/lm_loss": 2.3677282631397247, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4510620716081446e-05, "perf/tokens_per_sec": 26254.94260546376, "train/loss_prose": 3.283190886179606, "train/loss_code": 1.5273113250732422, "train/loss_math": 2.255159378051758} +{"step": 2191, "train/loss": 2.879315674304962, "train/lm_loss": 2.879315674304962, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4494477388045035e-05, "perf/tokens_per_sec": 26357.294696129375, "train/loss_prose": 3.417723798751831, "train/loss_code": 1.9145270586013794, "train/loss_math": 2.1168527603149414} +{"step": 2192, "train/loss": 3.2380948662757874, "train/lm_loss": 3.2380948662757874, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4478329432515674e-05, "perf/tokens_per_sec": 26864.827580599005, "train/loss_prose": 3.71985399723053, "train/loss_code": 1.792817234992981} +{"step": 2193, "train/loss": 2.0078365206718445, "train/lm_loss": 2.0078365206718445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4462176857363704e-05, "perf/tokens_per_sec": 26418.90301359246, "train/loss_math": 2.305362892150879, "train/loss_code": 1.5119590560595195} +{"step": 2194, "train/loss": 2.4126179218292236, "train/lm_loss": 2.4126179218292236, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.444601967046168e-05, "perf/tokens_per_sec": 26014.25977510668, "train/loss_math": 2.1999192039171853, "train/loss_prose": 3.050714373588562} +{"step": 2195, "train/loss": 2.185298413038254, "train/lm_loss": 2.185298413038254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.442985787968442e-05, "perf/tokens_per_sec": 27075.037167784558, "train/loss_math": 2.2950183153152466, "train/loss_prose": 3.4043318033218384, "train/loss_code": 1.5209218561649323} +{"step": 2196, "train/loss": 2.491494834423065, "train/lm_loss": 2.491494834423065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4413691492908985e-05, "perf/tokens_per_sec": 26315.71002256306, "train/loss_math": 2.370316219329834, "train/loss_code": 1.7656837701797485, "train/loss_prose": 4.549009799957275} +{"step": 2197, "train/loss": 2.4069920480251312, "train/lm_loss": 2.4069920480251312, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.439752051801467e-05, "perf/tokens_per_sec": 26120.497576461574, "train/loss_math": 2.2788321375846863, "train/loss_code": 1.3114423155784607, "train/loss_prose": 3.7588614225387573} +{"step": 2198, "train/loss": 2.187480479478836, "train/lm_loss": 2.187480479478836, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.438134496288302e-05, "perf/tokens_per_sec": 26474.026107471804, "train/loss_math": 2.16209610303243, "train/loss_code": 1.4033684134483337, "train/loss_prose": 3.4017252922058105} +{"step": 2199, "train/loss": 2.0853277146816254, "train/lm_loss": 2.0853277146816254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.436516483539781e-05, "perf/tokens_per_sec": 26089.28005491243, "train/loss_prose": 3.1797025203704834, "train/loss_code": 1.7167165577411652, "train/loss_math": 2.2120174566904702} +{"step": 2200, "train/loss": 2.519756257534027, "train/lm_loss": 2.519756257534027, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.434898014344501e-05, "perf/tokens_per_sec": 26343.835961603338, "train/loss_prose": 2.953219413757324, "train/loss_math": 2.498924811681112, "train/loss_code": 2.2112817764282227} +{"step": 2200, "eval/loss": 2.2062477993318086, "eval/lm_loss": 2.2062477993318086, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 9.081576485845256, "eval/loss_code": 1.589014548281012, "eval/ppl_code": 4.8989189007860645, "eval/loss_prose": 3.4888375234185602, "eval/ppl_prose": 32.747856953916354, "eval/loss_math": 2.13012248390319, "eval/ppl_math": 8.415897560295802} +{"step": 2201, "train/loss": 2.529344767332077, "train/lm_loss": 2.529344767332077, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.433279089491288e-05, "perf/tokens_per_sec": 27048.905965761933, "train/loss_prose": 3.399048646291097, "train/loss_code": 1.7830208937327068, "train/loss_math": 2.3442747592926025} +{"step": 2202, "train/loss": 2.184398114681244, "train/lm_loss": 2.184398114681244, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.431659709769183e-05, "perf/tokens_per_sec": 25254.745861527084, "train/loss_code": 1.5040667553742726, "train/loss_prose": 4.2253923416137695} +{"step": 2203, "train/loss": 2.2930131554603577, "train/lm_loss": 2.2930131554603577, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.430039875967454e-05, "perf/tokens_per_sec": 26307.40882912764, "train/loss_code": 1.8903677066167195, "train/loss_prose": 3.198390245437622, "train/loss_math": 2.0920737187067666} +{"step": 2204, "train/loss": 2.4387462735176086, "train/lm_loss": 2.4387462735176086, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.428419588875588e-05, "perf/tokens_per_sec": 26090.23094738025, "train/loss_math": 2.2202908396720886, "train/loss_code": 1.4483421444892883, "train/loss_prose": 3.866060972213745} +{"step": 2205, "train/loss": 2.593186616897583, "train/lm_loss": 2.593186616897583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.426798849283291e-05, "perf/tokens_per_sec": 26176.175817478445, "train/loss_code": 2.065350294113159, "train/loss_prose": 3.3667544523874917, "train/loss_math": 2.224589705467224} +{"step": 2206, "train/loss": 2.2733727991580963, "train/lm_loss": 2.2733727991580963, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.425177657980496e-05, "perf/tokens_per_sec": 26255.94573933669, "train/loss_math": 2.4625741243362427, "train/loss_prose": 3.654694676399231, "train/loss_code": 1.4881114065647125} +{"step": 2207, "train/loss": 2.612108439207077, "train/lm_loss": 2.612108439207077, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.423556015757349e-05, "perf/tokens_per_sec": 26307.731107827298, "train/loss_code": 1.4207972288131714, "train/loss_prose": 3.344783067703247, "train/loss_math": 2.3380703926086426} +{"step": 2208, "train/loss": 2.628852069377899, "train/lm_loss": 2.628852069377899, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.421933923404219e-05, "perf/tokens_per_sec": 26890.141172750908, "train/loss_prose": 3.2163111209869384, "train/loss_math": 2.2357470989227295, "train/loss_code": 1.3567570447921753} +{"step": 2209, "train/loss": 2.3544636964797974, "train/lm_loss": 2.3544636964797974, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4203113817116957e-05, "perf/tokens_per_sec": 26331.319157023525, "train/loss_math": 2.244714230298996, "train/loss_code": 1.6153082847595215, "train/loss_prose": 3.313118100166321} +{"step": 2210, "train/loss": 2.2719743847846985, "train/lm_loss": 2.2719743847846985, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4186883914705835e-05, "perf/tokens_per_sec": 26294.68513328813, "train/loss_math": 2.235779364903768, "train/loss_code": 1.4935994148254395, "train/loss_prose": 3.493829369544983} +{"step": 2211, "train/loss": 2.1087327003479004, "train/lm_loss": 2.1087327003479004, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.417064953471911e-05, "perf/tokens_per_sec": 26374.815672716442, "train/loss_code": 1.290513277053833, "train/loss_prose": 4.472869396209717, "train/loss_math": 2.1313630044460297} +{"step": 2212, "train/loss": 1.5726590752601624, "train/lm_loss": 1.5726590752601624, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4154410685069196e-05, "perf/tokens_per_sec": 26281.49127028685, "train/loss_code": 1.3937888940175374, "train/loss_math": 2.1092695593833923} +{"step": 2213, "train/loss": 2.5805089473724365, "train/lm_loss": 2.5805089473724365, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.413816737367073e-05, "perf/tokens_per_sec": 26397.99690842991, "train/loss_code": 1.5138168334960938, "train/loss_prose": 3.5089170932769775, "train/loss_math": 2.3632288773854575} +{"step": 2214, "train/loss": 2.050993889570236, "train/lm_loss": 2.050993889570236, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.412191960844049e-05, "perf/tokens_per_sec": 25692.48225066662, "train/loss_prose": 3.6728243827819824, "train/loss_code": 1.3614317327737808, "train/loss_math": 2.429800033569336} +{"step": 2215, "train/loss": 2.3803948163986206, "train/lm_loss": 2.3803948163986206, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.410566739729746e-05, "perf/tokens_per_sec": 26166.647146625724, "train/loss_code": 1.952516237894694, "train/loss_prose": 3.3570806980133057, "train/loss_math": 2.157149394353231} +{"step": 2216, "train/loss": 1.9943231046199799, "train/lm_loss": 1.9943231046199799, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.408941074816275e-05, "perf/tokens_per_sec": 26277.471384411132, "train/loss_math": 2.429599332809448, "train/loss_code": 1.2688626845677693} +{"step": 2217, "train/loss": 2.601509004831314, "train/lm_loss": 2.601509004831314, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.407314966895966e-05, "perf/tokens_per_sec": 26530.362956042343, "train/loss_math": 2.107961336771647, "train/loss_prose": 3.8368941148122153, "train/loss_code": 1.4887524247169495} +{"step": 2218, "train/loss": 1.903720200061798, "train/lm_loss": 1.903720200061798, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.405688416761364e-05, "perf/tokens_per_sec": 26157.842287412623, "train/loss_code": 1.4615855455398559, "train/loss_prose": 3.7415311336517334, "train/loss_math": 2.0901508927345276} +{"step": 2219, "train/loss": 1.8376021683216095, "train/lm_loss": 1.8376021683216095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4040614252052305e-05, "perf/tokens_per_sec": 26108.98560803397, "train/loss_code": 1.4810408353805542, "train/loss_math": 2.2979934215545654, "train/loss_prose": 3.5165786743164062} +{"step": 2220, "train/loss": 1.9848697781562805, "train/lm_loss": 1.9848697781562805, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4024339930205415e-05, "perf/tokens_per_sec": 26260.68156617574, "train/loss_code": 1.4528283476829529, "train/loss_prose": 3.5160491466522217, "train/loss_math": 2.1838653882344565} +{"step": 2221, "train/loss": 1.6527746617794037, "train/lm_loss": 1.6527746617794037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.400806121000487e-05, "perf/tokens_per_sec": 26082.74454505153, "train/loss_code": 1.423354411125183, "train/loss_math": 2.035141388575236} +{"step": 2222, "train/loss": 2.3749484419822693, "train/lm_loss": 2.3749484419822693, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.399177809938472e-05, "perf/tokens_per_sec": 26136.551871866024, "train/loss_prose": 3.2443044980367026, "train/loss_code": 1.6970827182133992, "train/loss_math": 2.087712824344635} +{"step": 2223, "train/loss": 2.7811425030231476, "train/lm_loss": 2.7811425030231476, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.397549060628116e-05, "perf/tokens_per_sec": 26253.016803281193, "train/loss_prose": 3.381593406200409, "train/loss_code": 2.3743687868118286, "train/loss_math": 1.9870139956474304} +{"step": 2224, "train/loss": 2.532645583152771, "train/lm_loss": 2.532645583152771, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.39591987386325e-05, "perf/tokens_per_sec": 26004.612423881437, "train/loss_code": 1.20239261786143, "train/loss_prose": 3.4838792085647583, "train/loss_math": 2.7184693813323975} +{"step": 2225, "train/loss": 2.4408674240112305, "train/lm_loss": 2.4408674240112305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3942902504379235e-05, "perf/tokens_per_sec": 26897.97196514823, "train/loss_code": 1.497190773487091, "train/loss_math": 2.342549204826355, "train/loss_prose": 3.5811805725097656} +{"step": 2226, "train/loss": 2.539657711982727, "train/lm_loss": 2.539657711982727, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3926601911463915e-05, "perf/tokens_per_sec": 25798.81454295355, "train/loss_prose": 3.465608596801758, "train/loss_math": 1.9569066762924194, "train/loss_code": 1.2705072164535522} +{"step": 2227, "train/loss": 2.855507105588913, "train/lm_loss": 2.855507105588913, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3910296967831266e-05, "perf/tokens_per_sec": 25830.000592380544, "train/loss_prose": 3.7585144639015198, "train/loss_code": 1.6262952089309692, "train/loss_math": 2.0612345933914185} +{"step": 2228, "train/loss": 2.8437410593032837, "train/lm_loss": 2.8437410593032837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.389398768142812e-05, "perf/tokens_per_sec": 26055.13354343415, "train/loss_math": 2.7166941165924072, "train/loss_prose": 3.6325581073760986, "train/loss_code": 1.834333856900533} +{"step": 2229, "train/loss": 2.8375235199928284, "train/lm_loss": 2.8375235199928284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.387767406020343e-05, "perf/tokens_per_sec": 26393.211429976032, "train/loss_math": 2.3829482078552244, "train/loss_prose": 3.595149119695028} +{"step": 2230, "train/loss": 2.1687879264354706, "train/lm_loss": 2.1687879264354706, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3861356112108247e-05, "perf/tokens_per_sec": 26005.714616140318, "train/loss_math": 2.1279514133930206, "train/loss_code": 1.6310117642084758, "train/loss_prose": 3.9454619884490967} +{"step": 2231, "train/loss": 2.7270628213882446, "train/lm_loss": 2.7270628213882446, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.384503384509574e-05, "perf/tokens_per_sec": 26058.76947066015, "train/loss_math": 2.4463000893592834, "train/loss_prose": 3.5377321243286133, "train/loss_code": 1.4181056022644043} +{"step": 2232, "train/loss": 2.3428307473659515, "train/lm_loss": 2.3428307473659515, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.382870726712119e-05, "perf/tokens_per_sec": 26481.330649213724, "train/loss_prose": 3.205215096473694, "train/loss_math": 2.257523250579834, "train/loss_code": 1.0445996522903442} +{"step": 2233, "train/loss": 2.379251629114151, "train/lm_loss": 2.379251629114151, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.381237638614196e-05, "perf/tokens_per_sec": 26050.94558070699, "train/loss_prose": 3.55175518989563, "train/loss_math": 2.345124125480652, "train/loss_code": 1.8100637793540955} +{"step": 2234, "train/loss": 2.860353708267212, "train/lm_loss": 2.860353708267212, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3796041210117546e-05, "perf/tokens_per_sec": 26049.010090656702, "train/loss_math": 2.405569235483805, "train/loss_prose": 3.578037659327189, "train/loss_code": 2.4660043716430664} +{"step": 2235, "train/loss": 2.3695846498012543, "train/lm_loss": 2.3695846498012543, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3779701747009504e-05, "perf/tokens_per_sec": 26471.537770765215, "train/loss_prose": 3.036218762397766, "train/loss_math": 2.206823766231537, "train/loss_code": 2.028472423553467} +{"step": 2236, "train/loss": 2.5538222789764404, "train/lm_loss": 2.5538222789764404, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3763358004781475e-05, "perf/tokens_per_sec": 26563.67240774156, "train/loss_math": 2.3348963856697083, "train/loss_code": 1.930227816104889, "train/loss_prose": 3.6152687072753906} +{"step": 2237, "train/loss": 2.6297076642513275, "train/lm_loss": 2.6297076642513275, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.374700999139923e-05, "perf/tokens_per_sec": 26453.96956384494, "train/loss_prose": 3.495998442173004, "train/loss_code": 1.6074151595433552, "train/loss_math": 2.2314224243164062} +{"step": 2238, "train/loss": 2.526322513818741, "train/lm_loss": 2.526322513818741, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.373065771483056e-05, "perf/tokens_per_sec": 26152.904599018726, "train/loss_prose": 3.1683678030967712, "train/loss_code": 1.6009252468744914, "train/loss_math": 2.734333038330078} +{"step": 2239, "train/loss": 1.9798224568367004, "train/lm_loss": 1.9798224568367004, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3714301183045385e-05, "perf/tokens_per_sec": 26237.259917316875, "train/loss_code": 1.1913321216901143, "train/loss_prose": 3.9457755088806152, "train/loss_math": 2.0797019600868225} +{"step": 2240, "train/loss": 2.1592264771461487, "train/lm_loss": 2.1592264771461487, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.369794040401567e-05, "perf/tokens_per_sec": 25896.619521798246, "train/loss_math": 2.33393394947052, "train/loss_code": 1.4618453681468964, "train/loss_prose": 3.379280686378479} +{"step": 2241, "train/loss": 2.5549357533454895, "train/lm_loss": 2.5549357533454895, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.368157538571548e-05, "perf/tokens_per_sec": 26306.28091585754, "train/loss_prose": 3.456642230351766, "train/loss_code": 1.1109132766723633, "train/loss_math": 2.239661455154419} +{"step": 2242, "train/loss": 2.922562837600708, "train/lm_loss": 2.922562837600708, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3665206136120906e-05, "perf/tokens_per_sec": 26264.696316340647, "train/loss_prose": 3.789442300796509, "train/loss_math": 2.2443673610687256, "train/loss_code": 1.4896312952041626} +{"step": 2243, "train/loss": 2.5820319652557373, "train/lm_loss": 2.5820319652557373, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3648832663210124e-05, "perf/tokens_per_sec": 26212.440146534453, "train/loss_math": 2.1365799009799957, "train/loss_code": 1.8212811946868896, "train/loss_prose": 3.4295515219370523} +{"step": 2244, "train/loss": 2.0402668118476868, "train/lm_loss": 2.0402668118476868, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.363245497496337e-05, "perf/tokens_per_sec": 26011.54204196702, "train/loss_math": 2.273451119661331, "train/loss_code": 1.231050173441569, "train/loss_prose": 3.5351781845092773} +{"step": 2245, "train/loss": 2.4958205819129944, "train/lm_loss": 2.4958205819129944, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3616073079362926e-05, "perf/tokens_per_sec": 25744.766990102125, "train/loss_prose": 3.375122904777527, "train/loss_code": 1.3607875108718872, "train/loss_math": 2.3837101459503174} +{"step": 2246, "train/loss": 1.7523916065692902, "train/lm_loss": 1.7523916065692902, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3599686984393134e-05, "perf/tokens_per_sec": 25289.24489979171, "train/loss_code": 1.3269606709480286, "train/loss_math": 2.4614429473876953} +{"step": 2247, "train/loss": 2.3927237689495087, "train/lm_loss": 2.3927237689495087, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3583296698040384e-05, "perf/tokens_per_sec": 26150.715092030518, "train/loss_prose": 3.4520095586776733, "train/loss_code": 0.975267767906189, "train/loss_math": 2.407949447631836} +{"step": 2248, "train/loss": 2.533567488193512, "train/lm_loss": 2.533567488193512, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.356690222829309e-05, "perf/tokens_per_sec": 26144.069198612742, "train/loss_code": 2.5657429695129395, "train/loss_math": 2.0898595094680785, "train/loss_prose": 3.6267497539520264} +{"step": 2249, "train/loss": 2.113612711429596, "train/lm_loss": 2.113612711429596, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.355050358314172e-05, "perf/tokens_per_sec": 25757.00254573477, "train/loss_code": 1.7622884213924408, "train/loss_math": 2.2263580163319907, "train/loss_prose": 3.180673360824585} +{"step": 2250, "train/loss": 2.2333569526672363, "train/lm_loss": 2.2333569526672363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.353410077057877e-05, "perf/tokens_per_sec": 26734.144468184102, "train/loss_math": 2.2342646598815916, "train/loss_code": 1.9496662616729736, "train/loss_prose": 2.7961997985839844} +{"step": 2251, "train/loss": 2.055521070957184, "train/lm_loss": 2.055521070957184, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.35176937985988e-05, "perf/tokens_per_sec": 25429.39320387631, "train/loss_math": 2.075264632701874, "train/loss_prose": 4.1956868171691895, "train/loss_code": 1.3158077001571655} +{"step": 2252, "train/loss": 2.365602672100067, "train/lm_loss": 2.365602672100067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.350128267519832e-05, "perf/tokens_per_sec": 25652.431077604422, "train/loss_math": 2.3142424821853638, "train/loss_prose": 3.0057986974716187, "train/loss_code": 1.1365711092948914} +{"step": 2253, "train/loss": 2.7556492686271667, "train/lm_loss": 2.7556492686271667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3484867408375954e-05, "perf/tokens_per_sec": 25627.71093273346, "train/loss_prose": 3.355936110019684, "train/loss_math": 2.2844595909118652, "train/loss_code": 1.768070936203003} +{"step": 2254, "train/loss": 1.8788289427757263, "train/lm_loss": 1.8788289427757263, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.346844800613229e-05, "perf/tokens_per_sec": 25959.34594038371, "train/loss_code": 1.5093274116516113, "train/loss_math": 2.383111000061035, "train/loss_prose": 2.7177724838256836} +{"step": 2255, "train/loss": 1.9401024281978607, "train/lm_loss": 1.9401024281978607, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3452024476469934e-05, "perf/tokens_per_sec": 25936.575959419064, "train/loss_code": 1.219894915819168, "train/loss_math": 2.3489866256713867, "train/loss_prose": 2.971633195877075} +{"step": 2256, "train/loss": 2.641694724559784, "train/lm_loss": 2.641694724559784, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.343559682739353e-05, "perf/tokens_per_sec": 25265.44238244053, "train/loss_prose": 3.280673325061798, "train/loss_math": 2.3996448516845703, "train/loss_code": 1.8704062700271606} +{"step": 2257, "train/loss": 2.5319854021072388, "train/lm_loss": 2.5319854021072388, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3419165066909705e-05, "perf/tokens_per_sec": 25488.172217087293, "train/loss_math": 2.4679009914398193, "train/loss_code": 1.9371438920497894, "train/loss_prose": 3.3464686075846353} +{"step": 2258, "train/loss": 2.2172439992427826, "train/lm_loss": 2.2172439992427826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.340272920302711e-05, "perf/tokens_per_sec": 26824.391308081573, "train/loss_prose": 3.3370611667633057, "train/loss_math": 2.2859953939914703, "train/loss_code": 0.9599237442016602} +{"step": 2259, "train/loss": 2.497504621744156, "train/lm_loss": 2.497504621744156, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.338628924375638e-05, "perf/tokens_per_sec": 26100.02124478907, "train/loss_prose": 3.374459664026896, "train/loss_code": 1.6895124514897664, "train/loss_math": 2.3940603733062744} +{"step": 2260, "train/loss": 2.5336810052394867, "train/lm_loss": 2.5336810052394867, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.336984519711015e-05, "perf/tokens_per_sec": 26863.315321119135, "train/loss_math": 2.2714542150497437, "train/loss_prose": 3.671116590499878, "train/loss_code": 1.5710632006327312} +{"step": 2261, "train/loss": 2.0096854865550995, "train/lm_loss": 2.0096854865550995, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3353397071103046e-05, "perf/tokens_per_sec": 26084.090988318247, "train/loss_code": 1.5855951607227325, "train/loss_math": 2.116761008898417, "train/loss_prose": 3.384819984436035} +{"step": 2262, "train/loss": 2.13628751039505, "train/lm_loss": 2.13628751039505, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.333694487375168e-05, "perf/tokens_per_sec": 24874.135890842543, "train/loss_prose": 3.04392671585083, "train/loss_code": 1.7878814339637756, "train/loss_math": 1.8796006043752034} +{"step": 2263, "train/loss": 2.4729008972644806, "train/lm_loss": 2.4729008972644806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.332048861307467e-05, "perf/tokens_per_sec": 26641.280006947254, "train/loss_math": 2.1024745225906374, "train/loss_prose": 4.039111375808716, "train/loss_code": 1.1926110982894897} +{"step": 2264, "train/loss": 1.9544386565685272, "train/lm_loss": 1.9544386565685272, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.330402829709258e-05, "perf/tokens_per_sec": 25000.20981709544, "train/loss_math": 2.1728570461273193, "train/loss_code": 1.5904080470403035} +{"step": 2265, "train/loss": 2.5696004927158356, "train/lm_loss": 2.5696004927158356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3287563933827995e-05, "perf/tokens_per_sec": 26693.312301701983, "train/loss_prose": 3.6914153893788657, "train/loss_math": 2.064161459604899, "train/loss_code": 1.6450365781784058} +{"step": 2266, "train/loss": 1.8106594383716583, "train/lm_loss": 1.8106594383716583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.327109553130541e-05, "perf/tokens_per_sec": 26998.02492386122, "train/loss_prose": 3.4968708753585815, "train/loss_code": 1.0852922916412353, "train/loss_math": 2.065072536468506} +{"step": 2267, "train/loss": 2.2715374529361725, "train/lm_loss": 2.2715374529361725, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.325462309755134e-05, "perf/tokens_per_sec": 25664.195611060488, "train/loss_prose": 3.452455163002014, "train/loss_code": 1.6094170808792114, "train/loss_math": 2.146379550298055} +{"step": 2268, "train/loss": 2.7467039227485657, "train/lm_loss": 2.7467039227485657, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3238146640594256e-05, "perf/tokens_per_sec": 25979.99492494023, "train/loss_code": 1.0536338686943054, "train/loss_prose": 3.3110607862472534} +{"step": 2269, "train/loss": 2.261491447687149, "train/lm_loss": 2.261491447687149, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.322166616846458e-05, "perf/tokens_per_sec": 25744.766990102125, "train/loss_math": 2.419472396373749, "train/loss_prose": 2.6666741371154785, "train/loss_code": 1.5403470396995544} +{"step": 2270, "train/loss": 2.3256802558898926, "train/lm_loss": 2.3256802558898926, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.32051816891947e-05, "perf/tokens_per_sec": 26200.5673030291, "train/loss_code": 1.8040254712104797, "train/loss_math": 2.3012442588806152, "train/loss_prose": 3.4911704063415527} +{"step": 2271, "train/loss": 2.0825365781784058, "train/lm_loss": 2.0825365781784058, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.318869321081892e-05, "perf/tokens_per_sec": 26055.489187192976, "train/loss_code": 1.3499124646186829, "train/loss_math": 2.160261940956116, "train/loss_prose": 3.1591579914093018} +{"step": 2272, "train/loss": 2.461961567401886, "train/lm_loss": 2.461961567401886, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3172200741373563e-05, "perf/tokens_per_sec": 26131.54272189925, "train/loss_prose": 3.563456137975057, "train/loss_math": 2.235024015108744, "train/loss_code": 1.1501259803771973} +{"step": 2273, "train/loss": 2.734204024076462, "train/lm_loss": 2.734204024076462, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.315570428889684e-05, "perf/tokens_per_sec": 25715.248875510606, "train/loss_prose": 3.509097528457642, "train/loss_math": 2.0012574195861816, "train/loss_code": 1.1634432971477509} +{"step": 2274, "train/loss": 2.8454232215881348, "train/lm_loss": 2.8454232215881348, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.313920386142892e-05, "perf/tokens_per_sec": 26729.98492965044, "train/loss_prose": 4.098674535751343, "train/loss_math": 2.0523523092269897, "train/loss_code": 2.155151844024658} +{"step": 2275, "train/loss": 2.5664517879486084, "train/lm_loss": 2.5664517879486084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.312269946701191e-05, "perf/tokens_per_sec": 25910.32844230685, "train/loss_code": 1.8875576257705688, "train/loss_prose": 3.291284720102946, "train/loss_math": 2.2942150433858237} +{"step": 2276, "train/loss": 2.1469065248966217, "train/lm_loss": 2.1469065248966217, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.310619111368986e-05, "perf/tokens_per_sec": 26038.231908747275, "train/loss_prose": 3.3940411806106567, "train/loss_math": 2.3348437547683716, "train/loss_code": 1.4293705821037292} +{"step": 2277, "train/loss": 2.6977846920490265, "train/lm_loss": 2.6977846920490265, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.308967880950874e-05, "perf/tokens_per_sec": 26008.23426361166, "train/loss_prose": 3.949972629547119, "train/loss_math": 2.06654946009318, "train/loss_code": 1.7663559317588806} +{"step": 2278, "train/loss": 2.6590642631053925, "train/lm_loss": 2.6590642631053925, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.307316256251644e-05, "perf/tokens_per_sec": 26532.452593343052, "train/loss_math": 2.3344454765319824, "train/loss_code": 1.703198254108429, "train/loss_prose": 3.620927095413208} +{"step": 2279, "train/loss": 2.2844803631305695, "train/lm_loss": 2.2844803631305695, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.305664238076278e-05, "perf/tokens_per_sec": 25858.07029896552, "train/loss_code": 1.4900983572006226, "train/loss_math": 2.528813123703003, "train/loss_prose": 2.9159739017486572} +{"step": 2280, "train/loss": 2.473977267742157, "train/lm_loss": 2.473977267742157, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3040118272299495e-05, "perf/tokens_per_sec": 25902.7496385197, "train/loss_math": 2.2374188899993896, "train/loss_prose": 3.53178334236145, "train/loss_code": 1.242105394601822} +{"step": 2281, "train/loss": 2.5877533555030823, "train/lm_loss": 2.5877533555030823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.302359024518024e-05, "perf/tokens_per_sec": 25022.749587077793, "train/loss_prose": 3.5763254165649414, "train/loss_math": 2.1460620164871216, "train/loss_code": 1.3888018131256104} +{"step": 2282, "train/loss": 2.5181269347667694, "train/lm_loss": 2.5181269347667694, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.300705830746057e-05, "perf/tokens_per_sec": 25869.400969733473, "train/loss_code": 0.991859495639801, "train/loss_prose": 3.173439311981201, "train/loss_math": 2.2940995693206787} +{"step": 2283, "train/loss": 2.4192949533462524, "train/lm_loss": 2.4192949533462524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.299052246719795e-05, "perf/tokens_per_sec": 26110.572862427543, "train/loss_prose": 3.339690923690796, "train/loss_code": 1.7692208290100098, "train/loss_math": 2.0138118267059326} +{"step": 2284, "train/loss": 2.067980855703354, "train/lm_loss": 2.067980855703354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2973982732451755e-05, "perf/tokens_per_sec": 25091.74943112277, "train/loss_math": 2.103983521461487, "train/loss_code": 1.4612831274668376, "train/loss_prose": 3.744062900543213} +{"step": 2285, "train/loss": 1.9632724523544312, "train/lm_loss": 1.9632724523544312, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.295743911128324e-05, "perf/tokens_per_sec": 26801.458934017675, "train/loss_math": 2.2221176624298096, "train/loss_code": 1.5405375719070435, "train/loss_prose": 3.559255838394165} +{"step": 2286, "train/loss": 2.1908904016017914, "train/lm_loss": 2.1908904016017914, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2940891611755564e-05, "perf/tokens_per_sec": 26202.085787319193, "train/loss_prose": 3.8230910301208496, "train/loss_code": 2.0840753316879272, "train/loss_math": 1.7892435391743977} +{"step": 2287, "train/loss": 2.357084095478058, "train/lm_loss": 2.357084095478058, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.29243402419338e-05, "perf/tokens_per_sec": 27109.515360860692, "train/loss_code": 1.48406383395195, "train/loss_prose": 3.4966390132904053, "train/loss_math": 2.4305012226104736} +{"step": 2288, "train/loss": 2.0807557106018066, "train/lm_loss": 2.0807557106018066, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.290778500988485e-05, "perf/tokens_per_sec": 25498.91604143383, "train/loss_math": 2.307133340835571, "train/loss_code": 1.70345934232076} +{"step": 2289, "train/loss": 2.2507537603378296, "train/lm_loss": 2.2507537603378296, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.289122592367757e-05, "perf/tokens_per_sec": 26296.657305107834, "train/loss_math": 2.243531346321106, "train/loss_code": 1.8135789235432942, "train/loss_prose": 3.591167688369751} +{"step": 2290, "train/loss": 3.1223012804985046, "train/lm_loss": 3.1223012804985046, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.287466299138262e-05, "perf/tokens_per_sec": 26584.96527370498, "train/loss_prose": 3.4939361015955606, "train/loss_math": 2.5898940563201904, "train/loss_code": 1.4248995780944824} +{"step": 2291, "train/loss": 2.981272876262665, "train/lm_loss": 2.981272876262665, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.28580962210726e-05, "perf/tokens_per_sec": 26084.566236779214, "train/loss_prose": 3.3967517375946046, "train/loss_math": 2.288808027903239} +{"step": 2292, "train/loss": 2.1105879843235016, "train/lm_loss": 2.1105879843235016, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2841525620821945e-05, "perf/tokens_per_sec": 25687.104144207722, "train/loss_math": 2.27773255109787, "train/loss_code": 1.2997188170750935, "train/loss_prose": 3.8746182918548584} +{"step": 2293, "train/loss": 2.6047961115837097, "train/lm_loss": 2.6047961115837097, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2824951198706954e-05, "perf/tokens_per_sec": 25809.93822994757, "train/loss_math": 2.384841799736023, "train/loss_prose": 3.60253643989563, "train/loss_code": 1.7536920309066772} +{"step": 2294, "train/loss": 2.4327638149261475, "train/lm_loss": 2.4327638149261475, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2808372962805816e-05, "perf/tokens_per_sec": 25345.619488244734, "train/loss_prose": 3.536114056905111, "train/loss_code": 1.7017797827720642, "train/loss_math": 2.04664945602417} +{"step": 2295, "train/loss": 2.382919579744339, "train/lm_loss": 2.382919579744339, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.279179092119855e-05, "perf/tokens_per_sec": 24904.858055724682, "train/loss_code": 1.4651284217834473, "train/loss_prose": 3.5114740133285522, "train/loss_math": 2.5483409563700357} +{"step": 2296, "train/loss": 2.676375150680542, "train/lm_loss": 2.676375150680542, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.277520508196705e-05, "perf/tokens_per_sec": 24389.74655197625, "train/loss_code": 1.5417977571487427, "train/loss_prose": 3.665869355201721, "train/loss_math": 2.122130870819092} +{"step": 2297, "train/loss": 3.2288478314876556, "train/lm_loss": 3.2288478314876556, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2758615453195034e-05, "perf/tokens_per_sec": 24178.303233133815, "train/loss_math": 2.324803590774536, "train/loss_code": 1.4613010883331299, "train/loss_prose": 3.9439747810363768} +{"step": 2298, "train/loss": 2.439732998609543, "train/lm_loss": 2.439732998609543, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2742022042968104e-05, "perf/tokens_per_sec": 26017.175076628584, "train/loss_prose": 3.791795094807943, "train/loss_code": 1.39389169216156, "train/loss_math": 1.9804014563560486} +{"step": 2299, "train/loss": 2.2005716264247894, "train/lm_loss": 2.2005716264247894, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.272542485937369e-05, "perf/tokens_per_sec": 25585.68915682747, "train/loss_math": 2.2088695526123048, "train/loss_code": 1.7723647356033325, "train/loss_prose": 3.0154964923858643} +{"step": 2300, "train/loss": 2.652606785297394, "train/lm_loss": 2.652606785297394, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.270882391050104e-05, "perf/tokens_per_sec": 25965.466549181358, "train/loss_code": 1.7135175863901775, "train/loss_prose": 3.762402375539144, "train/loss_math": 2.396546959877014} +{"step": 2301, "train/loss": 2.719410538673401, "train/lm_loss": 2.719410538673401, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.269221920444127e-05, "perf/tokens_per_sec": 25767.81965388665, "train/loss_prose": 3.6269710858662925, "train/loss_math": 2.174874258041382} +{"step": 2302, "train/loss": 2.2947841584682465, "train/lm_loss": 2.2947841584682465, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.26756107492873e-05, "perf/tokens_per_sec": 25697.74729259157, "train/loss_prose": 3.2084157466888428, "train/loss_code": 1.447044014930725, "train/loss_math": 2.195946753025055} +{"step": 2303, "train/loss": 2.634730100631714, "train/lm_loss": 2.634730100631714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2658998553133895e-05, "perf/tokens_per_sec": 27243.644053847045, "train/loss_prose": 3.4402033487955728, "train/loss_math": 2.235684633255005, "train/loss_code": 1.814491868019104} +{"step": 2304, "train/loss": 2.5077381432056427, "train/lm_loss": 2.5077381432056427, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.264238262407764e-05, "perf/tokens_per_sec": 26076.528947747884, "train/loss_math": 2.1977168718973794, "train/loss_code": 1.6007458766301472, "train/loss_prose": 4.333258152008057} +{"step": 2305, "train/loss": 2.465167284011841, "train/lm_loss": 2.465167284011841, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.262576297021695e-05, "perf/tokens_per_sec": 24889.487492864864, "train/loss_math": 2.6407476663589478, "train/loss_code": 1.9417516589164734, "train/loss_prose": 3.3364181518554688} +{"step": 2306, "train/loss": 2.85090035200119, "train/lm_loss": 2.85090035200119, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.260913959965201e-05, "perf/tokens_per_sec": 25266.780084684066, "train/loss_prose": 3.6332724690437317, "train/loss_code": 1.5361428260803223, "train/loss_math": 2.245990435282389} +{"step": 2307, "train/loss": 2.491879940032959, "train/lm_loss": 2.491879940032959, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2592512520484856e-05, "perf/tokens_per_sec": 25385.80539075672, "train/loss_code": 1.5305817127227783, "train/loss_prose": 4.126981616020203, "train/loss_math": 2.363110065460205} +{"step": 2308, "train/loss": 2.032662183046341, "train/lm_loss": 2.032662183046341, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2575881740819355e-05, "perf/tokens_per_sec": 26556.56867970494, "train/loss_math": 2.1015333334604898, "train/loss_prose": 4.237643718719482, "train/loss_code": 1.4297632426023483} +{"step": 2309, "train/loss": 1.9626088738441467, "train/lm_loss": 1.9626088738441467, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2559247268761115e-05, "perf/tokens_per_sec": 26538.68248495409, "train/loss_math": 2.414539122581482, "train/loss_code": 1.2093915343284607} +{"step": 2310, "train/loss": 3.0464760065078735, "train/lm_loss": 3.0464760065078735, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.254260911241759e-05, "perf/tokens_per_sec": 26087.220025662245, "train/loss_code": 1.216368556022644, "train/loss_prose": 3.66339054107666, "train/loss_math": 2.4192429780960083} +{"step": 2311, "train/loss": 2.64752921462059, "train/lm_loss": 2.64752921462059, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2525967279898015e-05, "perf/tokens_per_sec": 26413.21652391429, "train/loss_code": 1.3604915142059326, "train/loss_prose": 3.934566855430603} +{"step": 2312, "train/loss": 2.273911789059639, "train/lm_loss": 2.273911789059639, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.25093217793134e-05, "perf/tokens_per_sec": 25664.00391984046, "train/loss_code": 1.198794201016426, "train/loss_prose": 3.804105838139852, "train/loss_math": 1.9838001728057861} +{"step": 2313, "train/loss": 2.020632892847061, "train/lm_loss": 2.020632892847061, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.24926726187766e-05, "perf/tokens_per_sec": 26857.85580129413, "train/loss_code": 1.6407417356967926, "train/loss_math": 2.4538607597351074, "train/loss_prose": 3.866752862930298} +{"step": 2314, "train/loss": 2.070666939020157, "train/lm_loss": 2.070666939020157, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.247601980640217e-05, "perf/tokens_per_sec": 25533.630461050514, "train/loss_code": 1.5635454535484314, "train/loss_prose": 4.151655673980713, "train/loss_math": 2.297976016998291} +{"step": 2315, "train/loss": 1.7544080317020416, "train/lm_loss": 1.7544080317020416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.245936335030651e-05, "perf/tokens_per_sec": 25659.979065599236, "train/loss_math": 2.0981741845607758, "train/loss_code": 1.4106418788433075} +{"step": 2316, "train/loss": 2.262934148311615, "train/lm_loss": 2.262934148311615, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2442703258607766e-05, "perf/tokens_per_sec": 26053.11841217318, "train/loss_code": 1.4832396507263184, "train/loss_math": 2.37329363822937, "train/loss_prose": 3.2669365406036377} +{"step": 2317, "train/loss": 1.921017050743103, "train/lm_loss": 1.921017050743103, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2426039539425876e-05, "perf/tokens_per_sec": 26745.714790119266, "train/loss_prose": 2.873664140701294, "train/loss_code": 1.5772662162780762, "train/loss_math": 2.3040707111358643} +{"step": 2318, "train/loss": 2.8333293795585632, "train/lm_loss": 2.8333293795585632, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.240937220088253e-05, "perf/tokens_per_sec": 25774.35294929555, "train/loss_prose": 3.595920503139496, "train/loss_math": 2.2219852209091187, "train/loss_code": 1.616997241973877} +{"step": 2319, "train/loss": 2.3448033928871155, "train/lm_loss": 2.3448033928871155, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.239270125110117e-05, "perf/tokens_per_sec": 25759.281109666703, "train/loss_code": 1.174515724182129, "train/loss_prose": 3.596327225367228, "train/loss_math": 2.222948908805847} +{"step": 2320, "train/loss": 2.040097236633301, "train/lm_loss": 2.040097236633301, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.237602669820704e-05, "perf/tokens_per_sec": 25696.55574385215, "train/loss_prose": 3.294663667678833, "train/loss_code": 1.5079540014266968, "train/loss_math": 2.191680431365967} +{"step": 2321, "train/loss": 2.210525691509247, "train/lm_loss": 2.210525691509247, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.235934855032709e-05, "perf/tokens_per_sec": 25953.855542580208, "train/loss_math": 2.1743775606155396, "train/loss_prose": 3.490447998046875, "train/loss_code": 1.3933922251065571} +{"step": 2322, "train/loss": 2.2667356133461, "train/lm_loss": 2.2667356133461, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.234266681559007e-05, "perf/tokens_per_sec": 26868.020687595104, "train/loss_math": 2.2355223298072815, "train/loss_prose": 3.115097999572754, "train/loss_code": 1.605653166770935} +{"step": 2323, "train/loss": 2.4546146988868713, "train/lm_loss": 2.4546146988868713, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2325981502126433e-05, "perf/tokens_per_sec": 25675.433719364475, "train/loss_math": 2.2465256452560425, "train/loss_prose": 2.9433326721191406, "train/loss_code": 1.8208163976669312} +{"step": 2324, "train/loss": 1.9421918392181396, "train/lm_loss": 1.9421918392181396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.230929261806842e-05, "perf/tokens_per_sec": 25993.123698253545, "train/loss_math": 2.124596357345581, "train/loss_prose": 2.712200403213501, "train/loss_code": 1.4423161347707112} +{"step": 2325, "train/loss": 2.6130001842975616, "train/lm_loss": 2.6130001842975616, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.229260017154997e-05, "perf/tokens_per_sec": 24691.560371899697, "train/loss_math": 1.9015569686889648, "train/loss_prose": 3.5407081246376038, "train/loss_code": 1.4690274000167847} +{"step": 2326, "train/loss": 2.2203001379966736, "train/lm_loss": 2.2203001379966736, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2275904170706797e-05, "perf/tokens_per_sec": 26276.38622605206, "train/loss_prose": 3.530260960261027, "train/loss_code": 1.3213235437870026, "train/loss_math": 1.8863235712051392} +{"step": 2327, "train/loss": 2.347803056240082, "train/lm_loss": 2.347803056240082, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.225920462367632e-05, "perf/tokens_per_sec": 25795.483179505198, "train/loss_prose": 3.5705325603485107, "train/loss_code": 1.4886394739151, "train/loss_math": 2.1660199761390686} +{"step": 2328, "train/loss": 1.9508878886699677, "train/lm_loss": 1.9508878886699677, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.224250153859769e-05, "perf/tokens_per_sec": 25859.54934199138, "train/loss_code": 1.421282410621643, "train/loss_prose": 3.1768546104431152, "train/loss_math": 2.1469812393188477} +{"step": 2329, "train/loss": 2.427004784345627, "train/lm_loss": 2.427004784345627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.222579492361179e-05, "perf/tokens_per_sec": 26689.53862167213, "train/loss_math": 2.2466225624084473, "train/loss_code": 1.5325806140899658, "train/loss_prose": 3.203669230143229} +{"step": 2330, "train/loss": 2.6179333329200745, "train/lm_loss": 2.6179333329200745, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.220908478686123e-05, "perf/tokens_per_sec": 26388.91408945265, "train/loss_code": 1.3754839897155762, "train/loss_prose": 3.6339526176452637, "train/loss_math": 2.4302136103312173} +{"step": 2331, "train/loss": 2.4992526471614838, "train/lm_loss": 2.4992526471614838, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.219237113649032e-05, "perf/tokens_per_sec": 25537.69169617348, "train/loss_math": 2.235492547353109, "train/loss_prose": 3.3772896925608316, "train/loss_code": 1.5778375267982483} +{"step": 2332, "train/loss": 2.3502789735794067, "train/lm_loss": 2.3502789735794067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.217565398064509e-05, "perf/tokens_per_sec": 26130.19137545021, "train/loss_code": 1.3435160319010417, "train/loss_prose": 3.3960909843444824, "train/loss_math": 2.2917052507400513} +{"step": 2333, "train/loss": 2.202314108610153, "train/lm_loss": 2.202314108610153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.215893332747328e-05, "perf/tokens_per_sec": 26951.56240871618, "train/loss_math": 2.252596298853556, "train/loss_code": 1.5519383748372395, "train/loss_prose": 3.10245418548584} +{"step": 2334, "train/loss": 2.6245917975902557, "train/lm_loss": 2.6245917975902557, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.214220918512434e-05, "perf/tokens_per_sec": 26261.685138646855, "train/loss_prose": 3.366306734085083, "train/loss_code": 1.3884002367655437} +{"step": 2335, "train/loss": 2.1632118225097656, "train/lm_loss": 2.1632118225097656, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.21254815617494e-05, "perf/tokens_per_sec": 25703.245222496345, "train/loss_math": 2.372082312901815, "train/loss_prose": 3.283867359161377, "train/loss_code": 1.2072374025980632} +{"step": 2336, "train/loss": 2.315706342458725, "train/lm_loss": 2.315706342458725, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.210875046550132e-05, "perf/tokens_per_sec": 26557.84132264622, "train/loss_math": 2.2051377296447754, "train/loss_code": 1.7411459982395172, "train/loss_prose": 3.5753955841064453} +{"step": 2337, "train/loss": 1.7633805871009827, "train/lm_loss": 1.7633805871009827, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.209201590453461e-05, "perf/tokens_per_sec": 25368.674298000024, "train/loss_code": 1.3458440601825714, "train/loss_math": 2.1809171438217163} +{"step": 2338, "train/loss": 1.9268777072429657, "train/lm_loss": 1.9268777072429657, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.207527788700551e-05, "perf/tokens_per_sec": 24406.066024735836, "train/loss_code": 1.5255484382311504, "train/loss_prose": 3.8914101123809814, "train/loss_math": 2.370321035385132} +{"step": 2339, "train/loss": 2.688633620738983, "train/lm_loss": 2.688633620738983, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.205853642107192e-05, "perf/tokens_per_sec": 25134.553081420185, "train/loss_math": 2.3121816317240396, "train/loss_prose": 3.539144595464071, "train/loss_code": 1.9775452613830566} +{"step": 2340, "train/loss": 3.155164062976837, "train/lm_loss": 3.155164062976837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2041791514893416e-05, "perf/tokens_per_sec": 24518.43204146758, "train/loss_prose": 3.797494077682495, "train/loss_math": 2.090390920639038, "train/loss_code": 2.0730602741241455} +{"step": 2341, "train/loss": 2.4079218804836273, "train/lm_loss": 2.4079218804836273, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.202504317663128e-05, "perf/tokens_per_sec": 24618.424672492212, "train/loss_prose": 3.579134464263916, "train/loss_math": 2.125203490257263, "train/loss_code": 1.07518070936203} +{"step": 2342, "train/loss": 2.5946543216705322, "train/lm_loss": 2.5946543216705322, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.200829141444844e-05, "perf/tokens_per_sec": 25763.76217377025, "train/loss_code": 1.3797533512115479, "train/loss_math": 2.247769753138224, "train/loss_prose": 3.75147279103597} +{"step": 2343, "train/loss": 2.370406538248062, "train/lm_loss": 2.370406538248062, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.19915362365095e-05, "perf/tokens_per_sec": 26318.370736445093, "train/loss_math": 2.0910616159439086, "train/loss_prose": 3.422890543937683, "train/loss_code": 1.66216242313385} +{"step": 2344, "train/loss": 2.693227231502533, "train/lm_loss": 2.693227231502533, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1974777650980735e-05, "perf/tokens_per_sec": 25853.32293574392, "train/loss_code": 1.4602786302566528, "train/loss_prose": 3.3350082635879517, "train/loss_math": 2.2485020955403647} +{"step": 2345, "train/loss": 2.5179476737976074, "train/lm_loss": 2.5179476737976074, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.195801566603007e-05, "perf/tokens_per_sec": 26017.293278309924, "train/loss_math": 2.392094910144806, "train/loss_prose": 3.707703948020935, "train/loss_code": 1.5798963904380798} +{"step": 2346, "train/loss": 2.6927531957626343, "train/lm_loss": 2.6927531957626343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1941250289827104e-05, "perf/tokens_per_sec": 26195.613473003676, "train/loss_math": 2.0374938249588013, "train/loss_prose": 3.3480124473571777} +{"step": 2347, "train/loss": 2.07753449678421, "train/lm_loss": 2.07753449678421, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.192448153054306e-05, "perf/tokens_per_sec": 25039.16105637498, "train/loss_prose": 3.7623403072357178, "train/loss_math": 2.299177964528402, "train/loss_code": 1.4901005029678345} +{"step": 2348, "train/loss": 2.6161502301692963, "train/lm_loss": 2.6161502301692963, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1907709396350844e-05, "perf/tokens_per_sec": 26125.86005982533, "train/loss_prose": 3.6661428610483804, "train/loss_math": 2.1508579552173615, "train/loss_code": 1.3273411989212036} +{"step": 2349, "train/loss": 2.148153305053711, "train/lm_loss": 2.148153305053711, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1890933895424976e-05, "perf/tokens_per_sec": 25795.018406492345, "train/loss_code": 1.4552113115787506, "train/loss_prose": 3.5408557653427124, "train/loss_math": 2.1413344144821167} +{"step": 2350, "train/loss": 2.2849573493003845, "train/lm_loss": 2.2849573493003845, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.187415503594166e-05, "perf/tokens_per_sec": 25069.34131044486, "train/loss_prose": 3.771178722381592, "train/loss_code": 1.6747814416885376, "train/loss_math": 2.0190874338150024} +{"step": 2351, "train/loss": 2.6895903944969177, "train/lm_loss": 2.6895903944969177, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.185737282607867e-05, "perf/tokens_per_sec": 26203.524513751618, "train/loss_prose": 3.753019094467163, "train/loss_math": 2.329797697067261, "train/loss_code": 2.3616955280303955} +{"step": 2352, "train/loss": 2.4466117918491364, "train/lm_loss": 2.4466117918491364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.184058727401546e-05, "perf/tokens_per_sec": 26650.454727234086, "train/loss_math": 2.2057971954345703, "train/loss_code": 1.3760568300882976, "train/loss_prose": 3.6777097384134927} +{"step": 2353, "train/loss": 2.017739027738571, "train/lm_loss": 2.017739027738571, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1823798387933134e-05, "perf/tokens_per_sec": 25194.229016992304, "train/loss_code": 1.5614297091960907, "train/loss_math": 2.3018381595611572, "train/loss_prose": 4.471496105194092} +{"step": 2354, "train/loss": 2.2508651316165924, "train/lm_loss": 2.2508651316165924, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.180700617601436e-05, "perf/tokens_per_sec": 25014.51549513975, "train/loss_prose": 3.368475079536438, "train/loss_code": 1.8572203993797303, "train/loss_math": 1.9838685989379883} +{"step": 2355, "train/loss": 2.377683162689209, "train/lm_loss": 2.377683162689209, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.179021064644347e-05, "perf/tokens_per_sec": 26357.254258922872, "train/loss_code": 1.859169453382492, "train/loss_math": 2.317949056625366, "train/loss_prose": 3.4744449853897095} +{"step": 2356, "train/loss": 2.6236554980278015, "train/lm_loss": 2.6236554980278015, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.17734118074064e-05, "perf/tokens_per_sec": 25664.61734182453, "train/loss_math": 2.364782214164734, "train/loss_code": 1.9019657969474792, "train/loss_prose": 3.863092064857483} +{"step": 2357, "train/loss": 1.7568345069885254, "train/lm_loss": 1.7568345069885254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1756609667090696e-05, "perf/tokens_per_sec": 25032.192670669712, "train/loss_math": 2.0847095251083374, "train/loss_code": 1.415886402130127, "train/loss_prose": 2.8058245182037354} +{"step": 2358, "train/loss": 2.723572313785553, "train/lm_loss": 2.723572313785553, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.173980423368553e-05, "perf/tokens_per_sec": 26418.82176078636, "train/loss_prose": 3.8925840258598328, "train/loss_code": 1.3649174372355144, "train/loss_math": 2.1234896183013916} +{"step": 2359, "train/loss": 2.4200882613658905, "train/lm_loss": 2.4200882613658905, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.172299551538164e-05, "perf/tokens_per_sec": 26003.392248568893, "train/loss_prose": 3.3616600036621094, "train/loss_code": 1.7750197350978851, "train/loss_math": 2.1756465435028076} +{"step": 2360, "train/loss": 2.079800933599472, "train/lm_loss": 2.079800933599472, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.170618352037142e-05, "perf/tokens_per_sec": 25932.152013681654, "train/loss_prose": 3.0841981569925943, "train/loss_code": 1.1622546116511028, "train/loss_math": 1.9495246410369873} +{"step": 2361, "train/loss": 2.2838281393051147, "train/lm_loss": 2.2838281393051147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.168936825684882e-05, "perf/tokens_per_sec": 25673.1315989641, "train/loss_math": 2.0710333387056985, "train/loss_code": 1.9595164060592651, "train/loss_prose": 3.8849079608917236} +{"step": 2362, "train/loss": 2.8209158182144165, "train/lm_loss": 2.8209158182144165, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1672549733009396e-05, "perf/tokens_per_sec": 25823.128536427725, "train/loss_math": 2.306343364715576, "train/loss_prose": 3.6785367329915366} +{"step": 2363, "train/loss": 2.223816990852356, "train/lm_loss": 2.223816990852356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1655727957050285e-05, "perf/tokens_per_sec": 25346.853504989, "train/loss_code": 1.4527277112007142, "train/loss_prose": 3.5089661280314126} +{"step": 2364, "train/loss": 2.1790126860141754, "train/lm_loss": 2.1790126860141754, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.163890293717022e-05, "perf/tokens_per_sec": 26350.705069251653, "train/loss_prose": 2.9740301370620728, "train/loss_math": 2.3489197492599487, "train/loss_code": 1.696550577878952} +{"step": 2365, "train/loss": 2.5160850286483765, "train/lm_loss": 2.5160850286483765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.162207468156952e-05, "perf/tokens_per_sec": 25746.889044750173, "train/loss_math": 2.1151464581489563, "train/loss_code": 1.9296766519546509, "train/loss_prose": 3.2461390495300293} +{"step": 2366, "train/loss": 2.951542466878891, "train/lm_loss": 2.951542466878891, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1605243198450066e-05, "perf/tokens_per_sec": 25118.27344036704, "train/loss_math": 2.36957049369812, "train/loss_prose": 4.092417359352112, "train/loss_code": 1.2517637014389038} +{"step": 2367, "train/loss": 2.1824911236763, "train/lm_loss": 2.1824911236763, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.158840849601532e-05, "perf/tokens_per_sec": 26574.396787536236, "train/loss_prose": 3.049365441004435, "train/loss_math": 2.136235475540161, "train/loss_code": 1.3464541832605998} +{"step": 2368, "train/loss": 2.411224842071533, "train/lm_loss": 2.411224842071533, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.15715705824703e-05, "perf/tokens_per_sec": 25793.043308020526, "train/loss_math": 2.227308988571167, "train/loss_code": 1.5007789929707844, "train/loss_prose": 3.4442808628082275} +{"step": 2369, "train/loss": 3.2578232288360596, "train/lm_loss": 3.2578232288360596, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.155472946602162e-05, "perf/tokens_per_sec": 27005.281921256526, "train/loss_prose": 3.727015256881714, "train/loss_math": 2.4758365948994956} +{"step": 2370, "train/loss": 1.6310851871967316, "train/lm_loss": 1.6310851871967316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.153788515487742e-05, "perf/tokens_per_sec": 26065.213877480426, "train/loss_code": 1.5100426844188146, "train/loss_math": 2.4783835411071777} +{"step": 2371, "train/loss": 2.1518024802207947, "train/lm_loss": 2.1518024802207947, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.152103765724743e-05, "perf/tokens_per_sec": 26099.822987768843, "train/loss_prose": 3.707175135612488, "train/loss_code": 1.4204164743423462, "train/loss_math": 2.6979873180389404} +{"step": 2372, "train/loss": 2.4878940284252167, "train/lm_loss": 2.4878940284252167, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.150418698134289e-05, "perf/tokens_per_sec": 25552.16322572489, "train/loss_code": 1.385295808315277, "train/loss_prose": 3.38462503751119, "train/loss_math": 2.326228698094686} +{"step": 2373, "train/loss": 3.04656982421875, "train/lm_loss": 3.04656982421875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.148733313537664e-05, "perf/tokens_per_sec": 26132.854456537447, "train/loss_math": 2.057185709476471, "train/loss_prose": 3.8445364952087404, "train/loss_code": 1.0355033874511719} +{"step": 2374, "train/loss": 2.9200238585472107, "train/lm_loss": 2.9200238585472107, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.147047612756302e-05, "perf/tokens_per_sec": 26721.545045899813, "train/loss_prose": 3.8977527618408203, "train/loss_code": 1.2904757658640544} +{"step": 2375, "train/loss": 1.91024649143219, "train/lm_loss": 1.91024649143219, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.145361596611795e-05, "perf/tokens_per_sec": 25092.812247956263, "train/loss_math": 2.341918706893921, "train/loss_code": 1.520885157585144, "train/loss_prose": 2.6678136587142944} +{"step": 2376, "train/loss": 2.472549945116043, "train/lm_loss": 2.472549945116043, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.143675265925885e-05, "perf/tokens_per_sec": 25269.864771363347, "train/loss_code": 1.0558199286460876, "train/loss_math": 2.2893826961517334, "train/loss_prose": 3.6002036730448403} +{"step": 2377, "train/loss": 3.148259699344635, "train/lm_loss": 3.148259699344635, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1419886215204694e-05, "perf/tokens_per_sec": 24684.500349148966, "train/loss_math": 2.5038310289382935, "train/loss_prose": 3.616975498199463, "train/loss_code": 2.093539237976074} +{"step": 2378, "train/loss": 2.484486997127533, "train/lm_loss": 2.484486997127533, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.140301664217599e-05, "perf/tokens_per_sec": 25631.95512135734, "train/loss_code": 1.4872728288173676, "train/loss_prose": 3.4817013144493103} +{"step": 2379, "train/loss": 2.2249256670475006, "train/lm_loss": 2.2249256670475006, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.138614394839476e-05, "perf/tokens_per_sec": 25149.270671452578, "train/loss_code": 1.4157041311264038, "train/loss_prose": 3.3735433419545493, "train/loss_math": 2.015958309173584} +{"step": 2380, "train/loss": 1.8310819566249847, "train/lm_loss": 1.8310819566249847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1369268142084556e-05, "perf/tokens_per_sec": 25215.93574731033, "train/loss_code": 1.6526213884353638, "train/loss_math": 2.3664634227752686} +{"step": 2381, "train/loss": 2.5749974250793457, "train/lm_loss": 2.5749974250793457, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.135238923147043e-05, "perf/tokens_per_sec": 26754.71159665174, "train/loss_prose": 3.2619997024536134, "train/loss_math": 1.8561688661575317, "train/loss_code": 1.2169058322906494} +{"step": 2382, "train/loss": 2.961706817150116, "train/lm_loss": 2.961706817150116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.133550722477896e-05, "perf/tokens_per_sec": 25696.824793586216, "train/loss_prose": 3.577755641937256, "train/loss_math": 1.9678807854652405, "train/loss_code": 1.8691147565841675} +{"step": 2383, "train/loss": 2.079442322254181, "train/lm_loss": 2.079442322254181, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1318622130238236e-05, "perf/tokens_per_sec": 25689.754500974963, "train/loss_code": 1.340517481168111, "train/loss_math": 2.031526207923889, "train/loss_prose": 3.259703516960144} +{"step": 2384, "train/loss": 2.641090005636215, "train/lm_loss": 2.641090005636215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.130173395607785e-05, "perf/tokens_per_sec": 25678.7725273904, "train/loss_prose": 3.648281216621399, "train/loss_code": 1.3905775547027588, "train/loss_math": 2.363860845565796} +{"step": 2385, "train/loss": 2.235561579465866, "train/lm_loss": 2.235561579465866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1284842710528876e-05, "perf/tokens_per_sec": 26889.467769174542, "train/loss_prose": 3.192373037338257, "train/loss_code": 1.7695600986480713, "train/loss_math": 2.210753083229065} +{"step": 2386, "train/loss": 2.6756765246391296, "train/lm_loss": 2.6756765246391296, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.126794840182392e-05, "perf/tokens_per_sec": 25080.32068018114, "train/loss_math": 2.088227152824402, "train/loss_prose": 3.518498122692108, "train/loss_code": 1.577481985092163} +{"step": 2387, "train/loss": 2.88634592294693, "train/lm_loss": 2.88634592294693, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1251051038197055e-05, "perf/tokens_per_sec": 26088.725399647996, "train/loss_prose": 3.559911346435547, "train/loss_math": 2.120893716812134, "train/loss_code": 1.0494226217269897} +{"step": 2388, "train/loss": 2.9627758860588074, "train/lm_loss": 2.9627758860588074, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.123415062788385e-05, "perf/tokens_per_sec": 25643.241557293975, "train/loss_prose": 3.736797904968262, "train/loss_math": 2.2924416065216064, "train/loss_code": 1.3628877997398376} +{"step": 2389, "train/loss": 2.519689530134201, "train/lm_loss": 2.519689530134201, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.121724717912138e-05, "perf/tokens_per_sec": 25101.86816597166, "train/loss_prose": 3.5334432125091553, "train/loss_code": 1.6757395267486572, "train/loss_math": 2.264983296394348} +{"step": 2390, "train/loss": 2.10883766412735, "train/lm_loss": 2.10883766412735, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.120034070014814e-05, "perf/tokens_per_sec": 26429.266842247765, "train/loss_code": 1.6430659770965577, "train/loss_math": 1.976308822631836, "train/loss_prose": 3.339530825614929} +{"step": 2391, "train/loss": 2.280742257833481, "train/lm_loss": 2.280742257833481, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.118343119920418e-05, "perf/tokens_per_sec": 26458.77710235682, "train/loss_prose": 3.449871381123861, "train/loss_math": 2.056083917617798, "train/loss_code": 1.4600602388381958} +{"step": 2392, "train/loss": 2.644361913204193, "train/lm_loss": 2.644361913204193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.116651868453097e-05, "perf/tokens_per_sec": 25990.96086197454, "train/loss_math": 2.0811673402786255, "train/loss_prose": 3.437720835208893, "train/loss_code": 1.6208381652832031} +{"step": 2393, "train/loss": 2.6298716068267822, "train/lm_loss": 2.6298716068267822, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.114960316437145e-05, "perf/tokens_per_sec": 25847.799511024517, "train/loss_prose": 3.459304928779602, "train/loss_code": 1.76118008295695, "train/loss_math": 1.918213129043579} +{"step": 2394, "train/loss": 2.5096103847026825, "train/lm_loss": 2.5096103847026825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1132684646970064e-05, "perf/tokens_per_sec": 26244.03346063365, "train/loss_math": 2.243498961130778, "train/loss_code": 1.176296889781952, "train/loss_prose": 3.664597511291504} +{"step": 2395, "train/loss": 2.2724294662475586, "train/lm_loss": 2.2724294662475586, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.111576314057268e-05, "perf/tokens_per_sec": 26051.22210276512, "train/loss_prose": 3.3479626178741455, "train/loss_math": 2.2540109952290854, "train/loss_code": 1.5738259156545003} +{"step": 2396, "train/loss": 2.2169487178325653, "train/lm_loss": 2.2169487178325653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1098838653426645e-05, "perf/tokens_per_sec": 26080.645954812433, "train/loss_prose": 3.4072872400283813, "train/loss_code": 1.7703686714172364, "train/loss_math": 2.0691723823547363} +{"step": 2397, "train/loss": 2.5121983885765076, "train/lm_loss": 2.5121983885765076, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.108191119378073e-05, "perf/tokens_per_sec": 26189.623439739018, "train/loss_code": 1.6498839259147644, "train/loss_math": 2.27005942662557, "train/loss_prose": 3.3292133808135986} +{"step": 2398, "train/loss": 2.3528853952884674, "train/lm_loss": 2.3528853952884674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1064980769885187e-05, "perf/tokens_per_sec": 27297.40765445477, "train/loss_math": 2.1909748315811157, "train/loss_code": 1.7972051501274109, "train/loss_prose": 3.2323867082595825} +{"step": 2399, "train/loss": 2.5726987421512604, "train/lm_loss": 2.5726987421512604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.104804738999169e-05, "perf/tokens_per_sec": 26319.660973222068, "train/loss_code": 1.264173646767934, "train/loss_math": 2.3366503715515137, "train/loss_prose": 4.038589318593343} +{"step": 2400, "train/loss": 2.6330870389938354, "train/lm_loss": 2.6330870389938354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1031111062353373e-05, "perf/tokens_per_sec": 26906.270843904855, "train/loss_prose": 3.3429526686668396, "train/loss_math": 2.235073685646057, "train/loss_code": 1.6113696098327637} +{"step": 2400, "eval/loss": 2.1975392281138078, "eval/lm_loss": 2.1975392281138078, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 9.00283230256803, "eval/loss_code": 1.5806722537567626, "eval/ppl_code": 4.8582206708045526, "eval/loss_prose": 3.489374045740094, "eval/ppl_prose": 32.76543162433443, "eval/loss_math": 2.1135573643179693, "eval/ppl_math": 8.277635535890601} +{"step": 2401, "train/loss": 2.6282317340373993, "train/lm_loss": 2.6282317340373993, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.101417179522479e-05, "perf/tokens_per_sec": 26641.817091212477, "train/loss_prose": 3.8722027937571206, "train/loss_code": 1.881848967075348} +{"step": 2402, "train/loss": 2.4046952724456787, "train/lm_loss": 2.4046952724456787, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0997229596861944e-05, "perf/tokens_per_sec": 26262.52819880182, "train/loss_math": 2.3433937430381775, "train/loss_code": 1.7898492813110352, "train/loss_prose": 3.1421449184417725} +{"step": 2403, "train/loss": 2.4761966466903687, "train/lm_loss": 2.4761966466903687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.098028447552224e-05, "perf/tokens_per_sec": 26522.66282152026, "train/loss_prose": 3.7856268882751465, "train/loss_math": 1.8415493567784627, "train/loss_code": 1.4640222787857056} +{"step": 2404, "train/loss": 2.2286141216754913, "train/lm_loss": 2.2286141216754913, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0963336439464526e-05, "perf/tokens_per_sec": 26012.13280521337, "train/loss_code": 1.6718440453211467, "train/loss_math": 2.3054521083831787, "train/loss_prose": 3.5915727615356445} +{"step": 2405, "train/loss": 3.0073387026786804, "train/lm_loss": 3.0073387026786804, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.094638549694908e-05, "perf/tokens_per_sec": 25783.52060896397, "train/loss_prose": 3.6024780750274656, "train/loss_math": 2.4409079551696777, "train/loss_code": 1.1645030975341797} +{"step": 2406, "train/loss": 1.8185099065303802, "train/lm_loss": 1.8185099065303802, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.092943165623758e-05, "perf/tokens_per_sec": 25933.1306327249, "train/loss_math": 1.9714897473653157, "train/loss_code": 1.2320128083229065, "train/loss_prose": 3.705559253692627} +{"step": 2407, "train/loss": 2.361888974905014, "train/lm_loss": 2.361888974905014, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.091247492559312e-05, "perf/tokens_per_sec": 27006.93757231249, "train/loss_prose": 3.2695931593577066, "train/loss_code": 1.7680679857730865, "train/loss_math": 2.0140607357025146} +{"step": 2408, "train/loss": 1.833167314529419, "train/lm_loss": 1.833167314529419, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.089551531328021e-05, "perf/tokens_per_sec": 26451.973726430926, "train/loss_math": 2.2982369661331177, "train/loss_code": 1.6781440774599712} +{"step": 2409, "train/loss": 2.360549569129944, "train/lm_loss": 2.360549569129944, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.087855282756475e-05, "perf/tokens_per_sec": 26148.804549135166, "train/loss_code": 1.6072063148021698, "train/loss_prose": 3.4795737266540527, "train/loss_math": 2.0168490409851074} +{"step": 2410, "train/loss": 2.1088928878307343, "train/lm_loss": 2.1088928878307343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.086158747671406e-05, "perf/tokens_per_sec": 27188.628774270943, "train/loss_code": 1.724291721979777, "train/loss_prose": 4.2017059326171875, "train/loss_math": 2.323686361312866} +{"step": 2411, "train/loss": 2.552410215139389, "train/lm_loss": 2.552410215139389, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0844619268996845e-05, "perf/tokens_per_sec": 26244.755109211397, "train/loss_code": 1.58487206697464, "train/loss_prose": 3.5438581307729087, "train/loss_math": 2.2059876124064126} +{"step": 2412, "train/loss": 2.4845688343048096, "train/lm_loss": 2.4845688343048096, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.08276482126832e-05, "perf/tokens_per_sec": 26771.8889659396, "train/loss_math": 2.138693928718567, "train/loss_prose": 3.521920680999756, "train/loss_code": 1.677800138791402} +{"step": 2413, "train/loss": 2.9079126715660095, "train/lm_loss": 2.9079126715660095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.08106743160446e-05, "perf/tokens_per_sec": 26360.853656253694, "train/loss_prose": 3.604427528381348, "train/loss_code": 1.7470542987187703} +{"step": 2414, "train/loss": 2.7290529012680054, "train/lm_loss": 2.7290529012680054, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.079369758735393e-05, "perf/tokens_per_sec": 26955.114503625162, "train/loss_code": 1.4928923447926838, "train/loss_prose": 3.6051692366600037, "train/loss_math": 2.9330697059631348} +{"step": 2415, "train/loss": 2.1040660440921783, "train/lm_loss": 2.1040660440921783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0776718034885454e-05, "perf/tokens_per_sec": 27151.84633012661, "train/loss_prose": 3.2620259523391724, "train/loss_math": 2.305420160293579, "train/loss_code": 1.4244091510772705} +{"step": 2416, "train/loss": 2.1426391303539276, "train/lm_loss": 2.1426391303539276, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.075973566691477e-05, "perf/tokens_per_sec": 27100.064649447424, "train/loss_math": 2.124346057573954, "train/loss_code": 1.5382690131664276, "train/loss_prose": 4.614998817443848} +{"step": 2417, "train/loss": 2.668652892112732, "train/lm_loss": 2.668652892112732, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.074275049171889e-05, "perf/tokens_per_sec": 25931.799728906477, "train/loss_prose": 3.6879798571268716, "train/loss_code": 1.8081159591674805, "train/loss_math": 2.2230171362559} +{"step": 2418, "train/loss": 2.6870368123054504, "train/lm_loss": 2.6870368123054504, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0725762517576195e-05, "perf/tokens_per_sec": 25829.37923921525, "train/loss_code": 1.8694543242454529, "train/loss_math": 1.9982913136482239, "train/loss_prose": 3.4402007460594177} +{"step": 2419, "train/loss": 2.6384327113628387, "train/lm_loss": 2.6384327113628387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0708771752766394e-05, "perf/tokens_per_sec": 26353.251589567222, "train/loss_prose": 3.4951539635658264, "train/loss_code": 1.5520177284876506, "train/loss_math": 2.470791816711426} +{"step": 2420, "train/loss": 2.8750347793102264, "train/lm_loss": 2.8750347793102264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.06917782055706e-05, "perf/tokens_per_sec": 26047.62777383573, "train/loss_prose": 4.378093878428142, "train/loss_math": 2.127357095479965, "train/loss_code": 1.3565678596496582} +{"step": 2421, "train/loss": 2.146057903766632, "train/lm_loss": 2.146057903766632, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0674781884271254e-05, "perf/tokens_per_sec": 26041.902595266645, "train/loss_code": 1.6900060176849365, "train/loss_prose": 3.3587030172348022, "train/loss_math": 2.0010273456573486} +{"step": 2422, "train/loss": 3.3339288234710693, "train/lm_loss": 3.3339288234710693, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.065778279715215e-05, "perf/tokens_per_sec": 26383.48360464402, "train/loss_prose": 3.6539373795191445, "train/loss_math": 2.3739030361175537} +{"step": 2423, "train/loss": 2.4283530712127686, "train/lm_loss": 2.4283530712127686, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.064078095249844e-05, "perf/tokens_per_sec": 26563.54918932626, "train/loss_prose": 3.3740787506103516, "train/loss_code": 1.1876096526781719, "train/loss_math": 2.3676795959472656} +{"step": 2424, "train/loss": 2.215705305337906, "train/lm_loss": 2.215705305337906, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.062377635859663e-05, "perf/tokens_per_sec": 26348.401498710944, "train/loss_code": 1.2760247141122818, "train/loss_prose": 3.4832756519317627, "train/loss_math": 2.1717162132263184} +{"step": 2425, "train/loss": 2.5199843049049377, "train/lm_loss": 2.5199843049049377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0606769023734536e-05, "perf/tokens_per_sec": 26933.60484118772, "train/loss_math": 2.189152777194977, "train/loss_prose": 3.517301162083944, "train/loss_code": 0.8513591885566711} +{"step": 2426, "train/loss": 2.1973437070846558, "train/lm_loss": 2.1973437070846558, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0589758956201327e-05, "perf/tokens_per_sec": 26026.989472472233, "train/loss_prose": 3.361854076385498, "train/loss_math": 2.3774004777272544, "train/loss_code": 1.2409464319547017} +{"step": 2427, "train/loss": 2.6663201451301575, "train/lm_loss": 2.6663201451301575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0572746164287514e-05, "perf/tokens_per_sec": 26285.67325649535, "train/loss_code": 2.137967050075531, "train/loss_prose": 3.2294846375783286, "train/loss_math": 2.455390532811483} +{"step": 2428, "train/loss": 2.3037689328193665, "train/lm_loss": 2.3037689328193665, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0555730656284914e-05, "perf/tokens_per_sec": 26259.316830699485, "train/loss_math": 2.082141160964966, "train/loss_code": 1.5052919685840607, "train/loss_prose": 3.4422806898752847} +{"step": 2429, "train/loss": 2.184976577758789, "train/lm_loss": 2.184976577758789, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.053871244048669e-05, "perf/tokens_per_sec": 26315.9518834124, "train/loss_code": 1.4712746540705364, "train/loss_math": 2.265559136867523, "train/loss_prose": 4.003751754760742} +{"step": 2430, "train/loss": 2.2543719708919525, "train/lm_loss": 2.2543719708919525, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.052169152518729e-05, "perf/tokens_per_sec": 26195.573530375965, "train/loss_code": 1.944686198234558, "train/loss_prose": 3.8024163246154785, "train/loss_math": 2.254563808441162} +{"step": 2431, "train/loss": 2.22665998339653, "train/lm_loss": 2.22665998339653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.050466791868254e-05, "perf/tokens_per_sec": 26329.906716629503, "train/loss_code": 1.711009830236435, "train/loss_math": 2.3731165726979575, "train/loss_prose": 3.849891424179077} +{"step": 2432, "train/loss": 2.161308616399765, "train/lm_loss": 2.161308616399765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0487641629269516e-05, "perf/tokens_per_sec": 26100.655687494684, "train/loss_code": 1.3609329164028168, "train/loss_prose": 3.7470593452453613, "train/loss_math": 2.17630934715271} +{"step": 2433, "train/loss": 2.596899390220642, "train/lm_loss": 2.596899390220642, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0470612665246618e-05, "perf/tokens_per_sec": 26098.752451903794, "train/loss_code": 1.6195734143257141, "train/loss_math": 2.1442585587501526, "train/loss_prose": 3.311882972717285} +{"step": 2434, "train/loss": 2.651914656162262, "train/lm_loss": 2.651914656162262, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.045358103491357e-05, "perf/tokens_per_sec": 26112.160309822077, "train/loss_prose": 3.4126046657562257, "train/loss_code": 1.3840981523195903} +{"step": 2435, "train/loss": 2.252135843038559, "train/lm_loss": 2.252135843038559, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0436546746571372e-05, "perf/tokens_per_sec": 26441.103738258782, "train/loss_prose": 3.4270924727121987, "train/loss_code": 1.4100931137800217, "train/loss_math": 2.0954372882843018} +{"step": 2436, "train/loss": 2.2317567467689514, "train/lm_loss": 2.2317567467689514, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0419509808522334e-05, "perf/tokens_per_sec": 27348.466665817657, "train/loss_prose": 3.145692467689514, "train/loss_code": 1.5968042016029358, "train/loss_math": 2.0922650694847107} +{"step": 2437, "train/loss": 2.1260218918323517, "train/lm_loss": 2.1260218918323517, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0402470229070056e-05, "perf/tokens_per_sec": 27379.368395553607, "train/loss_math": 2.301645517349243, "train/loss_prose": 3.0155611832936606, "train/loss_code": 1.1194002230962117} +{"step": 2438, "train/loss": 2.166298508644104, "train/lm_loss": 2.166298508644104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.038542801651941e-05, "perf/tokens_per_sec": 27405.57397248255, "train/loss_prose": 3.3965940475463867, "train/loss_math": 2.1425666213035583, "train/loss_code": 0.9834668040275574} +{"step": 2439, "train/loss": 2.658250868320465, "train/lm_loss": 2.658250868320465, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0368383179176585e-05, "perf/tokens_per_sec": 27341.372178111775, "train/loss_code": 1.3420735597610474, "train/loss_prose": 3.663769543170929, "train/loss_math": 1.9633904695510864} +{"step": 2440, "train/loss": 2.2015777230262756, "train/lm_loss": 2.2015777230262756, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0351335725349e-05, "perf/tokens_per_sec": 27367.592229328788, "train/loss_prose": 3.165226697921753, "train/loss_math": 2.20902156829834, "train/loss_code": 1.8704359928766887} +{"step": 2441, "train/loss": 2.4551514387130737, "train/lm_loss": 2.4551514387130737, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0334285663345404e-05, "perf/tokens_per_sec": 27243.384840320516, "train/loss_math": 2.119540959596634, "train/loss_prose": 3.3659070332845054, "train/loss_code": 1.06532621383667} +{"step": 2442, "train/loss": 2.2035392820835114, "train/lm_loss": 2.2035392820835114, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.031723300147577e-05, "perf/tokens_per_sec": 26692.73166608661, "train/loss_code": 1.355973243713379, "train/loss_math": 2.3528877099355063, "train/loss_prose": 3.250866174697876} +{"step": 2443, "train/loss": 2.720352917909622, "train/lm_loss": 2.720352917909622, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0300177748051373e-05, "perf/tokens_per_sec": 27273.443249215765, "train/loss_prose": 3.3951289653778076, "train/loss_math": 1.9345279335975647, "train/loss_code": 2.156625509262085} +{"step": 2444, "train/loss": 2.0897298455238342, "train/lm_loss": 2.0897298455238342, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.028311991138472e-05, "perf/tokens_per_sec": 27127.49416786015, "train/loss_math": 2.1294151544570923, "train/loss_code": 1.970673680305481} +{"step": 2445, "train/loss": 2.729048043489456, "train/lm_loss": 2.729048043489456, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.02660594997896e-05, "perf/tokens_per_sec": 24842.87870711932, "train/loss_math": 2.2947731614112854, "train/loss_prose": 3.413049817085266, "train/loss_code": 1.7953194677829742} +{"step": 2446, "train/loss": 2.8121180534362793, "train/lm_loss": 2.8121180534362793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.024899652158107e-05, "perf/tokens_per_sec": 26047.232853118567, "train/loss_prose": 3.7147756218910217, "train/loss_code": 0.8619899153709412, "train/loss_math": 2.2586172421773276} +{"step": 2447, "train/loss": 2.5699002146720886, "train/lm_loss": 2.5699002146720886, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.023193098507538e-05, "perf/tokens_per_sec": 26233.734249332316, "train/loss_math": 2.3054753144582114, "train/loss_prose": 3.3889753023783364, "train/loss_code": 1.7379254698753357} +{"step": 2448, "train/loss": 2.521759659051895, "train/lm_loss": 2.521759659051895, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0214862898590095e-05, "perf/tokens_per_sec": 24996.135876420958, "train/loss_code": 1.4805806279182434, "train/loss_prose": 3.458587328592936, "train/loss_math": 2.2790513038635254} +{"step": 2449, "train/loss": 2.7670478224754333, "train/lm_loss": 2.7670478224754333, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0197792270443982e-05, "perf/tokens_per_sec": 26810.66064904765, "train/loss_math": 2.2067281007766724, "train/loss_prose": 3.6293959617614746, "train/loss_code": 1.602671504020691} +{"step": 2450, "train/loss": 2.110782116651535, "train/lm_loss": 2.110782116651535, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0180719108957063e-05, "perf/tokens_per_sec": 26903.911250869136, "train/loss_math": 1.818814794222514, "train/loss_code": 1.4231400887171428, "train/loss_prose": 3.580196738243103} +{"step": 2451, "train/loss": 2.6356208324432373, "train/lm_loss": 2.6356208324432373, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.016364342245059e-05, "perf/tokens_per_sec": 26150.63548050256, "train/loss_code": 1.256498247385025, "train/loss_prose": 3.3663127422332764, "train/loss_math": 2.5533595085144043} +{"step": 2452, "train/loss": 2.4929230213165283, "train/lm_loss": 2.4929230213165283, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0146565219247036e-05, "perf/tokens_per_sec": 26781.487871073747, "train/loss_math": 2.203132450580597, "train/loss_prose": 3.374736229578654, "train/loss_code": 1.0066457986831665} +{"step": 2453, "train/loss": 2.956968903541565, "train/lm_loss": 2.956968903541565, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0129484507670115e-05, "perf/tokens_per_sec": 26705.054854590024, "train/loss_prose": 3.383086013793945, "train/loss_math": 2.057687997817993, "train/loss_code": 2.3413166999816895} +{"step": 2454, "train/loss": 3.223022699356079, "train/lm_loss": 3.223022699356079, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0112401296044757e-05, "perf/tokens_per_sec": 26851.768954242238, "train/loss_prose": 3.9105029582977293, "train/loss_code": 1.7678616046905518, "train/loss_math": 2.231902241706848} +{"step": 2455, "train/loss": 2.6683064699172974, "train/lm_loss": 2.6683064699172974, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0095315592697126e-05, "perf/tokens_per_sec": 27036.85914287153, "train/loss_prose": 3.328027089436849, "train/loss_math": 2.409133195877075, "train/loss_code": 1.725837230682373} +{"step": 2456, "train/loss": 2.3183594346046448, "train/lm_loss": 2.3183594346046448, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0078227405954557e-05, "perf/tokens_per_sec": 26968.570205939097, "train/loss_prose": 3.743609666824341, "train/loss_code": 1.7202134132385254, "train/loss_math": 2.4585890769958496} +{"step": 2457, "train/loss": 2.6856846809387207, "train/lm_loss": 2.6856846809387207, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0061136744145652e-05, "perf/tokens_per_sec": 26678.182552832677, "train/loss_math": 2.2982043027877808, "train/loss_prose": 3.593132416407267, "train/loss_code": 1.5132622718811035} +{"step": 2458, "train/loss": 2.1668460071086884, "train/lm_loss": 2.1668460071086884, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0044043615600175e-05, "perf/tokens_per_sec": 26797.863616865623, "train/loss_code": 1.7599794626235963, "train/loss_prose": 3.2039681673049927, "train/loss_math": 2.126934289932251} +{"step": 2459, "train/loss": 2.5339975357055664, "train/lm_loss": 2.5339975357055664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.002694802864912e-05, "perf/tokens_per_sec": 26935.885181387443, "train/loss_math": 2.2004552125930785, "train/loss_prose": 3.541753053665161, "train/loss_code": 2.1861979961395264} +{"step": 2460, "train/loss": 2.95719450712204, "train/lm_loss": 2.95719450712204, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0009849991624662e-05, "perf/tokens_per_sec": 27260.936088644736, "train/loss_math": 2.1456539630889893, "train/loss_code": 1.8660830855369568, "train/loss_prose": 3.908519983291626} +{"step": 2461, "train/loss": 2.426787316799164, "train/lm_loss": 2.426787316799164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9992749512860173e-05, "perf/tokens_per_sec": 27265.392342143605, "train/loss_code": 1.247823715209961, "train/loss_math": 2.213287854194641, "train/loss_prose": 3.5500177145004272} +{"step": 2462, "train/loss": 2.3414308726787567, "train/lm_loss": 2.3414308726787567, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9975646600690234e-05, "perf/tokens_per_sec": 27336.021105223797, "train/loss_prose": 3.517075538635254, "train/loss_math": 2.29778790473938, "train/loss_code": 1.1948811610539753} +{"step": 2463, "train/loss": 2.4547050297260284, "train/lm_loss": 2.4547050297260284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9958541263450584e-05, "perf/tokens_per_sec": 26400.471437945358, "train/loss_code": 1.0319578647613525, "train/loss_math": 2.025228579839071, "train/loss_prose": 3.1324991583824158} +{"step": 2464, "train/loss": 2.2933433651924133, "train/lm_loss": 2.2933433651924133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9941433509478156e-05, "perf/tokens_per_sec": 26477.77989368736, "train/loss_prose": 3.9361515045166016, "train/loss_math": 2.287715291976929, "train/loss_code": 1.4860097765922546} +{"step": 2465, "train/loss": 2.455985337495804, "train/lm_loss": 2.455985337495804, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9924323347111073e-05, "perf/tokens_per_sec": 26811.24642657049, "train/loss_prose": 3.5078700383504233, "train/loss_math": 2.0975295305252075, "train/loss_code": 1.643071134885152} +{"step": 2466, "train/loss": 3.0976293683052063, "train/lm_loss": 3.0976293683052063, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.99072107846886e-05, "perf/tokens_per_sec": 27063.606550765915, "train/loss_prose": 3.351624329884847, "train/loss_math": 2.335644483566284} +{"step": 2467, "train/loss": 2.081886947154999, "train/lm_loss": 2.081886947154999, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9890095830551207e-05, "perf/tokens_per_sec": 26050.076549597114, "train/loss_math": 2.3495240211486816, "train/loss_code": 1.5075269937515259, "train/loss_prose": 2.962969660758972} +{"step": 2468, "train/loss": 2.345160275697708, "train/lm_loss": 2.345160275697708, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9872978493040514e-05, "perf/tokens_per_sec": 26984.709360843914, "train/loss_prose": 3.3305324713389077, "train/loss_code": 1.639334003130595, "train/loss_math": 1.9258412718772888} +{"step": 2469, "train/loss": 2.585260510444641, "train/lm_loss": 2.585260510444641, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.98558587804993e-05, "perf/tokens_per_sec": 26635.167035396516, "train/loss_prose": 3.5512994130452475, "train/loss_math": 2.202500343322754, "train/loss_code": 1.7103426456451416} +{"step": 2470, "train/loss": 2.5370007753372192, "train/lm_loss": 2.5370007753372192, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9838736701271514e-05, "perf/tokens_per_sec": 25679.655345908188, "train/loss_prose": 3.462253510951996, "train/loss_code": 1.139768362045288, "train/loss_math": 2.0837274193763733} +{"step": 2471, "train/loss": 2.4096235632896423, "train/lm_loss": 2.4096235632896423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9821612263702226e-05, "perf/tokens_per_sec": 26933.393718106498, "train/loss_math": 2.242484450340271, "train/loss_prose": 3.4148415327072144, "train/loss_code": 1.7386839389801025} +{"step": 2472, "train/loss": 2.4566673040390015, "train/lm_loss": 2.4566673040390015, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9804485476137706e-05, "perf/tokens_per_sec": 26669.154332994403, "train/loss_prose": 3.6176836490631104, "train/loss_code": 1.4875874519348145, "train/loss_math": 2.16876220703125} +{"step": 2473, "train/loss": 2.9725719690322876, "train/lm_loss": 2.9725719690322876, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9787356346925327e-05, "perf/tokens_per_sec": 26573.245864719804, "train/loss_math": 2.4191854000091553, "train/loss_prose": 3.5409242153167724, "train/loss_code": 1.2375825643539429} +{"step": 2474, "train/loss": 2.1027243733406067, "train/lm_loss": 2.1027243733406067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9770224884413623e-05, "perf/tokens_per_sec": 26504.127122043334, "train/loss_math": 2.2614450693130492, "train/loss_prose": 2.879268169403076, "train/loss_code": 1.3176510035991669} +{"step": 2475, "train/loss": 2.1422872245311737, "train/lm_loss": 2.1422872245311737, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9753091096952255e-05, "perf/tokens_per_sec": 26955.41055379826, "train/loss_math": 2.2315096259117126, "train/loss_code": 0.8246030509471893, "train/loss_prose": 3.2815263271331787} +{"step": 2476, "train/loss": 2.617468774318695, "train/lm_loss": 2.617468774318695, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9735954992892033e-05, "perf/tokens_per_sec": 27304.262364490838, "train/loss_math": 2.1804683208465576, "train/loss_prose": 3.448842167854309, "train/loss_code": 1.6546377340952556} +{"step": 2477, "train/loss": 2.379379540681839, "train/lm_loss": 2.379379540681839, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9718816580584884e-05, "perf/tokens_per_sec": 27100.96239752272, "train/loss_math": 2.2301498651504517, "train/loss_code": 1.367800533771515, "train/loss_prose": 3.689418077468872} +{"step": 2478, "train/loss": 1.9661153852939606, "train/lm_loss": 1.9661153852939606, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9701675868383848e-05, "perf/tokens_per_sec": 26963.829558420766, "train/loss_prose": 3.070797920227051, "train/loss_code": 1.464410275220871, "train/loss_math": 2.2668280601501465} +{"step": 2479, "train/loss": 2.0911696553230286, "train/lm_loss": 2.0911696553230286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9684532864643122e-05, "perf/tokens_per_sec": 26639.214500030237, "train/loss_math": 2.1452946186065676, "train/loss_code": 1.3679077923297882, "train/loss_prose": 3.267068386077881} +{"step": 2480, "train/loss": 2.257142663002014, "train/lm_loss": 2.257142663002014, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9667387577717976e-05, "perf/tokens_per_sec": 27146.526283188778, "train/loss_math": 2.209767520427704, "train/loss_prose": 3.5314865112304688, "train/loss_code": 1.0775494873523712} +{"step": 2481, "train/loss": 1.9712951183319092, "train/lm_loss": 1.9712951183319092, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9650240015964825e-05, "perf/tokens_per_sec": 26486.270639945855, "train/loss_math": 2.1548920472462973, "train/loss_code": 1.3858803808689117, "train/loss_prose": 3.7621636390686035} +{"step": 2482, "train/loss": 2.3591663539409637, "train/lm_loss": 2.3591663539409637, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9633090187741185e-05, "perf/tokens_per_sec": 27092.32938089298, "train/loss_math": 2.257484197616577, "train/loss_code": 1.763675570487976, "train/loss_prose": 3.1870479583740234} +{"step": 2483, "train/loss": 2.068122088909149, "train/lm_loss": 2.068122088909149, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9615938101405676e-05, "perf/tokens_per_sec": 26970.179018673614, "train/loss_math": 2.3450827598571777, "train/loss_prose": 3.3834073543548584, "train/loss_code": 1.5315803289413452} +{"step": 2484, "train/loss": 3.272912919521332, "train/lm_loss": 3.272912919521332, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9598783765318007e-05, "perf/tokens_per_sec": 26665.139153352102, "train/loss_prose": 3.604288379351298, "train/loss_math": 2.3330233097076416, "train/loss_code": 2.224550485610962} +{"step": 2485, "train/loss": 2.5595353841781616, "train/lm_loss": 2.5595353841781616, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9581627187838994e-05, "perf/tokens_per_sec": 26986.06575979391, "train/loss_code": 1.5450401306152344, "train/loss_prose": 3.3225436210632324, "train/loss_math": 2.4728573163350425} +{"step": 2486, "train/loss": 2.2035920321941376, "train/lm_loss": 2.2035920321941376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9564468377330556e-05, "perf/tokens_per_sec": 24842.950555210915, "train/loss_math": 2.1226399660110475, "train/loss_prose": 3.6450085639953613, "train/loss_code": 1.6852643489837646} +{"step": 2487, "train/loss": 2.2492282390594482, "train/lm_loss": 2.2492282390594482, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9547307342155673e-05, "perf/tokens_per_sec": 25452.261363232312, "train/loss_math": 2.118358612060547, "train/loss_code": 1.7239465951919555, "train/loss_prose": 3.6278672218322754} +{"step": 2488, "train/loss": 2.0633776783943176, "train/lm_loss": 2.0633776783943176, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9530144090678435e-05, "perf/tokens_per_sec": 24550.91235486199, "train/loss_code": 1.427933692932129, "train/loss_prose": 3.320644974708557, "train/loss_math": 2.0769985914230347} +{"step": 2489, "train/loss": 2.813540905714035, "train/lm_loss": 2.813540905714035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9512978631264006e-05, "perf/tokens_per_sec": 26707.462731301912, "train/loss_prose": 3.373208224773407, "train/loss_code": 2.2337809801101685, "train/loss_math": 2.2739660143852234} +{"step": 2490, "train/loss": 2.7689332962036133, "train/lm_loss": 2.7689332962036133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.949581097227861e-05, "perf/tokens_per_sec": 26287.121177206842, "train/loss_prose": 3.552240014076233, "train/loss_math": 2.3959063291549683, "train/loss_code": 1.575346827507019} +{"step": 2491, "train/loss": 2.257176786661148, "train/lm_loss": 2.257176786661148, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9478641122089562e-05, "perf/tokens_per_sec": 26195.653415753193, "train/loss_prose": 3.4658737977345786, "train/loss_math": 2.2434725761413574, "train/loss_code": 1.0576158165931702} +{"step": 2492, "train/loss": 2.2845741510391235, "train/lm_loss": 2.2845741510391235, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9461469089065234e-05, "perf/tokens_per_sec": 26658.684286153206, "train/loss_prose": 3.0764967600504556, "train/loss_math": 2.1146042346954346, "train/loss_code": 1.3516452312469482} +{"step": 2493, "train/loss": 2.1541958451271057, "train/lm_loss": 2.1541958451271057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.944429488157508e-05, "perf/tokens_per_sec": 26408.30374392051, "train/loss_prose": 3.5389044284820557, "train/loss_math": 2.3156774044036865, "train/loss_code": 1.3811007142066956} +{"step": 2494, "train/loss": 2.3166279792785645, "train/lm_loss": 2.3166279792785645, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9427118507989586e-05, "perf/tokens_per_sec": 26255.745106429804, "train/loss_code": 1.5967341264088948, "train/loss_math": 2.2476534048716226, "train/loss_prose": 3.4999301433563232} +{"step": 2495, "train/loss": 2.502882957458496, "train/lm_loss": 2.502882957458496, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9409939976680313e-05, "perf/tokens_per_sec": 26464.89090312513, "train/loss_code": 2.1141551733016968, "train/loss_math": 2.263995587825775, "train/loss_prose": 3.3693851232528687} +{"step": 2496, "train/loss": 2.109739512205124, "train/lm_loss": 2.109739512205124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9392759296019867e-05, "perf/tokens_per_sec": 26893.719194872934, "train/loss_prose": 3.2318044900894165, "train/loss_code": 1.6663660645484923, "train/loss_math": 2.0824766159057617} +{"step": 2497, "train/loss": 2.047543317079544, "train/lm_loss": 2.047543317079544, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9375576474381905e-05, "perf/tokens_per_sec": 26848.705585604734, "train/loss_math": 2.2546317179997764, "train/loss_code": 1.5744324326515198, "train/loss_prose": 3.3187217712402344} +{"step": 2498, "train/loss": 2.398620843887329, "train/lm_loss": 2.398620843887329, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9358391520141122e-05, "perf/tokens_per_sec": 25574.948245318916, "train/loss_math": 2.3516035079956055, "train/loss_prose": 3.5060999393463135, "train/loss_code": 1.3224867184956868} +{"step": 2499, "train/loss": 2.1482174694538116, "train/lm_loss": 2.1482174694538116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9341204441673266e-05, "perf/tokens_per_sec": 26194.49512546923, "train/loss_code": 1.266906976699829, "train/loss_prose": 3.459115505218506, "train/loss_math": 1.7407644987106323} +{"step": 2500, "train/loss": 2.9464136362075806, "train/lm_loss": 2.9464136362075806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9324015247355098e-05, "perf/tokens_per_sec": 26833.27348749307, "train/loss_prose": 3.982417941093445, "train/loss_math": 2.658023476600647, "train/loss_code": 1.1627944707870483} +{"step": 2501, "train/loss": 2.393630415201187, "train/lm_loss": 2.393630415201187, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9306823945564422e-05, "perf/tokens_per_sec": 25609.75560832577, "train/loss_code": 1.463685393333435, "train/loss_math": 2.247769832611084, "train/loss_prose": 3.615296721458435} +{"step": 2502, "train/loss": 2.8419210612773895, "train/lm_loss": 2.8419210612773895, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9289630544680075e-05, "perf/tokens_per_sec": 26578.137975312155, "train/loss_math": 2.542996883392334, "train/loss_code": 1.62813933690389, "train/loss_prose": 3.826988160610199} +{"step": 2503, "train/loss": 2.3629500567913055, "train/lm_loss": 2.3629500567913055, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9272435053081922e-05, "perf/tokens_per_sec": 26949.19486549614, "train/loss_math": 2.142196923494339, "train/loss_prose": 3.891209602355957, "train/loss_code": 1.2761963605880737} +{"step": 2504, "train/loss": 2.0360836684703827, "train/lm_loss": 2.0360836684703827, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9255237479150816e-05, "perf/tokens_per_sec": 27133.492403985718, "train/loss_prose": 2.2957049012184143, "train/loss_code": 1.1988213658332825, "train/loss_math": 2.3249044120311737} +{"step": 2505, "train/loss": 3.104726195335388, "train/lm_loss": 3.104726195335388, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.923803783126866e-05, "perf/tokens_per_sec": 27178.821847636074, "train/loss_prose": 3.3232131401697793, "train/loss_math": 2.4492645263671875} +{"step": 2506, "train/loss": 2.6071440875530243, "train/lm_loss": 2.6071440875530243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9220836117818344e-05, "perf/tokens_per_sec": 26463.137991374, "train/loss_prose": 3.559437910715739, "train/loss_math": 2.5986968278884888, "train/loss_code": 1.6604818105697632} +{"step": 2507, "train/loss": 2.5576159954071045, "train/lm_loss": 2.5576159954071045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.920363234718379e-05, "perf/tokens_per_sec": 26865.12165082582, "train/loss_math": 2.2891743779182434, "train/loss_code": 0.920233964920044, "train/loss_prose": 4.7318809032440186} +{"step": 2508, "train/loss": 2.569243907928467, "train/lm_loss": 2.569243907928467, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.918642652774989e-05, "perf/tokens_per_sec": 26858.695582977274, "train/loss_prose": 3.2280097007751465, "train/loss_code": 1.7861523032188416, "train/loss_math": 2.4325390656789145} +{"step": 2509, "train/loss": 2.4521650671958923, "train/lm_loss": 2.4521650671958923, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.916921866790256e-05, "perf/tokens_per_sec": 25305.635163022263, "train/loss_code": 1.7847975492477417, "train/loss_math": 2.2609375715255737, "train/loss_prose": 3.088304360707601} +{"step": 2510, "train/loss": 2.9880049228668213, "train/lm_loss": 2.9880049228668213, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.91520087760287e-05, "perf/tokens_per_sec": 26943.489104898483, "train/loss_prose": 3.465789461135864, "train/loss_math": 2.3438178300857544, "train/loss_code": 1.887455701828003} +{"step": 2511, "train/loss": 2.8938143253326416, "train/lm_loss": 2.8938143253326416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9134796860516194e-05, "perf/tokens_per_sec": 26827.909958727178, "train/loss_prose": 3.3517451683680215, "train/loss_code": 0.9386548399925232, "train/loss_math": 2.1013882160186768} +{"step": 2512, "train/loss": 2.703085482120514, "train/lm_loss": 2.703085482120514, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9117582929753932e-05, "perf/tokens_per_sec": 27077.939009551428, "train/loss_prose": 3.4768913984298706, "train/loss_math": 2.3251123825709024, "train/loss_code": 0.7417805790901184} +{"step": 2513, "train/loss": 2.1601021885871887, "train/lm_loss": 2.1601021885871887, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.910036699213178e-05, "perf/tokens_per_sec": 27051.631824172502, "train/loss_math": 2.0396650234858194, "train/loss_prose": 3.484524726867676, "train/loss_code": 1.9193242192268372} +{"step": 2514, "train/loss": 2.753262162208557, "train/lm_loss": 2.753262162208557, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.908314905604056e-05, "perf/tokens_per_sec": 27146.35470348149, "train/loss_math": 2.203263759613037, "train/loss_code": 2.0879600445429483, "train/loss_prose": 3.3897382616996765} +{"step": 2515, "train/loss": 2.2404991686344147, "train/lm_loss": 2.2404991686344147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9065929129872094e-05, "perf/tokens_per_sec": 26091.736668889087, "train/loss_code": 1.728402316570282, "train/loss_math": 2.230926990509033, "train/loss_prose": 3.274264931678772} +{"step": 2516, "train/loss": 2.1753512918949127, "train/lm_loss": 2.1753512918949127, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9048707222019154e-05, "perf/tokens_per_sec": 26988.227802484886, "train/loss_code": 1.6020514488220214, "train/loss_prose": 3.5027111768722534, "train/loss_math": 2.3871302604675293} +{"step": 2517, "train/loss": 2.5961477756500244, "train/lm_loss": 2.5961477756500244, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.903148334087552e-05, "perf/tokens_per_sec": 27040.17839728274, "train/loss_math": 2.3123199939727783, "train/loss_prose": 3.7411365509033203, "train/loss_code": 1.6403775215148926} +{"step": 2518, "train/loss": 2.1325393319129944, "train/lm_loss": 2.1325393319129944, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9014257494835862e-05, "perf/tokens_per_sec": 27116.361619481, "train/loss_prose": 3.762699604034424, "train/loss_code": 1.368192121386528, "train/loss_math": 2.0310733914375305} +{"step": 2519, "train/loss": 2.405677318572998, "train/lm_loss": 2.405677318572998, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8997029692295874e-05, "perf/tokens_per_sec": 27141.122564113095, "train/loss_prose": 3.659313678741455, "train/loss_math": 2.331139326095581, "train/loss_code": 1.48408442735672} +{"step": 2520, "train/loss": 2.7200335264205933, "train/lm_loss": 2.7200335264205933, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.897979994165217e-05, "perf/tokens_per_sec": 27106.906134917055, "train/loss_prose": 3.636591613292694, "train/loss_math": 1.9582011699676514, "train/loss_code": 1.648749589920044} +{"step": 2521, "train/loss": 2.2680676579475403, "train/lm_loss": 2.2680676579475403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8962568251302324e-05, "perf/tokens_per_sec": 26773.307501102576, "train/loss_math": 2.3701404333114624, "train/loss_prose": 3.2671791315078735, "train/loss_code": 1.7174755930900574} +{"step": 2522, "train/loss": 2.078418344259262, "train/lm_loss": 2.078418344259262, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.894533462964485e-05, "perf/tokens_per_sec": 26738.971154998497, "train/loss_code": 1.516302466392517, "train/loss_prose": 3.5471794605255127, "train/loss_math": 1.9514752626419067} +{"step": 2523, "train/loss": 2.6200581789016724, "train/lm_loss": 2.6200581789016724, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8928099085079197e-05, "perf/tokens_per_sec": 26981.82746576203, "train/loss_math": 2.111998716990153, "train/loss_prose": 3.685974915822347, "train/loss_code": 1.7832720875740051} +{"step": 2524, "train/loss": 2.2788114845752716, "train/lm_loss": 2.2788114845752716, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8910861626005776e-05, "perf/tokens_per_sec": 27088.655607220964, "train/loss_code": 1.1380999485651653, "train/loss_prose": 3.082299768924713, "train/loss_math": 2.4869930744171143} +{"step": 2525, "train/loss": 2.156267464160919, "train/lm_loss": 2.156267464160919, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8893622260825904e-05, "perf/tokens_per_sec": 26990.687017192162, "train/loss_code": 1.8147209405899047, "train/loss_math": 2.3573856353759766, "train/loss_prose": 3.4617631435394287} +{"step": 2526, "train/loss": 1.8738387525081635, "train/lm_loss": 1.8738387525081635, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8876380997941847e-05, "perf/tokens_per_sec": 27033.32801054906, "train/loss_prose": 2.684675693511963, "train/loss_code": 1.5026889145374298, "train/loss_math": 2.0984262228012085} +{"step": 2527, "train/loss": 2.360283762216568, "train/lm_loss": 2.360283762216568, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8859137845756784e-05, "perf/tokens_per_sec": 26713.56739862606, "train/loss_prose": 3.962146520614624, "train/loss_code": 1.7567478120326996, "train/loss_math": 1.9654931426048279} +{"step": 2528, "train/loss": 2.340507209300995, "train/lm_loss": 2.340507209300995, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8841892812674808e-05, "perf/tokens_per_sec": 27003.881144294246, "train/loss_math": 2.2280458211898804, "train/loss_prose": 4.348246097564697, "train/loss_code": 1.0075360536575317} +{"step": 2529, "train/loss": 2.074373811483383, "train/lm_loss": 2.074373811483383, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8824645907100954e-05, "perf/tokens_per_sec": 27024.823045837154, "train/loss_prose": 3.034420609474182, "train/loss_math": 2.0623645186424255, "train/loss_code": 1.1383448541164398} +{"step": 2530, "train/loss": 1.7327297031879425, "train/lm_loss": 1.7327297031879425, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8807397137441145e-05, "perf/tokens_per_sec": 27045.840031611442, "train/loss_code": 1.2702598810195922, "train/loss_math": 2.2534059286117554, "train/loss_prose": 3.0037262439727783} +{"step": 2531, "train/loss": 2.3098406195640564, "train/lm_loss": 2.3098406195640564, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.879014651210223e-05, "perf/tokens_per_sec": 26893.045612079735, "train/loss_code": 1.6393039524555206, "train/loss_math": 2.2893307209014893, "train/loss_prose": 3.210726022720337} +{"step": 2532, "train/loss": 2.553819477558136, "train/lm_loss": 2.553819477558136, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.877289403949194e-05, "perf/tokens_per_sec": 27007.44704424979, "train/loss_math": 2.1726613640785217, "train/loss_prose": 3.4859684705734253, "train/loss_code": 2.278324007987976} +{"step": 2533, "train/loss": 2.637192189693451, "train/lm_loss": 2.637192189693451, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.875563972801893e-05, "perf/tokens_per_sec": 26329.745304518543, "train/loss_prose": 3.2847288846969604, "train/loss_math": 2.1639357805252075, "train/loss_code": 1.8153750896453857} +{"step": 2534, "train/loss": 3.1303765773773193, "train/lm_loss": 3.1303765773773193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8738383586092745e-05, "perf/tokens_per_sec": 26092.846259110924, "train/loss_prose": 3.719194936752319, "train/loss_code": 1.8300812244415283, "train/loss_math": 2.3084786534309387} +{"step": 2535, "train/loss": 2.723852038383484, "train/lm_loss": 2.723852038383484, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8721125622123806e-05, "perf/tokens_per_sec": 25640.677324991342, "train/loss_prose": 3.4841625690460205, "train/loss_code": 1.898376226425171, "train/loss_math": 2.5138588746388755} +{"step": 2536, "train/loss": 2.3000765442848206, "train/lm_loss": 2.3000765442848206, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8703865844523452e-05, "perf/tokens_per_sec": 26855.84053949699, "train/loss_prose": 3.160709857940674, "train/loss_math": 2.072046677271525, "train/loss_code": 1.954350233078003} +{"step": 2537, "train/loss": 3.1542317271232605, "train/lm_loss": 3.1542317271232605, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8686604261703875e-05, "perf/tokens_per_sec": 26784.11833743881, "train/loss_prose": 3.7932172298431395, "train/loss_code": 1.7468990087509155, "train/loss_math": 2.2604339122772217} +{"step": 2538, "train/loss": 2.6068088114261627, "train/lm_loss": 2.6068088114261627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8669340882078166e-05, "perf/tokens_per_sec": 27030.818489346504, "train/loss_prose": 3.2977656722068787, "train/loss_code": 1.7976537148157756, "train/loss_math": 2.270447015762329} +{"step": 2539, "train/loss": 2.472863107919693, "train/lm_loss": 2.472863107919693, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8652075714060295e-05, "perf/tokens_per_sec": 25847.643955772808, "train/loss_prose": 3.2472920417785645, "train/loss_math": 2.073182463645935, "train/loss_code": 1.7482982873916626} +{"step": 2540, "train/loss": 2.54561784863472, "train/lm_loss": 2.54561784863472, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.863480876606508e-05, "perf/tokens_per_sec": 26177.212827846182, "train/loss_prose": 3.265830159187317, "train/loss_math": 2.1088773012161255, "train/loss_code": 1.541933298110962} +{"step": 2541, "train/loss": 2.511595755815506, "train/lm_loss": 2.511595755815506, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.861754004650823e-05, "perf/tokens_per_sec": 26911.707910191282, "train/loss_code": 1.6537237962086995, "train/loss_math": 2.212380528450012, "train/loss_prose": 3.5689446131388345} +{"step": 2542, "train/loss": 2.1109014749526978, "train/lm_loss": 2.1109014749526978, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8600269563806302e-05, "perf/tokens_per_sec": 27083.659165791723, "train/loss_math": 2.5002846717834473, "train/loss_code": 1.2277424782514572, "train/loss_prose": 3.4878368377685547} +{"step": 2543, "train/loss": 2.79168838262558, "train/lm_loss": 2.79168838262558, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.858299732637674e-05, "perf/tokens_per_sec": 26776.771027250794, "train/loss_prose": 3.5422874093055725, "train/loss_math": 2.112230062484741, "train/loss_code": 2.0173755089441934} +{"step": 2544, "train/loss": 2.361052930355072, "train/lm_loss": 2.361052930355072, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8565723342637796e-05, "perf/tokens_per_sec": 26962.517709392694, "train/loss_prose": 3.323977073033651, "train/loss_math": 2.2194883823394775, "train/loss_code": 1.492505153020223} +{"step": 2545, "train/loss": 3.2583317160606384, "train/lm_loss": 3.2583317160606384, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.854844762100861e-05, "perf/tokens_per_sec": 27069.23534972836, "train/loss_math": 2.3480184078216553, "train/loss_prose": 3.561769445737203} +{"step": 2546, "train/loss": 1.781944826245308, "train/lm_loss": 1.781944826245308, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.853117016990917e-05, "perf/tokens_per_sec": 27056.573843117923, "train/loss_math": 2.1396560668945312, "train/loss_code": 0.9163773059844971, "train/loss_prose": 3.15536892414093} +{"step": 2547, "train/loss": 2.5207506120204926, "train/lm_loss": 2.5207506120204926, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8513890997760272e-05, "perf/tokens_per_sec": 27119.4435641345, "train/loss_prose": 3.523600459098816, "train/loss_code": 1.5179005563259125} +{"step": 2548, "train/loss": 2.6877450942993164, "train/lm_loss": 2.6877450942993164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.849661011298361e-05, "perf/tokens_per_sec": 26390.049099997235, "train/loss_prose": 3.5265629291534424, "train/loss_math": 2.2068976759910583, "train/loss_code": 2.0946807861328125} +{"step": 2549, "train/loss": 2.268261581659317, "train/lm_loss": 2.268261581659317, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8479327524001636e-05, "perf/tokens_per_sec": 25930.821210305392, "train/loss_code": 1.6110356748104095, "train/loss_math": 2.1773569583892822, "train/loss_prose": 3.673617959022522} +{"step": 2550, "train/loss": 2.40737321972847, "train/lm_loss": 2.40737321972847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8462043239237707e-05, "perf/tokens_per_sec": 27027.75665394454, "train/loss_code": 1.1064778168996174, "train/loss_math": 2.5125834941864014, "train/loss_prose": 3.3567423224449158} +{"step": 2551, "train/loss": 2.70520281791687, "train/lm_loss": 2.70520281791687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.844475726711595e-05, "perf/tokens_per_sec": 26773.51612189367, "train/loss_math": 2.1481362183888755, "train/loss_prose": 3.38675993680954, "train/loss_code": 1.6501739025115967} +{"step": 2552, "train/loss": 2.2740717828273773, "train/lm_loss": 2.2740717828273773, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8427469616061364e-05, "perf/tokens_per_sec": 27196.978839069598, "train/loss_prose": 3.002148985862732, "train/loss_code": 1.8146419525146484, "train/loss_math": 2.2481170097986856} +{"step": 2553, "train/loss": 2.008341759443283, "train/lm_loss": 2.008341759443283, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.841018029449971e-05, "perf/tokens_per_sec": 27163.738163603477, "train/loss_code": 1.3441429138183594, "train/loss_math": 1.85746963818868, "train/loss_prose": 3.2309484481811523} +{"step": 2554, "train/loss": 2.4382729530334473, "train/lm_loss": 2.4382729530334473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8392889310857612e-05, "perf/tokens_per_sec": 26672.549551857388, "train/loss_code": 0.995132327079773, "train/loss_prose": 3.441197633743286, "train/loss_math": 1.8755638599395752} +{"step": 2555, "train/loss": 2.228199988603592, "train/lm_loss": 2.228199988603592, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8375596673562482e-05, "perf/tokens_per_sec": 26811.957762261336, "train/loss_code": 1.8918854951858521, "train/loss_prose": 3.9782984256744385, "train/loss_math": 2.193936586380005} +{"step": 2556, "train/loss": 2.0433207154273987, "train/lm_loss": 2.0433207154273987, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8358302391042536e-05, "perf/tokens_per_sec": 26816.268426959454, "train/loss_math": 2.06955087184906, "train/loss_code": 1.5293174982070923, "train/loss_prose": 3.48041033744812} +{"step": 2557, "train/loss": 2.4761409163475037, "train/lm_loss": 2.4761409163475037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8341006471726816e-05, "perf/tokens_per_sec": 26827.072101037957, "train/loss_math": 2.251552402973175, "train/loss_code": 0.8175898194313049, "train/loss_prose": 3.3284428914388022} +{"step": 2558, "train/loss": 2.345000386238098, "train/lm_loss": 2.345000386238098, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.832370892404511e-05, "perf/tokens_per_sec": 27409.509136306708, "train/loss_math": 2.155505418777466, "train/loss_prose": 3.0950841108957925, "train/loss_code": 0.8527299165725708} +{"step": 2559, "train/loss": 2.6269535422325134, "train/lm_loss": 2.6269535422325134, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8306409756428064e-05, "perf/tokens_per_sec": 26370.28145501208, "train/loss_math": 2.1748279094696046, "train/loss_prose": 3.3804964224497476} +{"step": 2560, "train/loss": 2.264217495918274, "train/lm_loss": 2.264217495918274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8289108977307067e-05, "perf/tokens_per_sec": 27092.03031555044, "train/loss_prose": 4.177879095077515, "train/loss_code": 1.164663831392924, "train/loss_math": 2.0879966417948403} +{"step": 2561, "train/loss": 2.2436842918395996, "train/lm_loss": 2.2436842918395996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.827180659511431e-05, "perf/tokens_per_sec": 26940.57376018316, "train/loss_prose": 3.6302714347839355, "train/loss_code": 1.7016849994659424, "train/loss_math": 2.1805059909820557} +{"step": 2562, "train/loss": 2.670701265335083, "train/lm_loss": 2.670701265335083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8254502618282763e-05, "perf/tokens_per_sec": 26064.06709348545, "train/loss_math": 2.3629722197850547, "train/loss_code": 2.001472532749176, "train/loss_prose": 3.4245827198028564} +{"step": 2563, "train/loss": 2.3464761674404144, "train/lm_loss": 2.3464761674404144, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8237197055246172e-05, "perf/tokens_per_sec": 26977.16687970102, "train/loss_prose": 3.5486797094345093, "train/loss_math": 2.2578777074813843, "train/loss_code": 1.321469008922577} +{"step": 2564, "train/loss": 2.5579534769058228, "train/lm_loss": 2.5579534769058228, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8219889914439074e-05, "perf/tokens_per_sec": 27074.482474733704, "train/loss_prose": 3.3955132961273193, "train/loss_math": 2.1450099050998688, "train/loss_code": 1.6970481872558594} +{"step": 2565, "train/loss": 2.618808925151825, "train/lm_loss": 2.618808925151825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8202581204296742e-05, "perf/tokens_per_sec": 26335.5170170431, "train/loss_code": 1.6456786195437114, "train/loss_prose": 3.3232625126838684, "train/loss_math": 2.720386266708374} +{"step": 2566, "train/loss": 2.450974017381668, "train/lm_loss": 2.450974017381668, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8185270933255237e-05, "perf/tokens_per_sec": 26123.516185146887, "train/loss_math": 2.2004230499267576, "train/loss_prose": 3.646396040916443, "train/loss_code": 1.3128845691680908} +{"step": 2567, "train/loss": 2.369792103767395, "train/lm_loss": 2.369792103767395, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.816795910975137e-05, "perf/tokens_per_sec": 26666.58779008672, "train/loss_code": 1.641343355178833, "train/loss_math": 2.5086817741394043, "train/loss_prose": 3.2541303634643555} +{"step": 2568, "train/loss": 2.6346142888069153, "train/lm_loss": 2.6346142888069153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8150645742222714e-05, "perf/tokens_per_sec": 26830.92667979591, "train/loss_math": 2.2222470343112946, "train/loss_prose": 3.6503361066182456, "train/loss_code": 1.2369171380996704} +{"step": 2569, "train/loss": 2.120803415775299, "train/lm_loss": 2.120803415775299, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8133330839107608e-05, "perf/tokens_per_sec": 26586.36406882795, "train/loss_math": 2.2736976146698, "train/loss_prose": 3.336221694946289, "train/loss_code": 1.8165621042251587} +{"step": 2570, "train/loss": 3.108753263950348, "train/lm_loss": 3.108753263950348, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8116014408845116e-05, "perf/tokens_per_sec": 26467.948863163667, "train/loss_prose": 3.8123058795928957, "train/loss_code": 1.7586765885353088, "train/loss_math": 2.291142225265503} +{"step": 2571, "train/loss": 2.1873329281806946, "train/lm_loss": 2.1873329281806946, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8098696459875046e-05, "perf/tokens_per_sec": 26966.834596922807, "train/loss_code": 1.8671557426452636, "train/loss_math": 2.0749496817588806, "train/loss_prose": 4.0129852294921875} +{"step": 2572, "train/loss": 2.506619930267334, "train/lm_loss": 2.506619930267334, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.808137700063797e-05, "perf/tokens_per_sec": 26974.24424280773, "train/loss_math": 2.3160195350646973, "train/loss_prose": 3.3715922832489014, "train/loss_code": 1.7296762466430664} +{"step": 2573, "train/loss": 2.6773563027381897, "train/lm_loss": 2.6773563027381897, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.806405603957517e-05, "perf/tokens_per_sec": 27029.37253618628, "train/loss_prose": 3.598588764667511, "train/loss_math": 2.318346858024597, "train/loss_code": 1.1939004361629486} +{"step": 2574, "train/loss": 2.3482432067394257, "train/lm_loss": 2.3482432067394257, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8046733585128687e-05, "perf/tokens_per_sec": 26980.768100259917, "train/loss_code": 1.529997189839681, "train/loss_prose": 3.27878737449646, "train/loss_math": 2.179795503616333} +{"step": 2575, "train/loss": 2.5073626041412354, "train/lm_loss": 2.5073626041412354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8029409645741267e-05, "perf/tokens_per_sec": 27304.696322738248, "train/loss_math": 2.1789066791534424, "train/loss_prose": 3.492730140686035} +{"step": 2576, "train/loss": 2.5380173921585083, "train/lm_loss": 2.5380173921585083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8012084229856382e-05, "perf/tokens_per_sec": 27318.50339257688, "train/loss_prose": 3.880502382914225, "train/loss_code": 1.391535798708598, "train/loss_math": 2.2440123558044434} +{"step": 2577, "train/loss": 2.4859135150909424, "train/lm_loss": 2.4859135150909424, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7994757345918244e-05, "perf/tokens_per_sec": 24970.159391642053, "train/loss_prose": 3.4226292967796326, "train/loss_code": 1.3910301327705383, "train/loss_math": 1.707365334033966} +{"step": 2578, "train/loss": 2.126632869243622, "train/lm_loss": 2.126632869243622, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7977429002371747e-05, "perf/tokens_per_sec": 26548.443062582483, "train/loss_prose": 3.3619484901428223, "train/loss_math": 2.23787522315979, "train/loss_code": 1.1918466488520305} +{"step": 2579, "train/loss": 1.845592200756073, "train/lm_loss": 1.845592200756073, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7960099207662532e-05, "perf/tokens_per_sec": 24842.160249059736, "train/loss_math": 1.9401498238245647, "train/loss_code": 1.3158310651779175, "train/loss_prose": 3.6809635162353516} +{"step": 2580, "train/loss": 1.8273553550243378, "train/lm_loss": 1.8273553550243378, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.794276797023691e-05, "perf/tokens_per_sec": 23195.570913774736, "train/loss_code": 1.5779550472895305, "train/loss_math": 2.5755558013916016} +{"step": 2581, "train/loss": 2.1169557869434357, "train/lm_loss": 2.1169557869434357, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.792543529854194e-05, "perf/tokens_per_sec": 24382.200551513113, "train/loss_prose": 3.2986011505126953, "train/loss_math": 2.4166719913482666, "train/loss_code": 1.5843544244766234} +{"step": 2582, "train/loss": 2.8093461394309998, "train/lm_loss": 2.8093461394309998, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7908101201025337e-05, "perf/tokens_per_sec": 25731.155580318362, "train/loss_code": 1.5521785616874695, "train/loss_math": 2.5733530521392822, "train/loss_prose": 3.5559263229370117} +{"step": 2583, "train/loss": 2.2202257812023163, "train/lm_loss": 2.2202257812023163, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7890765686135544e-05, "perf/tokens_per_sec": 25829.806416305077, "train/loss_code": 1.4428196946779888, "train/loss_prose": 3.3251644372940063, "train/loss_math": 2.2610061168670654} +{"step": 2584, "train/loss": 1.9219218790531158, "train/lm_loss": 1.9219218790531158, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.787342876232167e-05, "perf/tokens_per_sec": 26332.731748963466, "train/loss_code": 1.249178722500801, "train/loss_math": 2.402729590733846, "train/loss_prose": 3.1704702377319336} +{"step": 2585, "train/loss": 2.847792327404022, "train/lm_loss": 2.847792327404022, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7856090438033522e-05, "perf/tokens_per_sec": 26270.680571781588, "train/loss_math": 2.214479605356852, "train/loss_prose": 3.2277798652648926} +{"step": 2586, "train/loss": 2.357082337141037, "train/lm_loss": 2.357082337141037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.783875072172159e-05, "perf/tokens_per_sec": 26334.063760189583, "train/loss_math": 2.443425416946411, "train/loss_prose": 3.6372292041778564, "train/loss_code": 1.4173078934351604} +{"step": 2587, "train/loss": 2.0316096246242523, "train/lm_loss": 2.0316096246242523, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.782140962183704e-05, "perf/tokens_per_sec": 26350.17965805857, "train/loss_code": 1.691561758518219, "train/loss_prose": 3.0469181537628174, "train/loss_math": 2.1465702851613364} +{"step": 2588, "train/loss": 2.625572144985199, "train/lm_loss": 2.625572144985199, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7804067146831725e-05, "perf/tokens_per_sec": 26370.32193220105, "train/loss_prose": 3.6399661898612976, "train/loss_math": 2.236692190170288, "train/loss_code": 0.9856638610363007} +{"step": 2589, "train/loss": 2.9944689869880676, "train/lm_loss": 2.9944689869880676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7786723305158136e-05, "perf/tokens_per_sec": 25845.738555877168, "train/loss_math": 2.228135824203491, "train/loss_prose": 3.2499134143193564} +{"step": 2590, "train/loss": 2.3657249808311462, "train/lm_loss": 2.3657249808311462, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7769378105269467e-05, "perf/tokens_per_sec": 25780.92824653838, "train/loss_code": 1.1488732695579529, "train/loss_math": 2.065288027127584, "train/loss_prose": 3.477396329243978} +{"step": 2591, "train/loss": 2.1128135919570923, "train/lm_loss": 2.1128135919570923, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7752031555619555e-05, "perf/tokens_per_sec": 25752.137809462707, "train/loss_prose": 2.581530213356018, "train/loss_code": 1.2955050766468048, "train/loss_math": 2.2871095538139343} +{"step": 2592, "train/loss": 2.3889226019382477, "train/lm_loss": 2.3889226019382477, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7734683664662892e-05, "perf/tokens_per_sec": 24242.68436010397, "train/loss_code": 1.3141446113586426, "train/loss_math": 2.1561835209528604, "train/loss_prose": 3.3381799856821694} +{"step": 2593, "train/loss": 2.0422690212726593, "train/lm_loss": 2.0422690212726593, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.771733444085463e-05, "perf/tokens_per_sec": 25819.55806439008, "train/loss_code": 1.241630956530571, "train/loss_prose": 3.4619890451431274, "train/loss_math": 2.2238247394561768} +{"step": 2594, "train/loss": 2.0044748187065125, "train/lm_loss": 2.0044748187065125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7699983892650573e-05, "perf/tokens_per_sec": 24798.376955521362, "train/loss_prose": 2.9539542198181152, "train/loss_math": 2.1388371706008913, "train/loss_code": 1.1938289999961853} +{"step": 2595, "train/loss": 2.0108191668987274, "train/lm_loss": 2.0108191668987274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7682632028507167e-05, "perf/tokens_per_sec": 25396.50061348242, "train/loss_code": 1.1348987221717834, "train/loss_math": 2.243262380361557, "train/loss_prose": 3.7088074684143066} +{"step": 2596, "train/loss": 2.460859566926956, "train/lm_loss": 2.460859566926956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.76652788568815e-05, "perf/tokens_per_sec": 25380.96735610448, "train/loss_math": 2.1540720224380494, "train/loss_code": 1.091172456741333, "train/loss_prose": 3.912671685218811} +{"step": 2597, "train/loss": 2.461345911026001, "train/lm_loss": 2.461345911026001, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.76479243862313e-05, "perf/tokens_per_sec": 25680.960493173865, "train/loss_math": 2.2325032552083335, "train/loss_prose": 3.462160031000773, "train/loss_code": 1.3033890128135681} +{"step": 2598, "train/loss": 2.74157577753067, "train/lm_loss": 2.74157577753067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7630568625014917e-05, "perf/tokens_per_sec": 23827.339492243573, "train/loss_prose": 3.5667904019355774, "train/loss_math": 2.121166467666626, "train/loss_code": 1.7115559577941895} +{"step": 2599, "train/loss": 2.3060954213142395, "train/lm_loss": 2.3060954213142395, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.761321158169134e-05, "perf/tokens_per_sec": 24741.62829956896, "train/loss_math": 2.1948471069335938, "train/loss_code": 1.9545903603235881, "train/loss_prose": 3.80560302734375} +{"step": 2600, "train/loss": 2.7013838291168213, "train/lm_loss": 2.7013838291168213, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7595853264720184e-05, "perf/tokens_per_sec": 22089.98740423956, "train/loss_math": 2.1528841654459634, "train/loss_code": 1.09181809425354, "train/loss_prose": 3.51514995098114} +{"step": 2600, "eval/loss": 2.187772890595653, "eval/lm_loss": 2.187772890595653, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.915335560501186, "eval/loss_code": 1.57507603081394, "eval/ppl_code": 4.831108917290289, "eval/loss_prose": 3.4882524128545795, "eval/ppl_prose": 32.72870144145774, "eval/loss_math": 2.0907902103109457, "eval/ppl_math": 8.09130647253727} +{"step": 2601, "train/loss": 2.530645579099655, "train/lm_loss": 2.530645579099655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7578493682561685e-05, "perf/tokens_per_sec": 26009.376139622058, "train/loss_prose": 3.417851209640503, "train/loss_math": 2.3233695030212402, "train/loss_code": 1.416796326637268} +{"step": 2602, "train/loss": 1.821273922920227, "train/lm_loss": 1.821273922920227, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.756113284367669e-05, "perf/tokens_per_sec": 25952.208876085566, "train/loss_code": 1.3271381497383117, "train/loss_math": 2.163710355758667, "train/loss_prose": 2.8853954076766968} +{"step": 2603, "train/loss": 2.2810858488082886, "train/lm_loss": 2.2810858488082886, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.754377075652666e-05, "perf/tokens_per_sec": 25789.51986393598, "train/loss_math": 2.160199681917826, "train/loss_code": 1.461198329925537, "train/loss_prose": 3.6922463178634644} +{"step": 2604, "train/loss": 2.6412203907966614, "train/lm_loss": 2.6412203907966614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7526407429573657e-05, "perf/tokens_per_sec": 25797.303701746063, "train/loss_code": 1.1211784482002258, "train/loss_prose": 3.497274100780487, "train/loss_math": 2.449154853820801} +{"step": 2605, "train/loss": 2.488194227218628, "train/lm_loss": 2.488194227218628, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7509042871280372e-05, "perf/tokens_per_sec": 26249.1259402316, "train/loss_code": 1.5185606479644775, "train/loss_math": 2.239396810531616, "train/loss_prose": 3.383413791656494} +{"step": 2606, "train/loss": 2.2404774725437164, "train/lm_loss": 2.2404774725437164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7491677090110076e-05, "perf/tokens_per_sec": 25695.133717417186, "train/loss_math": 2.357218861579895, "train/loss_prose": 3.852247714996338, "train/loss_code": 1.3762215673923492} +{"step": 2607, "train/loss": 1.9993202090263367, "train/lm_loss": 1.9993202090263367, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.747431009452663e-05, "perf/tokens_per_sec": 25822.00295497348, "train/loss_code": 1.4412367343902588, "train/loss_math": 2.5574036836624146} +{"step": 2608, "train/loss": 2.2273957431316376, "train/lm_loss": 2.2273957431316376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7456941892994497e-05, "perf/tokens_per_sec": 25216.083792011224, "train/loss_math": 2.0209823846817017, "train/loss_code": 1.0197734832763672, "train/loss_prose": 3.3472400903701782} +{"step": 2609, "train/loss": 2.482811152935028, "train/lm_loss": 2.482811152935028, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7439572493978736e-05, "perf/tokens_per_sec": 26347.148848722045, "train/loss_math": 2.30946946144104, "train/loss_prose": 3.5392181873321533, "train/loss_code": 1.773087739944458} +{"step": 2610, "train/loss": 2.582140266895294, "train/lm_loss": 2.582140266895294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7422201905944982e-05, "perf/tokens_per_sec": 25124.885648725685, "train/loss_prose": 3.652078151702881, "train/loss_code": 1.9331019222736359, "train/loss_math": 1.9684797525405884} +{"step": 2611, "train/loss": 2.7285228967666626, "train/lm_loss": 2.7285228967666626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7404830137359444e-05, "perf/tokens_per_sec": 25676.54655889808, "train/loss_prose": 3.055289649963379, "train/loss_math": 2.2271273136138916, "train/loss_code": 2.097480535507202} +{"step": 2612, "train/loss": 2.211880385875702, "train/lm_loss": 2.211880385875702, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7387457196688908e-05, "perf/tokens_per_sec": 25646.533711714645, "train/loss_math": 2.4333948294321694, "train/loss_code": 1.0240859786669414, "train/loss_prose": 3.6613004207611084} +{"step": 2613, "train/loss": 1.4910453855991364, "train/lm_loss": 1.4910453855991364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7370083092400735e-05, "perf/tokens_per_sec": 25627.940312400708, "train/loss_code": 1.3962453774043493, "train/loss_math": 2.1546456813812256} +{"step": 2614, "train/loss": 2.2883538901805878, "train/lm_loss": 2.2883538901805878, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7352707832962865e-05, "perf/tokens_per_sec": 25521.151199859767, "train/loss_prose": 3.2543081442515054, "train/loss_math": 2.0876982609430947, "train/loss_code": 1.1404059529304504} +{"step": 2615, "train/loss": 2.511388123035431, "train/lm_loss": 2.511388123035431, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.733533142684377e-05, "perf/tokens_per_sec": 25415.699542129285, "train/loss_prose": 3.6752819220225015, "train/loss_code": 1.539381782213847, "train/loss_math": 2.2235569953918457} +{"step": 2616, "train/loss": 2.1779166162014008, "train/lm_loss": 2.1779166162014008, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7317953882512504e-05, "perf/tokens_per_sec": 25884.4069879391, "train/loss_math": 2.311994433403015, "train/loss_code": 1.4972925782203674, "train/loss_prose": 2.8687753677368164} +{"step": 2617, "train/loss": 2.4436800479888916, "train/lm_loss": 2.4436800479888916, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7300575208438683e-05, "perf/tokens_per_sec": 25649.06082217959, "train/loss_math": 2.2684380531311037, "train/loss_code": 1.6849263906478882, "train/loss_prose": 3.261161684989929} +{"step": 2618, "train/loss": 2.1879672408103943, "train/lm_loss": 2.1879672408103943, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7283195413092445e-05, "perf/tokens_per_sec": 25677.08382630322, "train/loss_code": 1.2100506226221721, "train/loss_math": 2.0597382386525473, "train/loss_prose": 3.847185969352722} +{"step": 2619, "train/loss": 2.8936687707901, "train/lm_loss": 2.8936687707901, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.726581450494451e-05, "perf/tokens_per_sec": 26278.757614090904, "train/loss_prose": 3.731114053726196, "train/loss_code": 1.23053777217865, "train/loss_math": 2.032703399658203} +{"step": 2620, "train/loss": 2.44903227686882, "train/lm_loss": 2.44903227686882, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7248432492466096e-05, "perf/tokens_per_sec": 26105.37532309067, "train/loss_code": 1.2395477294921875, "train/loss_math": 2.135254144668579, "train/loss_prose": 3.210663676261902} +{"step": 2621, "train/loss": 2.5410832464694977, "train/lm_loss": 2.5410832464694977, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7231049384129016e-05, "perf/tokens_per_sec": 26279.722368986633, "train/loss_prose": 3.625364065170288, "train/loss_code": 1.502300500869751, "train/loss_math": 2.1493240197499595} +{"step": 2622, "train/loss": 2.1972488164901733, "train/lm_loss": 2.1972488164901733, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7213665188405556e-05, "perf/tokens_per_sec": 26121.053583537832, "train/loss_math": 2.315159479777018, "train/loss_prose": 3.550417900085449, "train/loss_code": 1.1772252321243286} +{"step": 2623, "train/loss": 2.398752987384796, "train/lm_loss": 2.398752987384796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7196279913768584e-05, "perf/tokens_per_sec": 25620.71682479651, "train/loss_math": 2.1576021671295167, "train/loss_prose": 3.517642855644226, "train/loss_code": 1.3667268753051758} +{"step": 2624, "train/loss": 2.3175952434539795, "train/lm_loss": 2.3175952434539795, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.717889356869146e-05, "perf/tokens_per_sec": 26789.63146667332, "train/loss_prose": 3.473204016685486, "train/loss_math": 2.207259774208069, "train/loss_code": 1.657525102297465} +{"step": 2625, "train/loss": 2.5203338265419006, "train/lm_loss": 2.5203338265419006, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7161506161648076e-05, "perf/tokens_per_sec": 26736.9736939169, "train/loss_code": 1.0624296267827351, "train/loss_prose": 3.594456911087036, "train/loss_math": 2.5975539684295654} +{"step": 2626, "train/loss": 2.2915931046009064, "train/lm_loss": 2.2915931046009064, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7144117701112846e-05, "perf/tokens_per_sec": 26468.968340222815, "train/loss_math": 2.294048309326172, "train/loss_prose": 3.5009504159291587, "train/loss_code": 1.3839612901210785} +{"step": 2627, "train/loss": 2.137420505285263, "train/lm_loss": 2.137420505285263, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7126728195560702e-05, "perf/tokens_per_sec": 26324.98453740153, "train/loss_code": 1.2053266167640686, "train/loss_math": 2.439900517463684, "train/loss_prose": 3.6991279125213623} +{"step": 2628, "train/loss": 1.9372382462024689, "train/lm_loss": 1.9372382462024689, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.710933765346707e-05, "perf/tokens_per_sec": 26646.4038465416, "train/loss_code": 1.2542810837427776, "train/loss_math": 2.347012424468994} +{"step": 2629, "train/loss": 2.2603933811187744, "train/lm_loss": 2.2603933811187744, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7091946083307896e-05, "perf/tokens_per_sec": 25787.23594750051, "train/loss_math": 2.2469104051589968, "train/loss_prose": 3.222773790359497, "train/loss_code": 1.8129104971885681} +{"step": 2630, "train/loss": 2.9344761967658997, "train/lm_loss": 2.9344761967658997, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.70745534935596e-05, "perf/tokens_per_sec": 26820.91545258688, "train/loss_prose": 3.7881492614746093, "train/loss_code": 1.1213228106498718, "train/loss_math": 2.2924163341522217} +{"step": 2631, "train/loss": 2.712683230638504, "train/lm_loss": 2.712683230638504, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.705715989269914e-05, "perf/tokens_per_sec": 26406.639497440014, "train/loss_math": 2.0453388690948486, "train/loss_prose": 3.821562349796295, "train/loss_code": 1.162268340587616} +{"step": 2632, "train/loss": 2.568252772092819, "train/lm_loss": 2.568252772092819, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7039765289203946e-05, "perf/tokens_per_sec": 26735.475793939186, "train/loss_code": 1.6866204738616943, "train/loss_prose": 3.0661564469337463, "train/loss_math": 2.454077363014221} +{"step": 2633, "train/loss": 3.0075249671936035, "train/lm_loss": 3.0075249671936035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7022369691551917e-05, "perf/tokens_per_sec": 24571.52425276539, "train/loss_math": 2.5710264444351196, "train/loss_prose": 3.4867737770080565, "train/loss_code": 1.4842784404754639} +{"step": 2634, "train/loss": 2.3632533252239227, "train/lm_loss": 2.3632533252239227, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7004973108221472e-05, "perf/tokens_per_sec": 26650.165337767823, "train/loss_math": 2.518159866333008, "train/loss_prose": 3.4750654697418213, "train/loss_code": 1.467138369878133} +{"step": 2635, "train/loss": 2.3216487169265747, "train/lm_loss": 2.3216487169265747, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6987575547691497e-05, "perf/tokens_per_sec": 25840.140638818866, "train/loss_math": 2.179103970527649, "train/loss_code": 1.3170390923817952, "train/loss_prose": 3.421288013458252} +{"step": 2636, "train/loss": 1.987025648355484, "train/lm_loss": 1.987025648355484, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.697017701844134e-05, "perf/tokens_per_sec": 26855.294791471268, "train/loss_math": 2.2631396651268005, "train/loss_code": 1.6153860449790955, "train/loss_prose": 3.2929954528808594} +{"step": 2637, "train/loss": 2.554472893476486, "train/lm_loss": 2.554472893476486, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.695277752895084e-05, "perf/tokens_per_sec": 26860.753270063633, "train/loss_code": 1.7727500995000203, "train/loss_math": 2.3982352018356323, "train/loss_prose": 3.440354029337565} +{"step": 2638, "train/loss": 2.2401317954063416, "train/lm_loss": 2.2401317954063416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6935377087700297e-05, "perf/tokens_per_sec": 25923.073950733713, "train/loss_code": 1.314763069152832, "train/loss_math": 2.1450629830360413, "train/loss_prose": 3.355638265609741} +{"step": 2639, "train/loss": 2.39659982919693, "train/lm_loss": 2.39659982919693, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6917975703170466e-05, "perf/tokens_per_sec": 26480.02451347594, "train/loss_math": 2.0986850261688232, "train/loss_code": 1.8560888767242432, "train/loss_prose": 3.41164231300354} +{"step": 2640, "train/loss": 2.604112207889557, "train/lm_loss": 2.604112207889557, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6900573383842583e-05, "perf/tokens_per_sec": 26922.33549747934, "train/loss_code": 2.179290771484375, "train/loss_math": 2.168074131011963, "train/loss_prose": 3.3271034558614097} +{"step": 2641, "train/loss": 2.354006737470627, "train/lm_loss": 2.354006737470627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6883170138198323e-05, "perf/tokens_per_sec": 26744.923693878824, "train/loss_code": 1.8892318308353424, "train/loss_math": 2.3611762523651123, "train/loss_prose": 3.2763867378234863} +{"step": 2642, "train/loss": 2.1478313207626343, "train/lm_loss": 2.1478313207626343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.686576597471981e-05, "perf/tokens_per_sec": 26879.917488351424, "train/loss_prose": 3.2034989992777505, "train/loss_math": 2.1517550945281982, "train/loss_code": 1.3550996780395508} +{"step": 2643, "train/loss": 2.6882450878620148, "train/lm_loss": 2.6882450878620148, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.684836090188963e-05, "perf/tokens_per_sec": 26748.213296049547, "train/loss_math": 2.2059691747029624, "train/loss_prose": 3.4420730471611023, "train/loss_code": 1.1197611093521118} +{"step": 2644, "train/loss": 2.270949959754944, "train/lm_loss": 2.270949959754944, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6830954928190794e-05, "perf/tokens_per_sec": 26864.07142957666, "train/loss_math": 2.1745660603046417, "train/loss_code": 1.675140917301178, "train/loss_prose": 3.059526562690735} +{"step": 2645, "train/loss": 2.258281707763672, "train/lm_loss": 2.258281707763672, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6813548062106775e-05, "perf/tokens_per_sec": 26024.426767493154, "train/loss_code": 1.6763871908187866, "train/loss_math": 1.9659745395183563, "train/loss_prose": 3.424791097640991} +{"step": 2646, "train/loss": 2.5981205999851227, "train/lm_loss": 2.5981205999851227, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6796140312121458e-05, "perf/tokens_per_sec": 26893.087710015796, "train/loss_math": 2.4162919521331787, "train/loss_prose": 3.499213933944702, "train/loss_code": 1.5192238688468933} +{"step": 2647, "train/loss": 2.657834529876709, "train/lm_loss": 2.657834529876709, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6778731686719178e-05, "perf/tokens_per_sec": 26872.60160048052, "train/loss_code": 1.3121971885363262, "train/loss_prose": 3.7131406664848328, "train/loss_math": 2.473520278930664} +{"step": 2648, "train/loss": 2.5442776679992676, "train/lm_loss": 2.5442776679992676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6761322194384674e-05, "perf/tokens_per_sec": 26717.098581868002, "train/loss_math": 2.3006020545959474, "train/loss_prose": 3.4610759019851685, "train/loss_code": 1.9290587902069092} +{"step": 2649, "train/loss": 2.3776840567588806, "train/lm_loss": 2.3776840567588806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.674391184360313e-05, "perf/tokens_per_sec": 26861.42523617283, "train/loss_code": 1.2750905454158783, "train/loss_prose": 3.4802775382995605} +{"step": 2650, "train/loss": 1.61018206179142, "train/lm_loss": 1.61018206179142, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6726500642860154e-05, "perf/tokens_per_sec": 26605.674243714788, "train/loss_code": 0.9690999388694763, "train/loss_math": 2.370116949081421, "train/loss_prose": 3.2957229614257812} +{"step": 2651, "train/loss": 2.2274564802646637, "train/lm_loss": 2.2274564802646637, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6709088600641717e-05, "perf/tokens_per_sec": 26708.33465580499, "train/loss_code": 1.4154928624629974, "train/loss_math": 2.052532911300659, "train/loss_prose": 3.368382294972738} +{"step": 2652, "train/loss": 2.4020729064941406, "train/lm_loss": 2.4020729064941406, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6691675725434272e-05, "perf/tokens_per_sec": 26906.31298325625, "train/loss_prose": 3.947145620981852, "train/loss_math": 2.232511878013611, "train/loss_code": 0.9700406591097513} +{"step": 2653, "train/loss": 2.1006321907043457, "train/lm_loss": 2.1006321907043457, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6674262025724627e-05, "perf/tokens_per_sec": 26832.10003232996, "train/loss_code": 1.5884633660316467, "train/loss_math": 2.2711483240127563, "train/loss_prose": 3.63775897026062} +{"step": 2654, "train/loss": 2.2724984288215637, "train/lm_loss": 2.2724984288215637, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6656847510000012e-05, "perf/tokens_per_sec": 25409.008558966947, "train/loss_code": 1.7047846019268036, "train/loss_prose": 3.5741273164749146, "train/loss_math": 2.1062971353530884} +{"step": 2655, "train/loss": 2.5208003222942352, "train/lm_loss": 2.5208003222942352, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6639432186748043e-05, "perf/tokens_per_sec": 26727.57299032168, "train/loss_code": 1.5269603729248047, "train/loss_math": 2.186177670955658, "train/loss_prose": 3.737722317377726} +{"step": 2656, "train/loss": 1.877524733543396, "train/lm_loss": 1.877524733543396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6622016064456738e-05, "perf/tokens_per_sec": 26330.87523085671, "train/loss_code": 1.3234186172485352, "train/loss_math": 2.0806401073932648, "train/loss_prose": 2.7273824214935303} +{"step": 2657, "train/loss": 1.9095521867275238, "train/lm_loss": 1.9095521867275238, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6604599151614513e-05, "perf/tokens_per_sec": 27084.128838195535, "train/loss_code": 0.9997891386349996, "train/loss_math": 2.264983296394348, "train/loss_prose": 3.2171175479888916} +{"step": 2658, "train/loss": 2.227682799100876, "train/lm_loss": 2.227682799100876, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6587181456710153e-05, "perf/tokens_per_sec": 26766.80016515148, "train/loss_code": 1.6287990510463715, "train/loss_prose": 3.04919425646464, "train/loss_math": 2.1586837768554688} +{"step": 2659, "train/loss": 2.2257384061813354, "train/lm_loss": 2.2257384061813354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.656976298823284e-05, "perf/tokens_per_sec": 27336.45607357668, "train/loss_prose": 2.885826826095581, "train/loss_math": 2.3591216087341307, "train/loss_code": 1.5622360110282898} +{"step": 2660, "train/loss": 2.28789359331131, "train/lm_loss": 2.28789359331131, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6552343754672103e-05, "perf/tokens_per_sec": 27505.482220564267, "train/loss_prose": 4.014587879180908, "train/loss_math": 2.259005069732666, "train/loss_code": 1.4967674016952515} +{"step": 2661, "train/loss": 2.068464934825897, "train/lm_loss": 2.068464934825897, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.653492376451789e-05, "perf/tokens_per_sec": 27325.194854935195, "train/loss_prose": 3.6819859743118286, "train/loss_code": 1.069272478421529, "train/loss_math": 1.9919767777125041} +{"step": 2662, "train/loss": 2.266445279121399, "train/lm_loss": 2.266445279121399, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6517503026260477e-05, "perf/tokens_per_sec": 27076.27464188281, "train/loss_math": 2.2519237995147705, "train/loss_prose": 2.976542115211487, "train/loss_code": 1.5853909850120544} +{"step": 2663, "train/loss": 2.27090185880661, "train/lm_loss": 2.27090185880661, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.650008154839052e-05, "perf/tokens_per_sec": 27186.563570043912, "train/loss_prose": 3.308513959248861, "train/loss_code": 1.534364640712738, "train/loss_math": 2.1042141914367676} +{"step": 2664, "train/loss": 2.7647095322608948, "train/lm_loss": 2.7647095322608948, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6482659339399045e-05, "perf/tokens_per_sec": 27325.890259788805, "train/loss_math": 2.1025267839431763, "train/loss_code": 1.3052142262458801, "train/loss_prose": 3.8255487084388733} +{"step": 2665, "train/loss": 2.4026151597499847, "train/lm_loss": 2.4026151597499847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.646523640777741e-05, "perf/tokens_per_sec": 27462.261775853167, "train/loss_code": 1.2803366581598918, "train/loss_math": 1.9269688725471497, "train/loss_prose": 3.8419910271962485} +{"step": 2666, "train/loss": 2.839702069759369, "train/lm_loss": 2.839702069759369, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.644781276201736e-05, "perf/tokens_per_sec": 27303.78502635042, "train/loss_prose": 3.4291638374328612, "train/loss_math": 2.174249053001404, "train/loss_code": 1.2232997417449951} +{"step": 2667, "train/loss": 2.3779567778110504, "train/lm_loss": 2.3779567778110504, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6430388410610955e-05, "perf/tokens_per_sec": 27336.847556929657, "train/loss_prose": 3.252789556980133, "train/loss_code": 1.5031238794326782} +{"step": 2668, "train/loss": 2.259422332048416, "train/lm_loss": 2.259422332048416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6412963362050618e-05, "perf/tokens_per_sec": 27430.603372489033, "train/loss_code": 1.5032471865415573, "train/loss_prose": 3.2917939027150473, "train/loss_math": 2.1870079040527344} +{"step": 2669, "train/loss": 2.318339765071869, "train/lm_loss": 2.318339765071869, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6395537624829096e-05, "perf/tokens_per_sec": 27271.018820003555, "train/loss_code": 1.4671554863452911, "train/loss_prose": 3.436927239100138, "train/loss_math": 2.367314100265503} +{"step": 2670, "train/loss": 2.6667206287384033, "train/lm_loss": 2.6667206287384033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6378111207439494e-05, "perf/tokens_per_sec": 27417.645390433117, "train/loss_math": 2.495615084966024, "train/loss_prose": 3.580453395843506, "train/loss_code": 1.5527797937393188} +{"step": 2671, "train/loss": 2.345377027988434, "train/lm_loss": 2.345377027988434, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.636068411837523e-05, "perf/tokens_per_sec": 26414.84098544405, "train/loss_math": 2.1647297739982605, "train/loss_prose": 3.38193142414093, "train/loss_code": 1.6701170206069946} +{"step": 2672, "train/loss": 2.2845101058483124, "train/lm_loss": 2.2845101058483124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6343256366130054e-05, "perf/tokens_per_sec": 26317.88693026626, "train/loss_code": 1.5448041558265686, "train/loss_math": 2.3735469579696655, "train/loss_prose": 3.6748846769332886} +{"step": 2673, "train/loss": 2.5954508781433105, "train/lm_loss": 2.5954508781433105, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6325827959198045e-05, "perf/tokens_per_sec": 26663.276851660725, "train/loss_prose": 3.5422581831614175, "train/loss_code": 1.7826011776924133, "train/loss_math": 2.1905433336893716} +{"step": 2674, "train/loss": 2.077131688594818, "train/lm_loss": 2.077131688594818, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.63083989060736e-05, "perf/tokens_per_sec": 25020.963859141677, "train/loss_prose": 3.6671571731567383, "train/loss_math": 2.2980005741119385, "train/loss_code": 1.5139736533164978} +{"step": 2675, "train/loss": 2.2577500343322754, "train/lm_loss": 2.2577500343322754, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6290969215251416e-05, "perf/tokens_per_sec": 25851.105502806327, "train/loss_math": 2.1639341910680137, "train/loss_code": 1.5402214924494426, "train/loss_prose": 3.4747668504714966} +{"step": 2676, "train/loss": 2.8112121522426605, "train/lm_loss": 2.8112121522426605, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6273538895226522e-05, "perf/tokens_per_sec": 26086.269352651692, "train/loss_prose": 3.554756259918213, "train/loss_code": 1.258899748325348, "train/loss_math": 2.198115587234497} +{"step": 2677, "train/loss": 2.6752873361110687, "train/lm_loss": 2.6752873361110687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6256107954494242e-05, "perf/tokens_per_sec": 25507.320692414705, "train/loss_prose": 3.4567334175109865, "train/loss_code": 1.3728772203127544} +{"step": 2678, "train/loss": 2.860180914402008, "train/lm_loss": 2.860180914402008, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6238676401550207e-05, "perf/tokens_per_sec": 25001.956198218417, "train/loss_prose": 3.6602019667625427, "train/loss_code": 1.5952203273773193, "train/loss_math": 2.215139945348104} +{"step": 2679, "train/loss": 1.9057464301586151, "train/lm_loss": 1.9057464301586151, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6221244244890336e-05, "perf/tokens_per_sec": 26408.0601825521, "train/loss_code": 1.413851946592331, "train/loss_math": 2.207606077194214, "train/loss_prose": 2.9677460193634033} +{"step": 2680, "train/loss": 2.7389535903930664, "train/lm_loss": 2.7389535903930664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6203811493010856e-05, "perf/tokens_per_sec": 26246.559404299398, "train/loss_code": 2.0092294216156006, "train/loss_prose": 4.95964252948761, "train/loss_math": 1.9934711158275604} +{"step": 2681, "train/loss": 2.5882105827331543, "train/lm_loss": 2.5882105827331543, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6186378154408286e-05, "perf/tokens_per_sec": 25762.21680315147, "train/loss_math": 2.373993158340454, "train/loss_prose": 3.4710309505462646, "train/loss_code": 1.848201592763265} +{"step": 2682, "train/loss": 2.6078561544418335, "train/lm_loss": 2.6078561544418335, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6168944237579406e-05, "perf/tokens_per_sec": 26388.22502353915, "train/loss_math": 2.1917105515797934, "train/loss_prose": 3.7111968199412027, "train/loss_code": 1.5770635604858398} +{"step": 2683, "train/loss": 2.815365791320801, "train/lm_loss": 2.815365791320801, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.615150975102131e-05, "perf/tokens_per_sec": 25242.982726497987, "train/loss_prose": 3.6118083000183105, "train/loss_code": 2.0189234614372253} +{"step": 2684, "train/loss": 2.161039412021637, "train/lm_loss": 2.161039412021637, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6134074703231344e-05, "perf/tokens_per_sec": 26168.99876770191, "train/loss_prose": 2.8680777549743652, "train/loss_code": 1.2572706937789917, "train/loss_math": 2.044191360473633} +{"step": 2685, "train/loss": 2.5645496249198914, "train/lm_loss": 2.5645496249198914, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6116639102707156e-05, "perf/tokens_per_sec": 26245.27632453341, "train/loss_code": 1.7827613751093547, "train/loss_prose": 3.421974500020345, "train/loss_math": 2.4510942697525024} +{"step": 2686, "train/loss": 2.7105554938316345, "train/lm_loss": 2.7105554938316345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6099202957946624e-05, "perf/tokens_per_sec": 26266.744311631763, "train/loss_math": 2.1480497419834137, "train/loss_code": 2.177668809890747, "train/loss_prose": 3.6381921768188477} +{"step": 2687, "train/loss": 2.4421463012695312, "train/lm_loss": 2.4421463012695312, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6081766277447927e-05, "perf/tokens_per_sec": 26597.971516111404, "train/loss_prose": 4.022031664848328, "train/loss_math": 2.297286719083786, "train/loss_code": 1.1519801020622253} +{"step": 2688, "train/loss": 3.089780777692795, "train/lm_loss": 3.089780777692795, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6064329069709493e-05, "perf/tokens_per_sec": 25880.39068539871, "train/loss_code": 1.5127882361412048, "train/loss_prose": 3.8919740676879884, "train/loss_math": 2.2328009605407715} +{"step": 2689, "train/loss": 2.3017860651016235, "train/lm_loss": 2.3017860651016235, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.604689134322999e-05, "perf/tokens_per_sec": 26881.68398915962, "train/loss_code": 1.4396833976109822, "train/loss_prose": 3.3900582790374756, "train/loss_math": 1.9625316262245178} +{"step": 2690, "train/loss": 2.362169176340103, "train/lm_loss": 2.362169176340103, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.602945310650838e-05, "perf/tokens_per_sec": 27286.178578121973, "train/loss_code": 1.0401081244150798, "train/loss_prose": 3.7461183071136475, "train/loss_math": 2.269336700439453} +{"step": 2691, "train/loss": 2.288956344127655, "train/lm_loss": 2.288956344127655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6012014368043814e-05, "perf/tokens_per_sec": 26445.255263307776, "train/loss_code": 1.3347047716379166, "train/loss_prose": 3.6712849140167236, "train/loss_math": 1.9589769840240479} +{"step": 2692, "train/loss": 2.134106397628784, "train/lm_loss": 2.134106397628784, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.599457513633575e-05, "perf/tokens_per_sec": 27077.76829575831, "train/loss_math": 2.2735676765441895, "train/loss_prose": 3.2183728218078613, "train/loss_code": 1.2718007564544678} +{"step": 2693, "train/loss": 2.6920206546783447, "train/lm_loss": 2.6920206546783447, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5977135419883842e-05, "perf/tokens_per_sec": 26403.75800571727, "train/loss_math": 2.112398386001587, "train/loss_prose": 3.7863492170969644, "train/loss_code": 1.727524757385254} +{"step": 2694, "train/loss": 2.1683970391750336, "train/lm_loss": 2.1683970391750336, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5959695227188004e-05, "perf/tokens_per_sec": 26641.527889301902, "train/loss_prose": 3.2510520219802856, "train/loss_math": 2.3330400784810386, "train/loss_code": 1.2819837828477223} +{"step": 2695, "train/loss": 2.341243863105774, "train/lm_loss": 2.341243863105774, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.594225456674837e-05, "perf/tokens_per_sec": 27111.012684515674, "train/loss_math": 2.264684279759725, "train/loss_prose": 3.8140885829925537, "train/loss_code": 1.3277559280395508} +{"step": 2696, "train/loss": 2.7384941577911377, "train/lm_loss": 2.7384941577911377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.59248134470653e-05, "perf/tokens_per_sec": 27010.164552583752, "train/loss_prose": 3.6524319648742676, "train/loss_math": 2.1901313781738283} +{"step": 2697, "train/loss": 2.175046682357788, "train/lm_loss": 2.175046682357788, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5907371876639398e-05, "perf/tokens_per_sec": 26940.19354434032, "train/loss_math": 2.144433617591858, "train/loss_code": 1.86586332321167, "train/loss_prose": 3.2250492572784424} +{"step": 2698, "train/loss": 2.6549007892608643, "train/lm_loss": 2.6549007892608643, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5889929863971462e-05, "perf/tokens_per_sec": 26614.494605798845, "train/loss_math": 2.15080718199412, "train/loss_prose": 3.640905221303304, "train/loss_code": 1.932033896446228} +{"step": 2699, "train/loss": 2.47334948182106, "train/lm_loss": 2.47334948182106, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.587248741756253e-05, "perf/tokens_per_sec": 27028.47952474824, "train/loss_code": 1.7359592119852703, "train/loss_math": 2.150654911994934, "train/loss_prose": 3.4258693059285483} +{"step": 2700, "train/loss": 2.1787911653518677, "train/lm_loss": 2.1787911653518677, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5855044545913825e-05, "perf/tokens_per_sec": 26432.27590705861, "train/loss_prose": 4.140741348266602, "train/loss_math": 2.3651978373527527, "train/loss_code": 1.2762652238210042} +{"step": 2701, "train/loss": 2.5225608348846436, "train/lm_loss": 2.5225608348846436, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.583760125752679e-05, "perf/tokens_per_sec": 26863.945408544612, "train/loss_code": 1.605153203010559, "train/loss_prose": 3.531466484069824, "train/loss_math": 2.125260273615519} +{"step": 2702, "train/loss": 3.048401653766632, "train/lm_loss": 3.048401653766632, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.582015756090308e-05, "perf/tokens_per_sec": 26146.058632754655, "train/loss_code": 2.0634583234786987, "train/loss_prose": 3.3767162561416626} +{"step": 2703, "train/loss": 2.4893128275871277, "train/lm_loss": 2.4893128275871277, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5802713464544542e-05, "perf/tokens_per_sec": 26538.72348077009, "train/loss_prose": 3.058404266834259, "train/loss_code": 1.5116196274757385, "train/loss_math": 2.3288224935531616} +{"step": 2704, "train/loss": 2.515861928462982, "train/lm_loss": 2.515861928462982, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.578526897695321e-05, "perf/tokens_per_sec": 27069.320652567825, "train/loss_prose": 3.9468525648117065, "train/loss_math": 2.0866169929504395, "train/loss_code": 1.943360984325409} +{"step": 2705, "train/loss": 2.7998012006282806, "train/lm_loss": 2.7998012006282806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.576782410663132e-05, "perf/tokens_per_sec": 27103.44220123276, "train/loss_prose": 3.554245114326477, "train/loss_code": 1.8182784914970398, "train/loss_math": 2.272436022758484} +{"step": 2706, "train/loss": 2.376600503921509, "train/lm_loss": 2.376600503921509, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.575037886208128e-05, "perf/tokens_per_sec": 26915.46034561093, "train/loss_prose": 3.179884433746338, "train/loss_math": 2.084546367327372, "train/loss_code": 1.609755516052246} +{"step": 2707, "train/loss": 2.149477332830429, "train/lm_loss": 2.149477332830429, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5732933251805713e-05, "perf/tokens_per_sec": 27247.316775466443, "train/loss_math": 2.2898635864257812, "train/loss_code": 1.2763669689496357, "train/loss_prose": 3.2485636472702026} +{"step": 2708, "train/loss": 2.3202854990959167, "train/lm_loss": 2.3202854990959167, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.571548728430737e-05, "perf/tokens_per_sec": 26521.802977627656, "train/loss_math": 2.2570866516658237, "train/loss_code": 2.7626776695251465} +{"step": 2709, "train/loss": 2.1400928497314453, "train/lm_loss": 2.1400928497314453, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5698040968089225e-05, "perf/tokens_per_sec": 27231.59584073041, "train/loss_math": 2.162718915939331, "train/loss_code": 1.686214804649353, "train/loss_prose": 2.9347188472747803} +{"step": 2710, "train/loss": 2.152661621570587, "train/lm_loss": 2.152661621570587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.568059431165438e-05, "perf/tokens_per_sec": 26452.7475868228, "train/loss_code": 1.2375043332576752, "train/loss_math": 2.291203165054321, "train/loss_prose": 3.2902684211730957} +{"step": 2711, "train/loss": 1.8930327594280243, "train/lm_loss": 1.8930327594280243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.566314732350615e-05, "perf/tokens_per_sec": 26585.993785205817, "train/loss_code": 1.3401721715927124, "train/loss_math": 2.2650927305221558, "train/loss_prose": 3.9132144451141357} +{"step": 2712, "train/loss": 2.8606133460998535, "train/lm_loss": 2.8606133460998535, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.564570001214797e-05, "perf/tokens_per_sec": 26788.71245419532, "train/loss_code": 1.5551342964172363, "train/loss_prose": 3.379300260543823, "train/loss_math": 2.216635823249817} +{"step": 2713, "train/loss": 2.557897210121155, "train/lm_loss": 2.557897210121155, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.562825238608344e-05, "perf/tokens_per_sec": 25363.54322243465, "train/loss_math": 1.9549689292907715, "train/loss_prose": 3.783026377360026, "train/loss_code": 1.6245962083339691} +{"step": 2714, "train/loss": 2.1130026280879974, "train/lm_loss": 2.1130026280879974, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5610804453816333e-05, "perf/tokens_per_sec": 26570.163526837154, "train/loss_math": 2.201240965298244, "train/loss_code": 1.495334506034851} +{"step": 2715, "train/loss": 2.3242452442646027, "train/lm_loss": 2.3242452442646027, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.559335622385055e-05, "perf/tokens_per_sec": 26626.86944985183, "train/loss_prose": 3.888673186302185, "train/loss_code": 1.386487603187561, "train/loss_math": 2.2190506855646768} +{"step": 2716, "train/loss": 2.228112369775772, "train/lm_loss": 2.228112369775772, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.557590770469016e-05, "perf/tokens_per_sec": 26516.76786847089, "train/loss_code": 1.4969837367534637, "train/loss_prose": 3.9812490940093994, "train/loss_math": 1.9372332692146301} +{"step": 2717, "train/loss": 2.361314445734024, "train/lm_loss": 2.361314445734024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5558458904839345e-05, "perf/tokens_per_sec": 26790.550542208694, "train/loss_math": 2.285452651977539, "train/loss_code": 0.9060555696487427, "train/loss_prose": 3.2785990238189697} +{"step": 2718, "train/loss": 2.8091012239456177, "train/lm_loss": 2.8091012239456177, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5541009832802448e-05, "perf/tokens_per_sec": 26212.440146534453, "train/loss_prose": 3.479429543018341, "train/loss_math": 2.304488738377889, "train/loss_code": 1.6416256427764893} +{"step": 2719, "train/loss": 2.6853628158569336, "train/lm_loss": 2.6853628158569336, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5523560497083926e-05, "perf/tokens_per_sec": 27208.220784198205, "train/loss_prose": 3.3905529379844666, "train/loss_math": 2.2178528904914856, "train/loss_code": 1.7424933910369873} +{"step": 2720, "train/loss": 2.449486583471298, "train/lm_loss": 2.449486583471298, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.550611090618838e-05, "perf/tokens_per_sec": 27453.002249948866, "train/loss_code": 0.9642296135425568, "train/loss_prose": 3.381067991256714, "train/loss_math": 2.0715805292129517} +{"step": 2721, "train/loss": 2.5859840512275696, "train/lm_loss": 2.5859840512275696, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5488661068620533e-05, "perf/tokens_per_sec": 27350.25174840761, "train/loss_code": 1.5485284725824993, "train/loss_math": 2.062713623046875, "train/loss_prose": 3.494893193244934} +{"step": 2722, "train/loss": 2.09255912899971, "train/lm_loss": 2.09255912899971, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.547121099288521e-05, "perf/tokens_per_sec": 27426.180522474315, "train/loss_code": 1.3471020758152008, "train/loss_math": 2.037198007106781, "train/loss_prose": 3.6388341188430786} +{"step": 2723, "train/loss": 2.413342446088791, "train/lm_loss": 2.413342446088791, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.545376068748737e-05, "perf/tokens_per_sec": 27275.868109536146, "train/loss_code": 1.013248364130656, "train/loss_math": 2.42485773563385, "train/loss_prose": 3.80575958887736} +{"step": 2724, "train/loss": 2.7646168768405914, "train/lm_loss": 2.7646168768405914, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5436310160932092e-05, "perf/tokens_per_sec": 27124.324742845867, "train/loss_prose": 3.6816828846931458, "train/loss_math": 2.2289488315582275, "train/loss_code": 1.4661532938480377} +{"step": 2725, "train/loss": 2.4733289182186127, "train/lm_loss": 2.4733289182186127, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5418859421724538e-05, "perf/tokens_per_sec": 26487.74074502349, "train/loss_prose": 3.4131126403808594, "train/loss_math": 2.3709778785705566, "train/loss_code": 1.7382476925849915} +{"step": 2726, "train/loss": 2.3749261498451233, "train/lm_loss": 2.3749261498451233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.540140847836999e-05, "perf/tokens_per_sec": 27077.426874629615, "train/loss_math": 2.109055924415588, "train/loss_code": 1.289153814315796, "train/loss_prose": 3.5824873447418213} +{"step": 2727, "train/loss": 2.8563560247421265, "train/lm_loss": 2.8563560247421265, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5383957339373825e-05, "perf/tokens_per_sec": 27192.84620729717, "train/loss_prose": 3.6765889525413513, "train/loss_math": 1.9297540585199993, "train/loss_code": 2.3552308082580566} +{"step": 2728, "train/loss": 2.7080946564674377, "train/lm_loss": 2.7080946564674377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.536650601324152e-05, "perf/tokens_per_sec": 27210.892650785605, "train/loss_code": 0.8777524828910828, "train/loss_prose": 4.174386183420817, "train/loss_math": 2.4620306491851807} +{"step": 2729, "train/loss": 2.4869461059570312, "train/lm_loss": 2.4869461059570312, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5349054508478637e-05, "perf/tokens_per_sec": 27113.579909915312, "train/loss_code": 1.8231606880823772, "train/loss_prose": 3.5863643487294516, "train/loss_math": 1.8334966897964478} +{"step": 2730, "train/loss": 2.942356586456299, "train/lm_loss": 2.942356586456299, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5331602833590824e-05, "perf/tokens_per_sec": 27353.909718672927, "train/loss_math": 2.3044055104255676, "train/loss_prose": 3.5803076028823853} +{"step": 2731, "train/loss": 1.8145005702972412, "train/lm_loss": 1.8145005702972412, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.531415099708382e-05, "perf/tokens_per_sec": 25888.151460401223, "train/loss_code": 1.3491348028182983, "train/loss_math": 2.0278013944625854, "train/loss_prose": 3.0360615253448486} +{"step": 2732, "train/loss": 1.9422601163387299, "train/lm_loss": 1.9422601163387299, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5296699007463433e-05, "perf/tokens_per_sec": 26209.880779223553, "train/loss_math": 2.466810405254364, "train/loss_code": 1.4177096635103226} +{"step": 2733, "train/loss": 2.4298665523529053, "train/lm_loss": 2.4298665523529053, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.527924687323556e-05, "perf/tokens_per_sec": 27370.469912168483, "train/loss_code": 1.7191026608149211, "train/loss_math": 2.461856245994568, "train/loss_prose": 3.1193038622538247} +{"step": 2734, "train/loss": 2.7435346841812134, "train/lm_loss": 2.7435346841812134, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5261794602906145e-05, "perf/tokens_per_sec": 27316.114378752416, "train/loss_code": 1.6421114057302475, "train/loss_prose": 3.844957947731018} +{"step": 2735, "train/loss": 1.8905900418758392, "train/lm_loss": 1.8905900418758392, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.524434220498123e-05, "perf/tokens_per_sec": 26694.805480712155, "train/loss_code": 1.2023069560527802, "train/loss_math": 2.317338546117147, "train/loss_prose": 3.363476037979126} +{"step": 2736, "train/loss": 2.450763702392578, "train/lm_loss": 2.450763702392578, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5226889687966882e-05, "perf/tokens_per_sec": 26982.124103010305, "train/loss_math": 2.553793986638387, "train/loss_code": 1.606324593226115, "train/loss_prose": 3.5628767013549805} +{"step": 2737, "train/loss": 2.0541366934776306, "train/lm_loss": 2.0541366934776306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.520943706036927e-05, "perf/tokens_per_sec": 26453.31782871731, "train/loss_code": 1.5582235604524612, "train/loss_prose": 3.3352835178375244, "train/loss_math": 2.2883055210113525} +{"step": 2738, "train/loss": 2.757330536842346, "train/lm_loss": 2.757330536842346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5191984330694573e-05, "perf/tokens_per_sec": 26420.32501860817, "train/loss_prose": 3.256383001804352, "train/loss_math": 2.258277714252472} +{"step": 2739, "train/loss": 2.6318662762641907, "train/lm_loss": 2.6318662762641907, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.517453150744904e-05, "perf/tokens_per_sec": 26982.25123525624, "train/loss_math": 2.3255118131637573, "train/loss_code": 1.4069206714630127, "train/loss_prose": 4.061047951380412} +{"step": 2740, "train/loss": 2.260026693344116, "train/lm_loss": 2.260026693344116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5157078599138977e-05, "perf/tokens_per_sec": 26718.178894966277, "train/loss_code": 1.6447783857584, "train/loss_prose": 3.111828923225403, "train/loss_math": 2.638720989227295} +{"step": 2741, "train/loss": 2.5910324156284332, "train/lm_loss": 2.5910324156284332, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5139625614270706e-05, "perf/tokens_per_sec": 27156.782108634277, "train/loss_math": 2.2038963079452514, "train/loss_prose": 4.087029933929443, "train/loss_code": 1.5347185134887695} +{"step": 2742, "train/loss": 2.2402053475379944, "train/lm_loss": 2.2402053475379944, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5122172561350616e-05, "perf/tokens_per_sec": 25820.993896399614, "train/loss_math": 2.180469012260437, "train/loss_prose": 3.9803097248077393, "train/loss_code": 1.5194938778877258} +{"step": 2743, "train/loss": 2.320122331380844, "train/lm_loss": 2.320122331380844, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.51047194488851e-05, "perf/tokens_per_sec": 26281.652091304193, "train/loss_code": 1.2141618728637695, "train/loss_prose": 3.48944091796875, "train/loss_math": 1.888110637664795} +{"step": 2744, "train/loss": 2.425935208797455, "train/lm_loss": 2.425935208797455, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5087266285380596e-05, "perf/tokens_per_sec": 26763.71445796834, "train/loss_prose": 3.2246782779693604, "train/loss_math": 2.1213930547237396, "train/loss_code": 1.2478736639022827} +{"step": 2745, "train/loss": 2.5581261217594147, "train/lm_loss": 2.5581261217594147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.506981307934357e-05, "perf/tokens_per_sec": 26231.090620099763, "train/loss_prose": 3.7594120502471924, "train/loss_code": 1.6390566031138103, "train/loss_math": 2.1348010301589966} +{"step": 2746, "train/loss": 2.41140016913414, "train/lm_loss": 2.41140016913414, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.505235983928048e-05, "perf/tokens_per_sec": 26962.009931134926, "train/loss_prose": 3.2484853267669678, "train/loss_math": 2.1624523401260376, "train/loss_code": 0.9861775040626526} +{"step": 2747, "train/loss": 2.293050915002823, "train/lm_loss": 2.293050915002823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5034906573697864e-05, "perf/tokens_per_sec": 27055.380776674872, "train/loss_prose": 3.6772408485412598, "train/loss_math": 2.1811933517456055, "train/loss_code": 1.7617465019226075} +{"step": 2748, "train/loss": 3.0631532073020935, "train/lm_loss": 3.0631532073020935, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.501745329110219e-05, "perf/tokens_per_sec": 27072.306590691343, "train/loss_math": 2.119809865951538, "train/loss_prose": 3.6782177448272706, "train/loss_code": 1.874516487121582} +{"step": 2749, "train/loss": 2.4809781908988953, "train/lm_loss": 2.4809781908988953, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5e-05, "perf/tokens_per_sec": 26382.146589413584, "train/loss_code": 1.3890592455863953, "train/loss_prose": 3.5728970766067505} +{"step": 2750, "train/loss": 2.2974707782268524, "train/lm_loss": 2.2974707782268524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4982546708897814e-05, "perf/tokens_per_sec": 26752.586804863466, "train/loss_math": 2.329021155834198, "train/loss_code": 0.9910493493080139, "train/loss_prose": 3.4145896434783936} +{"step": 2751, "train/loss": 2.2974928617477417, "train/lm_loss": 2.2974928617477417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.496509342630214e-05, "perf/tokens_per_sec": 26478.228787201577, "train/loss_prose": 3.896801233291626, "train/loss_math": 2.2459054946899415, "train/loss_code": 1.6268066763877869} +{"step": 2752, "train/loss": 2.3398267328739166, "train/lm_loss": 2.3398267328739166, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4947640160719515e-05, "perf/tokens_per_sec": 26150.15782151392, "train/loss_prose": 3.280738194783529, "train/loss_code": 1.6578189730644226, "train/loss_math": 1.85358730951945} +{"step": 2753, "train/loss": 2.14463409781456, "train/lm_loss": 2.14463409781456, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.493018692065644e-05, "perf/tokens_per_sec": 26802.337006851965, "train/loss_math": 2.152812957763672, "train/loss_code": 1.1762314836184184, "train/loss_prose": 3.584969639778137} +{"step": 2754, "train/loss": 2.0818049013614655, "train/lm_loss": 2.0818049013614655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4912733714619417e-05, "perf/tokens_per_sec": 27001.97122806466, "train/loss_code": 1.2877199687063694, "train/loss_prose": 3.2617096106211343, "train/loss_math": 1.7184287309646606} +{"step": 2755, "train/loss": 2.9395114183425903, "train/lm_loss": 2.9395114183425903, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4895280551114907e-05, "perf/tokens_per_sec": 26048.14118867155, "train/loss_code": 1.6172548532485962, "train/loss_prose": 3.4940046310424804, "train/loss_math": 2.811558485031128} +{"step": 2756, "train/loss": 2.6462407410144806, "train/lm_loss": 2.6462407410144806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4877827438649393e-05, "perf/tokens_per_sec": 26875.418166231517, "train/loss_prose": 3.1646742820739746, "train/loss_math": 1.625815987586975, "train/loss_code": 1.8603695034980774} +{"step": 2757, "train/loss": 2.7004494071006775, "train/lm_loss": 2.7004494071006775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.48603743857293e-05, "perf/tokens_per_sec": 26775.143475652047, "train/loss_math": 1.942756215731303, "train/loss_prose": 3.5590564608573914, "train/loss_code": 1.5391011238098145} +{"step": 2758, "train/loss": 2.1440340280532837, "train/lm_loss": 2.1440340280532837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.484292140086103e-05, "perf/tokens_per_sec": 26997.176406991035, "train/loss_math": 2.1930384635925293, "train/loss_prose": 3.286875565846761, "train/loss_code": 1.2746516466140747} +{"step": 2759, "train/loss": 2.599938690662384, "train/lm_loss": 2.599938690662384, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4825468492550964e-05, "perf/tokens_per_sec": 26267.949163945053, "train/loss_math": 2.329627799987793, "train/loss_code": 1.3688914775848389, "train/loss_prose": 3.8912391662597656} +{"step": 2760, "train/loss": 2.182269364595413, "train/lm_loss": 2.182269364595413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4808015669305433e-05, "perf/tokens_per_sec": 27004.814979455434, "train/loss_prose": 3.8349568843841553, "train/loss_code": 1.0653863350550334, "train/loss_math": 2.19736115137736} +{"step": 2761, "train/loss": 2.524738311767578, "train/lm_loss": 2.524738311767578, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4790562939630734e-05, "perf/tokens_per_sec": 27140.736666498156, "train/loss_math": 1.9716612100601196, "train/loss_prose": 3.0373872995376585, "train/loss_code": 1.0676474571228027} +{"step": 2762, "train/loss": 2.2569558024406433, "train/lm_loss": 2.2569558024406433, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4773110312033117e-05, "perf/tokens_per_sec": 26457.26946579933, "train/loss_code": 1.8384901285171509, "train/loss_math": 2.3167365108217512} +{"step": 2763, "train/loss": 2.8175359964370728, "train/lm_loss": 2.8175359964370728, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.475565779501878e-05, "perf/tokens_per_sec": 26201.566286044585, "train/loss_code": 0.8099467754364014, "train/loss_prose": 3.382191801071167, "train/loss_math": 2.4096908569335938} +{"step": 2764, "train/loss": 2.121781885623932, "train/lm_loss": 2.121781885623932, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4738205397093864e-05, "perf/tokens_per_sec": 25500.429983865437, "train/loss_math": 2.316642920176188, "train/loss_code": 1.1778143246968586, "train/loss_prose": 3.245441436767578} +{"step": 2765, "train/loss": 2.697118818759918, "train/lm_loss": 2.697118818759918, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4720753126764447e-05, "perf/tokens_per_sec": 26351.2305013981, "train/loss_code": 1.9066576957702637, "train/loss_prose": 3.3698408603668213, "train/loss_math": 2.3901925086975098} +{"step": 2766, "train/loss": 2.27463835477829, "train/lm_loss": 2.27463835477829, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4703300992536573e-05, "perf/tokens_per_sec": 26771.80552737586, "train/loss_prose": 3.2774319648742676, "train/loss_code": 1.5220254063606262, "train/loss_math": 2.1495477855205536} +{"step": 2767, "train/loss": 2.030586928129196, "train/lm_loss": 2.030586928129196, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4685849002916183e-05, "perf/tokens_per_sec": 26253.93954823931, "train/loss_code": 1.4441319108009338, "train/loss_math": 2.382459831237793} +{"step": 2768, "train/loss": 2.2828359603881836, "train/lm_loss": 2.2828359603881836, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.466839716640918e-05, "perf/tokens_per_sec": 26963.575642194708, "train/loss_code": 1.4748764336109161, "train/loss_prose": 3.495803713798523, "train/loss_math": 2.685787081718445} +{"step": 2769, "train/loss": 2.5631215572357178, "train/lm_loss": 2.5631215572357178, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4650945491521372e-05, "perf/tokens_per_sec": 26626.167901119763, "train/loss_prose": 3.4213513135910034, "train/loss_math": 2.1279208660125732, "train/loss_code": 1.2818628549575806} +{"step": 2770, "train/loss": 2.0019049048423767, "train/lm_loss": 2.0019049048423767, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4633493986758484e-05, "perf/tokens_per_sec": 26662.57338316179, "train/loss_math": 2.0447011590003967, "train/loss_code": 1.4837745030721028, "train/loss_prose": 3.3851118087768555} +{"step": 2771, "train/loss": 2.333799868822098, "train/lm_loss": 2.333799868822098, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4616042660626177e-05, "perf/tokens_per_sec": 26294.5241527298, "train/loss_math": 2.1597174644470214, "train/loss_code": 2.1781854033470154, "train/loss_prose": 3.5154402256011963} +{"step": 2772, "train/loss": 2.400742381811142, "train/lm_loss": 2.400742381811142, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.459859152163002e-05, "perf/tokens_per_sec": 26303.582373097262, "train/loss_prose": 3.673096537590027, "train/loss_code": 1.7004384994506836, "train/loss_math": 2.1147171556949615} +{"step": 2773, "train/loss": 2.117134392261505, "train/lm_loss": 2.117134392261505, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.458114057827547e-05, "perf/tokens_per_sec": 25485.298644581217, "train/loss_math": 2.095667243003845, "train/loss_code": 1.7370436191558838, "train/loss_prose": 3.343275547027588} +{"step": 2774, "train/loss": 2.6909905672073364, "train/lm_loss": 2.6909905672073364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4563689839067913e-05, "perf/tokens_per_sec": 27090.19334471824, "train/loss_prose": 3.2879199981689453, "train/loss_code": 1.047827959060669, "train/loss_math": 2.020248770713806} +{"step": 2775, "train/loss": 2.459177553653717, "train/lm_loss": 2.459177553653717, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4546239312512635e-05, "perf/tokens_per_sec": 26065.213877480426, "train/loss_math": 2.1858726143836975, "train/loss_prose": 3.195728143056234, "train/loss_code": 1.3427459001541138} +{"step": 2776, "train/loss": 2.5773545503616333, "train/lm_loss": 2.5773545503616333, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.45287890071148e-05, "perf/tokens_per_sec": 26343.593587938896, "train/loss_math": 2.350409825642904, "train/loss_prose": 3.4989991188049316, "train/loss_code": 1.5353050827980042} +{"step": 2777, "train/loss": 2.3293785750865936, "train/lm_loss": 2.3293785750865936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4511338931379473e-05, "perf/tokens_per_sec": 27001.75903182711, "train/loss_math": 2.2599008560180662, "train/loss_code": 1.769132912158966, "train/loss_prose": 3.7972590923309326} +{"step": 2778, "train/loss": 1.9831396639347076, "train/lm_loss": 1.9831396639347076, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4493889093811622e-05, "perf/tokens_per_sec": 26713.56739862606, "train/loss_prose": 3.6352081298828125, "train/loss_math": 2.335829178492228, "train/loss_code": 1.3056053817272186} +{"step": 2779, "train/loss": 2.0786412060260773, "train/lm_loss": 2.0786412060260773, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.447643950291608e-05, "perf/tokens_per_sec": 27188.499689814522, "train/loss_code": 1.2693182826042175, "train/loss_prose": 3.4090405702590942, "train/loss_math": 2.366887927055359} +{"step": 2780, "train/loss": 2.2815267145633698, "train/lm_loss": 2.2815267145633698, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4458990167197555e-05, "perf/tokens_per_sec": 27205.635606398915, "train/loss_prose": 3.4911367098490396, "train/loss_code": 1.173274278640747, "train/loss_math": 2.129490554332733} +{"step": 2781, "train/loss": 2.69860976934433, "train/lm_loss": 2.69860976934433, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.444154109516066e-05, "perf/tokens_per_sec": 27104.297411188574, "train/loss_prose": 3.5755698680877686, "train/loss_code": 1.2370094855626423} +{"step": 2782, "train/loss": 2.97010600566864, "train/lm_loss": 2.97010600566864, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.442409229530985e-05, "perf/tokens_per_sec": 27087.16075201342, "train/loss_math": 2.349059581756592, "train/loss_prose": 3.342733860015869} +{"step": 2783, "train/loss": 2.583096295595169, "train/lm_loss": 2.583096295595169, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4406643776149458e-05, "perf/tokens_per_sec": 27393.032715630092, "train/loss_code": 1.6733618179957073, "train/loss_math": 2.1643446683883667, "train/loss_prose": 3.771998087565104} +{"step": 2784, "train/loss": 2.4681001901626587, "train/lm_loss": 2.4681001901626587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4389195546183673e-05, "perf/tokens_per_sec": 27380.241107330407, "train/loss_code": 1.4391465783119202, "train/loss_prose": 3.872319221496582, "train/loss_math": 2.371258020401001} +{"step": 2785, "train/loss": 2.1764291524887085, "train/lm_loss": 2.1764291524887085, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4371747613916566e-05, "perf/tokens_per_sec": 27563.20361950296, "train/loss_prose": 3.2901828289031982, "train/loss_math": 1.993064820766449, "train/loss_code": 1.4294039607048035} +{"step": 2786, "train/loss": 2.124688357114792, "train/lm_loss": 2.124688357114792, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4354299987852035e-05, "perf/tokens_per_sec": 27477.942599772243, "train/loss_code": 1.6057266394297283, "train/loss_prose": 3.306878089904785, "train/loss_math": 2.218362033367157} +{"step": 2787, "train/loss": 2.7706980109214783, "train/lm_loss": 2.7706980109214783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4336852676493847e-05, "perf/tokens_per_sec": 27520.37481818473, "train/loss_prose": 3.611624240875244, "train/loss_math": 2.226548671722412, "train/loss_code": 1.632994532585144} +{"step": 2788, "train/loss": 2.3015585839748383, "train/lm_loss": 2.3015585839748383, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4319405688345614e-05, "perf/tokens_per_sec": 27431.21655338031, "train/loss_code": 1.621328353881836, "train/loss_math": 2.1807886362075806, "train/loss_prose": 3.7827887535095215} +{"step": 2789, "train/loss": 2.3429014682769775, "train/lm_loss": 2.3429014682769775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4301959031910784e-05, "perf/tokens_per_sec": 27102.031222787333, "train/loss_code": 1.5312661131223042, "train/loss_prose": 3.229461669921875, "train/loss_math": 2.2305142879486084} +{"step": 2790, "train/loss": 2.047181785106659, "train/lm_loss": 2.047181785106659, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4284512715692636e-05, "perf/tokens_per_sec": 27278.293401079707, "train/loss_code": 1.1929022669792175, "train/loss_math": 1.9554893374443054, "train/loss_prose": 3.8474334478378296} +{"step": 2791, "train/loss": 2.0567149817943573, "train/lm_loss": 2.0567149817943573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4267066748194296e-05, "perf/tokens_per_sec": 27034.68930170345, "train/loss_code": 1.4741348147392273, "train/loss_prose": 3.3959414958953857, "train/loss_math": 2.291163206100464} +{"step": 2792, "train/loss": 2.0204904079437256, "train/lm_loss": 2.0204904079437256, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4249621137918722e-05, "perf/tokens_per_sec": 25469.467811566014, "train/loss_math": 2.0929233857563565, "train/loss_code": 1.5134592056274414} +{"step": 2793, "train/loss": 2.4727969765663147, "train/lm_loss": 2.4727969765663147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.423217589336868e-05, "perf/tokens_per_sec": 26709.704937912677, "train/loss_math": 2.1221184730529785, "train/loss_code": 1.1529478232065837, "train/loss_prose": 4.0264317989349365} +{"step": 2794, "train/loss": 2.5942503213882446, "train/lm_loss": 2.5942503213882446, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4214731023046793e-05, "perf/tokens_per_sec": 26411.470450597262, "train/loss_code": 1.6657228469848633, "train/loss_math": 2.262109120686849, "train/loss_prose": 3.5454097588857016} +{"step": 2795, "train/loss": 2.3739795684814453, "train/lm_loss": 2.3739795684814453, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4197286535455464e-05, "perf/tokens_per_sec": 26092.0933125972, "train/loss_math": 2.6729748249053955, "train/loss_code": 1.4173150211572647, "train/loss_prose": 3.5498670736948648} +{"step": 2796, "train/loss": 2.6275976300239563, "train/lm_loss": 2.6275976300239563, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.417984243909692e-05, "perf/tokens_per_sec": 27315.940648591582, "train/loss_code": 1.6708402434984844, "train/loss_prose": 3.9676219622294107, "train/loss_math": 2.052696943283081} +{"step": 2797, "train/loss": 2.9540849328041077, "train/lm_loss": 2.9540849328041077, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4162398742473214e-05, "perf/tokens_per_sec": 27127.922525347745, "train/loss_math": 2.3002870877583823, "train/loss_prose": 3.3463637828826904} +{"step": 2798, "train/loss": 2.2072908878326416, "train/lm_loss": 2.2072908878326416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4144955454086188e-05, "perf/tokens_per_sec": 27143.5668371974, "train/loss_prose": 3.2421422799428306, "train/loss_math": 2.2273151874542236, "train/loss_code": 1.1590900818506877} +{"step": 2799, "train/loss": 1.734101802110672, "train/lm_loss": 1.734101802110672, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4127512582437485e-05, "perf/tokens_per_sec": 26296.496300400726, "train/loss_code": 1.2243285477161407, "train/loss_prose": 2.8044456243515015, "train/loss_math": 2.1422789096832275} +{"step": 2800, "train/loss": 2.12838476896286, "train/lm_loss": 2.12838476896286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.411007013602854e-05, "perf/tokens_per_sec": 26936.223042405276, "train/loss_math": 2.336944580078125, "train/loss_code": 1.2390867471694946, "train/loss_prose": 3.244595448176066} +{"step": 2800, "eval/loss": 2.1810011283838673, "eval/lm_loss": 2.1810011283838673, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.855166981629413, "eval/loss_code": 1.5709210189237988, "eval/ppl_code": 4.811077247029531, "eval/loss_prose": 3.484886230083934, "eval/ppl_prose": 32.61871587018299, "eval/loss_math": 2.0774039874986276, "eval/ppl_math": 7.983716161785355} +{"step": 2801, "train/loss": 2.7389844357967377, "train/lm_loss": 2.7389844357967377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4092628123360608e-05, "perf/tokens_per_sec": 25551.935200512828, "train/loss_math": 2.2021546959877014, "train/loss_code": 1.7058632373809814, "train/loss_prose": 3.5239598751068115} +{"step": 2802, "train/loss": 2.086578816175461, "train/lm_loss": 2.086578816175461, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4075186552934707e-05, "perf/tokens_per_sec": 26705.428462172204, "train/loss_code": 1.7102142175038655, "train/loss_math": 2.4396090507507324, "train/loss_prose": 3.9917359352111816} +{"step": 2803, "train/loss": 2.5203264951705933, "train/lm_loss": 2.5203264951705933, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4057745433251635e-05, "perf/tokens_per_sec": 26911.539285977226, "train/loss_math": 2.074610392252604, "train/loss_prose": 2.991264820098877, "train/loss_code": 1.9737210273742676} +{"step": 2804, "train/loss": 2.4545540511608124, "train/lm_loss": 2.4545540511608124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4040304772812002e-05, "perf/tokens_per_sec": 26899.3196581036, "train/loss_code": 1.7988985180854797, "train/loss_math": 2.419707417488098, "train/loss_prose": 3.800711750984192} +{"step": 2805, "train/loss": 2.6889680325984955, "train/lm_loss": 2.6889680325984955, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4022864580116164e-05, "perf/tokens_per_sec": 27429.37709294287, "train/loss_prose": 3.465965211391449, "train/loss_math": 2.193934679031372, "train/loss_code": 1.06607985496521} +{"step": 2806, "train/loss": 2.28535994887352, "train/lm_loss": 2.28535994887352, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4005424863664254e-05, "perf/tokens_per_sec": 26637.521023335143, "train/loss_prose": 3.692170023918152, "train/loss_code": 1.7572664022445679, "train/loss_math": 2.1122078895568848} +{"step": 2807, "train/loss": 3.1148977279663086, "train/lm_loss": 3.1148977279663086, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.398798563195619e-05, "perf/tokens_per_sec": 27063.81972019137, "train/loss_prose": 3.4683056275049844, "train/loss_math": 2.0546735525131226} +{"step": 2808, "train/loss": 2.5169519782066345, "train/lm_loss": 2.5169519782066345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3970546893491637e-05, "perf/tokens_per_sec": 27182.176052605348, "train/loss_math": 2.316847483317057, "train/loss_code": 0.983711838722229, "train/loss_prose": 3.7392168045043945} +{"step": 2809, "train/loss": 2.1738377809524536, "train/lm_loss": 2.1738377809524536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3953108656770016e-05, "perf/tokens_per_sec": 26936.729849824234, "train/loss_math": 2.1389166831970217, "train/loss_prose": 3.3260841369628906, "train/loss_code": 1.6850172877311707} +{"step": 2810, "train/loss": 2.3224600553512573, "train/lm_loss": 2.3224600553512573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3935670930290512e-05, "perf/tokens_per_sec": 27086.861800769726, "train/loss_code": 1.5006122589111328, "train/loss_prose": 3.619376262029012, "train/loss_math": 1.7191017866134644} +{"step": 2811, "train/loss": 2.4108968675136566, "train/lm_loss": 2.4108968675136566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.391823372255208e-05, "perf/tokens_per_sec": 26627.405953238944, "train/loss_prose": 3.764460484186808, "train/loss_code": 1.3128534158070881, "train/loss_math": 2.027616500854492} +{"step": 2812, "train/loss": 2.3350244760513306, "train/lm_loss": 2.3350244760513306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3900797042053382e-05, "perf/tokens_per_sec": 26911.66575393966, "train/loss_prose": 4.107110023498535, "train/loss_math": 2.073718229929606, "train/loss_code": 1.4149403770764668} +{"step": 2813, "train/loss": 2.1596869826316833, "train/lm_loss": 2.1596869826316833, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.388336089729285e-05, "perf/tokens_per_sec": 26758.92051387487, "train/loss_prose": 3.456356167793274, "train/loss_math": 2.2442715565363565, "train/loss_code": 1.2106561462084453} +{"step": 2814, "train/loss": 2.4227434396743774, "train/lm_loss": 2.4227434396743774, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.386592529676866e-05, "perf/tokens_per_sec": 27082.250246311633, "train/loss_code": 1.5392807573080063, "train/loss_prose": 4.120314836502075, "train/loss_math": 2.492097496986389} +{"step": 2815, "train/loss": 2.634175479412079, "train/lm_loss": 2.634175479412079, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.384849024897869e-05, "perf/tokens_per_sec": 27041.58294258106, "train/loss_code": 2.157090663909912, "train/loss_math": 2.3152569770812987, "train/loss_prose": 3.6700137853622437} +{"step": 2816, "train/loss": 2.4926071166992188, "train/lm_loss": 2.4926071166992188, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3831055762420607e-05, "perf/tokens_per_sec": 27062.28497527673, "train/loss_code": 1.9257310628890991, "train/loss_math": 2.0871132910251617, "train/loss_prose": 3.222224394480387} +{"step": 2817, "train/loss": 1.980082392692566, "train/lm_loss": 1.980082392692566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3813621845591727e-05, "perf/tokens_per_sec": 27093.52570829292, "train/loss_prose": 3.3425252437591553, "train/loss_math": 2.4084184169769287, "train/loss_code": 1.0846930854022503} +{"step": 2818, "train/loss": 2.591553896665573, "train/lm_loss": 2.591553896665573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3796188506989153e-05, "perf/tokens_per_sec": 26885.00732222975, "train/loss_math": 2.149397909641266, "train/loss_prose": 3.657548745473226, "train/loss_code": 1.162193775177002} +{"step": 2819, "train/loss": 2.5373823046684265, "train/lm_loss": 2.5373823046684265, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.377875575510967e-05, "perf/tokens_per_sec": 26996.158257147436, "train/loss_code": 2.3195087909698486, "train/loss_math": 2.1624163389205933, "train/loss_prose": 3.1099613507588706} +{"step": 2820, "train/loss": 3.3227181434631348, "train/lm_loss": 3.3227181434631348, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3761323598449803e-05, "perf/tokens_per_sec": 26950.25175303467, "train/loss_code": 1.0632622241973877, "train/loss_prose": 3.64549766268049} +{"step": 2821, "train/loss": 2.3438650965690613, "train/lm_loss": 2.3438650965690613, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3743892045505764e-05, "perf/tokens_per_sec": 26894.14020125361, "train/loss_math": 2.313820314407349, "train/loss_code": 1.7013171911239624, "train/loss_prose": 3.7791850566864014} +{"step": 2822, "train/loss": 2.701209247112274, "train/lm_loss": 2.701209247112274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.372646110477348e-05, "perf/tokens_per_sec": 26983.522623609977, "train/loss_code": 1.7688136498133342, "train/loss_prose": 3.5153799057006836, "train/loss_math": 2.241713285446167} +{"step": 2823, "train/loss": 2.5990909934043884, "train/lm_loss": 2.5990909934043884, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3709030784748587e-05, "perf/tokens_per_sec": 26710.6600661708, "train/loss_code": 1.3942983945210774, "train/loss_math": 2.059913754463196, "train/loss_prose": 4.1633351643880205} +{"step": 2824, "train/loss": 2.0499747693538666, "train/lm_loss": 2.0499747693538666, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3691601093926404e-05, "perf/tokens_per_sec": 27032.775025176234, "train/loss_math": 2.2304542660713196, "train/loss_code": 1.2433610161145527, "train/loss_prose": 3.7478976249694824} +{"step": 2825, "train/loss": 2.730604887008667, "train/lm_loss": 2.730604887008667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.367417204080196e-05, "perf/tokens_per_sec": 27119.700424478637, "train/loss_code": 1.6209060748418171, "train/loss_prose": 3.6498008966445923, "train/loss_math": 2.3829166889190674} +{"step": 2826, "train/loss": 2.6991924047470093, "train/lm_loss": 2.6991924047470093, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3656743633869948e-05, "perf/tokens_per_sec": 27115.33446025395, "train/loss_math": 2.1885706186294556, "train/loss_prose": 3.772986650466919, "train/loss_code": 1.5202953815460205} +{"step": 2827, "train/loss": 2.8983523845672607, "train/lm_loss": 2.8983523845672607, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3639315881624777e-05, "perf/tokens_per_sec": 26941.58772156802, "train/loss_math": 2.0621679425239563, "train/loss_prose": 3.4681094169616697, "train/loss_code": 1.7219362258911133} +{"step": 2828, "train/loss": 2.001854509115219, "train/lm_loss": 2.001854509115219, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3621888792560515e-05, "perf/tokens_per_sec": 27058.70458064201, "train/loss_prose": 3.3322935104370117, "train/loss_code": 1.3381746783852577, "train/loss_math": 1.9987751245498657} +{"step": 2829, "train/loss": 2.9934208393096924, "train/lm_loss": 2.9934208393096924, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3604462375170906e-05, "perf/tokens_per_sec": 26934.449366612316, "train/loss_prose": 3.700396776199341, "train/loss_math": 2.2864447832107544} +{"step": 2830, "train/loss": 2.417701333761215, "train/lm_loss": 2.417701333761215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3587036637949388e-05, "perf/tokens_per_sec": 27081.908712149278, "train/loss_math": 1.7906976342201233, "train/loss_code": 0.8668805360794067, "train/loss_prose": 3.5066134929656982} +{"step": 2831, "train/loss": 2.6650107502937317, "train/lm_loss": 2.6650107502937317, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3569611589389047e-05, "perf/tokens_per_sec": 26893.129808083653, "train/loss_prose": 3.808901389439901, "train/loss_math": 2.047478049993515, "train/loss_code": 1.7034695148468018} +{"step": 2832, "train/loss": 2.092352330684662, "train/lm_loss": 2.092352330684662, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.355218723798264e-05, "perf/tokens_per_sec": 26554.63942141977, "train/loss_code": 1.632333219051361, "train/loss_math": 2.245692173639933} +{"step": 2833, "train/loss": 2.770663619041443, "train/lm_loss": 2.770663619041443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3534763592222586e-05, "perf/tokens_per_sec": 27126.12351480185, "train/loss_prose": 3.601330816745758, "train/loss_math": 2.124550938606262, "train/loss_code": 1.3863327503204346} +{"step": 2834, "train/loss": 2.346208930015564, "train/lm_loss": 2.346208930015564, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3517340660600964e-05, "perf/tokens_per_sec": 27155.065112588476, "train/loss_prose": 3.9129425287246704, "train/loss_math": 2.1790711879730225, "train/loss_code": 1.646411120891571} +{"step": 2835, "train/loss": 2.126183569431305, "train/lm_loss": 2.126183569431305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.349991845160949e-05, "perf/tokens_per_sec": 27205.549442266154, "train/loss_code": 1.6681104898452759, "train/loss_prose": 3.2542022466659546, "train/loss_math": 1.9143105149269104} +{"step": 2836, "train/loss": 2.8461021780967712, "train/lm_loss": 2.8461021780967712, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.348249697373953e-05, "perf/tokens_per_sec": 27308.211892991687, "train/loss_code": 1.87191641330719, "train/loss_prose": 3.433590602874756, "train/loss_math": 1.8570311069488525} +{"step": 2837, "train/loss": 2.1234268248081207, "train/lm_loss": 2.1234268248081207, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3465076235482116e-05, "perf/tokens_per_sec": 27194.438843705182, "train/loss_math": 2.2770167191823325, "train/loss_code": 1.7192358076572418, "train/loss_prose": 3.2794206142425537} +{"step": 2838, "train/loss": 2.3508233726024628, "train/lm_loss": 2.3508233726024628, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.34476562453279e-05, "perf/tokens_per_sec": 27084.128838195535, "train/loss_math": 1.8340396285057068, "train/loss_prose": 3.3948721885681152, "train/loss_code": 1.651296854019165} +{"step": 2839, "train/loss": 2.5468003153800964, "train/lm_loss": 2.5468003153800964, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3430237011767167e-05, "perf/tokens_per_sec": 26910.401127801884, "train/loss_math": 2.2846824526786804, "train/loss_prose": 3.380453944206238, "train/loss_code": 1.1416113376617432} +{"step": 2840, "train/loss": 2.24999338388443, "train/lm_loss": 2.24999338388443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.341281854328985e-05, "perf/tokens_per_sec": 26841.61635882421, "train/loss_math": 2.060412128766378, "train/loss_code": 1.4854964812596638, "train/loss_prose": 3.6811108589172363} +{"step": 2841, "train/loss": 2.467197299003601, "train/lm_loss": 2.467197299003601, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3395400848385486e-05, "perf/tokens_per_sec": 27327.933216258178, "train/loss_prose": 3.6320180892944336, "train/loss_math": 2.2471468448638916, "train/loss_code": 1.910700519879659} +{"step": 2842, "train/loss": 2.387664794921875, "train/lm_loss": 2.387664794921875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3377983935543264e-05, "perf/tokens_per_sec": 27302.917181580084, "train/loss_code": 1.6565846800804138, "train/loss_math": 2.28215092420578, "train/loss_prose": 3.3297722339630127} +{"step": 2843, "train/loss": 2.9391276240348816, "train/lm_loss": 2.9391276240348816, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.336056781325197e-05, "perf/tokens_per_sec": 27079.731634348333, "train/loss_prose": 3.5433568358421326, "train/loss_code": 2.431946039199829, "train/loss_math": 2.302549123764038} +{"step": 2844, "train/loss": 2.1600678265094757, "train/lm_loss": 2.1600678265094757, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3343152490000004e-05, "perf/tokens_per_sec": 27392.989037937507, "train/loss_math": 2.0886112689971923, "train/loss_prose": 3.5211241245269775, "train/loss_code": 1.658181071281433} +{"step": 2845, "train/loss": 2.8580254316329956, "train/lm_loss": 2.8580254316329956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.332573797427538e-05, "perf/tokens_per_sec": 26951.055043007498, "train/loss_math": 2.1916624546051025, "train/loss_prose": 3.9686301549275718} +{"step": 2846, "train/loss": 1.8564637303352356, "train/lm_loss": 1.8564637303352356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3308324274565734e-05, "perf/tokens_per_sec": 27124.79582672839, "train/loss_code": 1.4587479730447133, "train/loss_prose": 3.816385269165039, "train/loss_math": 2.282837152481079} +{"step": 2847, "train/loss": 2.450431078672409, "train/lm_loss": 2.450431078672409, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3290911399358285e-05, "perf/tokens_per_sec": 27102.587045243366, "train/loss_prose": 3.4211995005607605, "train/loss_math": 2.2332932353019714, "train/loss_code": 0.7260327637195587} +{"step": 2848, "train/loss": 2.0300236642360687, "train/lm_loss": 2.0300236642360687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3273499357139855e-05, "perf/tokens_per_sec": 27114.82090982123, "train/loss_prose": 3.465463161468506, "train/loss_code": 1.1485773175954819, "train/loss_math": 2.3574767112731934} +{"step": 2849, "train/loss": 2.453020930290222, "train/lm_loss": 2.453020930290222, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3256088156396868e-05, "perf/tokens_per_sec": 26524.464579997806, "train/loss_code": 1.6282384991645813, "train/loss_prose": 3.662113666534424, "train/loss_math": 2.1248724460601807} +{"step": 2850, "train/loss": 2.699845552444458, "train/lm_loss": 2.699845552444458, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.323867780561533e-05, "perf/tokens_per_sec": 26221.281806138086, "train/loss_prose": 3.605240742365519, "train/loss_code": 1.5799301862716675, "train/loss_math": 2.3007779717445374} +{"step": 2851, "train/loss": 1.9402188062667847, "train/lm_loss": 1.9402188062667847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3221268313280838e-05, "perf/tokens_per_sec": 26495.91096812601, "train/loss_code": 1.5784141222635906, "train/loss_prose": 3.8318727016448975, "train/loss_math": 2.219393491744995} +{"step": 2852, "train/loss": 2.828229546546936, "train/lm_loss": 2.828229546546936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3203859687878548e-05, "perf/tokens_per_sec": 25188.20805207753, "train/loss_prose": 3.8253464698791504, "train/loss_code": 1.9243043065071106, "train/loss_math": 1.7379207611083984} +{"step": 2853, "train/loss": 2.311034679412842, "train/lm_loss": 2.311034679412842, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3186451937893235e-05, "perf/tokens_per_sec": 26354.181394235784, "train/loss_math": 2.1672983169555664, "train/loss_prose": 3.757300853729248, "train/loss_code": 1.4905933141708374} +{"step": 2854, "train/loss": 2.2098552584648132, "train/lm_loss": 2.2098552584648132, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3169045071809215e-05, "perf/tokens_per_sec": 26268.431135820138, "train/loss_math": 2.218133640289307, "train/loss_code": 2.035167932510376, "train/loss_prose": 2.2765025794506073} +{"step": 2855, "train/loss": 2.2077063024044037, "train/lm_loss": 2.2077063024044037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3151639098110377e-05, "perf/tokens_per_sec": 25791.610582747715, "train/loss_math": 2.2165855765342712, "train/loss_code": 1.6432907581329346, "train/loss_prose": 3.865436553955078} +{"step": 2856, "train/loss": 2.423348218202591, "train/lm_loss": 2.423348218202591, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3134234025280196e-05, "perf/tokens_per_sec": 26113.430406722055, "train/loss_prose": 3.43562114238739, "train/loss_code": 1.7824580669403076, "train/loss_math": 2.3893893559773765} +{"step": 2857, "train/loss": 2.1607487499713898, "train/lm_loss": 2.1607487499713898, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3116829861801686e-05, "perf/tokens_per_sec": 26366.3152931932, "train/loss_prose": 4.134537577629089, "train/loss_code": 1.502819001674652} +{"step": 2858, "train/loss": 2.1954709589481354, "train/lm_loss": 2.1954709589481354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.309942661615742e-05, "perf/tokens_per_sec": 26827.99374737457, "train/loss_code": 1.4483314752578735, "train/loss_math": 2.2314159075419107, "train/loss_prose": 3.26226270198822} +{"step": 2859, "train/loss": 2.576782912015915, "train/lm_loss": 2.576782912015915, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3082024296829536e-05, "perf/tokens_per_sec": 26586.405212089325, "train/loss_math": 2.1220144033432007, "train/loss_prose": 3.6385175387064614, "train/loss_code": 1.666333556175232} +{"step": 2860, "train/loss": 1.9060471951961517, "train/lm_loss": 1.9060471951961517, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3064622912299712e-05, "perf/tokens_per_sec": 26720.963181405303, "train/loss_math": 2.1077428658803306, "train/loss_code": 1.3009600043296814} +{"step": 2861, "train/loss": 1.8201044201850891, "train/lm_loss": 1.8201044201850891, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.304722247104917e-05, "perf/tokens_per_sec": 26553.695418448002, "train/loss_code": 1.5791223347187042, "train/loss_math": 2.0610864758491516} +{"step": 2862, "train/loss": 2.705016613006592, "train/lm_loss": 2.705016613006592, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3029822981558667e-05, "perf/tokens_per_sec": 26885.17561411498, "train/loss_prose": 3.5535070101420083, "train/loss_math": 2.6373393535614014, "train/loss_code": 2.0855681002140045} +{"step": 2863, "train/loss": 2.487617701292038, "train/lm_loss": 2.487617701292038, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.301242445230851e-05, "perf/tokens_per_sec": 27090.15062743033, "train/loss_math": 2.009380300839742, "train/loss_prose": 3.426829735438029, "train/loss_code": 1.7961558103561401} +{"step": 2864, "train/loss": 2.6838961839675903, "train/lm_loss": 2.6838961839675903, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.299502689177853e-05, "perf/tokens_per_sec": 25919.202056654558, "train/loss_math": 2.220247673988342, "train/loss_prose": 3.894958734512329, "train/loss_code": 2.5800137519836426} +{"step": 2865, "train/loss": 3.120893716812134, "train/lm_loss": 3.120893716812134, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2977630308448085e-05, "perf/tokens_per_sec": 26925.457891429083, "train/loss_math": 2.1782116889953613, "train/loss_prose": 3.7980195045471192, "train/loss_code": 1.6206285953521729} +{"step": 2866, "train/loss": 2.3383765816688538, "train/lm_loss": 2.3383765816688538, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2960234710796063e-05, "perf/tokens_per_sec": 26865.41572749066, "train/loss_math": 2.2765351136525473, "train/loss_prose": 3.59366512298584, "train/loss_code": 1.5633588433265686} +{"step": 2867, "train/loss": 2.724336266517639, "train/lm_loss": 2.724336266517639, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.294284010730086e-05, "perf/tokens_per_sec": 26945.81338235271, "train/loss_math": 2.1656293869018555, "train/loss_code": 1.485748529434204, "train/loss_prose": 3.453013241291046} +{"step": 2868, "train/loss": 2.1584820449352264, "train/lm_loss": 2.1584820449352264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.29254465064404e-05, "perf/tokens_per_sec": 26640.24721345234, "train/loss_code": 1.7760695219039917, "train/loss_prose": 2.7538294792175293, "train/loss_math": 2.327959418296814} +{"step": 2869, "train/loss": 2.5244314074516296, "train/lm_loss": 2.5244314074516296, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2908053916692117e-05, "perf/tokens_per_sec": 26367.407892172134, "train/loss_prose": 3.67569629351298, "train/loss_code": 1.507912039756775, "train/loss_math": 2.050846219062805} +{"step": 2870, "train/loss": 1.9727786481380463, "train/lm_loss": 1.9727786481380463, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2890662346532936e-05, "perf/tokens_per_sec": 26761.880050408676, "train/loss_math": 2.0442485411961875, "train/loss_prose": 2.876256823539734, "train/loss_code": 1.2989900509516399} +{"step": 2871, "train/loss": 2.3730532824993134, "train/lm_loss": 2.3730532824993134, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.28732718044393e-05, "perf/tokens_per_sec": 26690.94844319741, "train/loss_code": 1.8698648810386658, "train/loss_math": 1.9993187189102173, "train/loss_prose": 3.623710513114929} +{"step": 2872, "train/loss": 2.4548287987709045, "train/lm_loss": 2.4548287987709045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2855882298887156e-05, "perf/tokens_per_sec": 26823.553670155212, "train/loss_prose": 3.2841876745224, "train/loss_math": 2.178375780582428} +{"step": 2873, "train/loss": 2.9488449692726135, "train/lm_loss": 2.9488449692726135, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2838493838351933e-05, "perf/tokens_per_sec": 26862.055234935324, "train/loss_math": 2.1651176810264587, "train/loss_prose": 3.4619428634643556, "train/loss_code": 1.9508090019226074} +{"step": 2874, "train/loss": 2.201627254486084, "train/lm_loss": 2.201627254486084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2821106431308544e-05, "perf/tokens_per_sec": 26769.2191898432, "train/loss_code": 1.4983714520931244, "train/loss_math": 2.6134743690490723, "train/loss_prose": 3.1962920427322388} +{"step": 2875, "train/loss": 2.6413271725177765, "train/lm_loss": 2.6413271725177765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.280372008623142e-05, "perf/tokens_per_sec": 26926.723933149744, "train/loss_prose": 3.810266892115275, "train/loss_code": 1.853661020596822, "train/loss_math": 2.0694167613983154} +{"step": 2876, "train/loss": 2.6816147565841675, "train/lm_loss": 2.6816147565841675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2786334811594446e-05, "perf/tokens_per_sec": 26260.36043918418, "train/loss_code": 1.3195006251335144, "train/loss_prose": 3.2777162075042723, "train/loss_math": 2.4253358840942383} +{"step": 2877, "train/loss": 2.162126898765564, "train/lm_loss": 2.162126898765564, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2768950615870986e-05, "perf/tokens_per_sec": 26304.1864763605, "train/loss_math": 2.0897376239299774, "train/loss_code": 1.6288117170333862, "train/loss_prose": 4.051628589630127} +{"step": 2878, "train/loss": 2.2579182386398315, "train/lm_loss": 2.2579182386398315, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2751567507533906e-05, "perf/tokens_per_sec": 26572.506038408235, "train/loss_math": 1.9940396149953206, "train/loss_prose": 3.487446427345276, "train/loss_code": 1.7021114031473796} +{"step": 2879, "train/loss": 2.204310268163681, "train/lm_loss": 2.204310268163681, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2734185495055503e-05, "perf/tokens_per_sec": 25655.419089987292, "train/loss_math": 2.182007610797882, "train/loss_code": 0.937271386384964, "train/loss_prose": 3.515954613685608} +{"step": 2880, "train/loss": 2.641918957233429, "train/lm_loss": 2.641918957233429, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.271680458690756e-05, "perf/tokens_per_sec": 26852.314558973165, "train/loss_code": 1.3222911357879639, "train/loss_prose": 3.37851345539093, "train/loss_math": 2.0996687412261963} +{"step": 2881, "train/loss": 2.829277217388153, "train/lm_loss": 2.829277217388153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2699424791561326e-05, "perf/tokens_per_sec": 26049.60255706544, "train/loss_code": 1.2666019201278687, "train/loss_prose": 3.4699289798736572, "train/loss_math": 2.7513697147369385} +{"step": 2882, "train/loss": 2.014933466911316, "train/lm_loss": 2.014933466911316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2682046117487498e-05, "perf/tokens_per_sec": 26253.618586114204, "train/loss_code": 1.5427248080571492, "train/loss_prose": 3.284576416015625, "train/loss_math": 2.051679015159607} +{"step": 2883, "train/loss": 2.8125394880771637, "train/lm_loss": 2.8125394880771637, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.266466857315624e-05, "perf/tokens_per_sec": 25878.20758758439, "train/loss_prose": 3.6670867204666138, "train/loss_math": 2.1585994561513266, "train/loss_code": 1.3561702966690063} +{"step": 2884, "train/loss": 2.1905173659324646, "train/lm_loss": 2.1905173659324646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2647292167037144e-05, "perf/tokens_per_sec": 25950.131237972673, "train/loss_code": 1.3405302464962006, "train/loss_math": 2.12406849861145, "train/loss_prose": 3.3459832668304443} +{"step": 2885, "train/loss": 2.3537504076957703, "train/lm_loss": 2.3537504076957703, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2629916907599268e-05, "perf/tokens_per_sec": 26601.019120982914, "train/loss_math": 2.37537948290507, "train/loss_code": 1.5956453084945679, "train/loss_prose": 2.982081651687622} +{"step": 2886, "train/loss": 2.2979227006435394, "train/lm_loss": 2.2979227006435394, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2612542803311094e-05, "perf/tokens_per_sec": 26696.340311003856, "train/loss_prose": 3.5481868982315063, "train/loss_code": 1.7575585842132568, "train/loss_math": 2.128386378288269} +{"step": 2887, "train/loss": 2.0166231989860535, "train/lm_loss": 2.0166231989860535, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2595169862640568e-05, "perf/tokens_per_sec": 26703.809571476755, "train/loss_code": 1.170910080273946, "train/loss_prose": 3.3747366666793823, "train/loss_math": 1.9569269021352131} +{"step": 2888, "train/loss": 2.694969058036804, "train/lm_loss": 2.694969058036804, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2577798094055023e-05, "perf/tokens_per_sec": 26814.636070486507, "train/loss_math": 2.4876549243927, "train/loss_prose": 3.304358184337616, "train/loss_code": 1.9515548944473267} +{"step": 2889, "train/loss": 2.7378768920898438, "train/lm_loss": 2.7378768920898438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2560427506021266e-05, "perf/tokens_per_sec": 26769.88658394636, "train/loss_prose": 3.5909287134806314, "train/loss_code": 1.91475909948349, "train/loss_math": 2.433570305506388} +{"step": 2890, "train/loss": 2.346640467643738, "train/lm_loss": 2.346640467643738, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.254305810700551e-05, "perf/tokens_per_sec": 25720.792805309484, "train/loss_code": 1.8989225924015045, "train/loss_prose": 3.358619809150696, "train/loss_math": 2.230096936225891} +{"step": 2891, "train/loss": 2.3155879080295563, "train/lm_loss": 2.3155879080295563, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2525689905473376e-05, "perf/tokens_per_sec": 25942.5291349807, "train/loss_prose": 3.3810675144195557, "train/loss_code": 1.7912220358848572, "train/loss_math": 2.298839807510376} +{"step": 2892, "train/loss": 1.905076414346695, "train/lm_loss": 1.905076414346695, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.250832290988993e-05, "perf/tokens_per_sec": 26416.506265895387, "train/loss_prose": 3.5470534563064575, "train/loss_code": 1.2625608384609222, "train/loss_math": 1.8337000608444214} +{"step": 2893, "train/loss": 2.047245055437088, "train/lm_loss": 2.047245055437088, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2490957128719624e-05, "perf/tokens_per_sec": 25689.44718520468, "train/loss_math": 2.3270523945490518, "train/loss_code": 1.2078226208686829} +{"step": 2894, "train/loss": 2.6987867653369904, "train/lm_loss": 2.6987867653369904, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.247359257042634e-05, "perf/tokens_per_sec": 26545.202411027312, "train/loss_code": 1.8364635308583577, "train/loss_math": 2.3805904388427734, "train/loss_prose": 3.7732412020365396} +{"step": 2895, "train/loss": 2.558188498020172, "train/lm_loss": 2.558188498020172, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2456229243473345e-05, "perf/tokens_per_sec": 27043.20047853292, "train/loss_math": 2.2634450991948447, "train/loss_prose": 3.4424182176589966} +{"step": 2896, "train/loss": 2.3701471388339996, "train/lm_loss": 2.3701471388339996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.243886715632332e-05, "perf/tokens_per_sec": 26700.6138763432, "train/loss_math": 2.2199755907058716, "train/loss_prose": 3.4604875246683755, "train/loss_code": 1.379921277364095} +{"step": 2897, "train/loss": 2.788469225168228, "train/lm_loss": 2.788469225168228, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2421506317438325e-05, "perf/tokens_per_sec": 26944.08070111604, "train/loss_math": 2.3210232257843018, "train/loss_prose": 3.5557737946510315, "train/loss_code": 1.7213056087493896} +{"step": 2898, "train/loss": 2.5950068831443787, "train/lm_loss": 2.5950068831443787, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2404146735279822e-05, "perf/tokens_per_sec": 26679.301152281267, "train/loss_prose": 3.370456039905548, "train/loss_code": 1.5819058418273926, "train/loss_math": 2.0572092533111572} +{"step": 2899, "train/loss": 2.781224548816681, "train/lm_loss": 2.781224548816681, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.238678841830867e-05, "perf/tokens_per_sec": 26800.664536741213, "train/loss_code": 2.2281722724437714, "train/loss_prose": 3.4926467736562095, "train/loss_math": 2.8591668605804443} +{"step": 2900, "train/loss": 2.9019603729248047, "train/lm_loss": 2.9019603729248047, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.236943137498509e-05, "perf/tokens_per_sec": 26079.93330287109, "train/loss_math": 2.0812312066555023, "train/loss_prose": 3.722689390182495} +{"step": 2901, "train/loss": 2.2514592111110687, "train/lm_loss": 2.2514592111110687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2352075613768707e-05, "perf/tokens_per_sec": 26352.36227257876, "train/loss_math": 2.1580925583839417, "train/loss_code": 1.8602010409037273, "train/loss_prose": 3.7987005710601807} +{"step": 2902, "train/loss": 1.606497347354889, "train/lm_loss": 1.606497347354889, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2334721143118504e-05, "perf/tokens_per_sec": 26305.918392846204, "train/loss_code": 1.4138302008310955, "train/loss_math": 2.184498906135559} +{"step": 2903, "train/loss": 2.306475192308426, "train/lm_loss": 2.306475192308426, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2317367971492835e-05, "perf/tokens_per_sec": 26582.990754681072, "train/loss_math": 2.1583147048950195, "train/loss_prose": 3.4802029132843018, "train/loss_code": 1.429068386554718} +{"step": 2904, "train/loss": 2.8341509103775024, "train/lm_loss": 2.8341509103775024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.230001610734943e-05, "perf/tokens_per_sec": 25755.92133440076, "train/loss_prose": 3.2603546142578126, "train/loss_math": 2.215145468711853, "train/loss_code": 1.9411424398422241} +{"step": 2905, "train/loss": 3.017770230770111, "train/lm_loss": 3.017770230770111, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2282665559145378e-05, "perf/tokens_per_sec": 26314.37986735491, "train/loss_prose": 3.231306552886963, "train/loss_math": 2.377161383628845} +{"step": 2906, "train/loss": 1.8697192072868347, "train/lm_loss": 1.8697192072868347, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.226531633533712e-05, "perf/tokens_per_sec": 26447.453657335067, "train/loss_code": 1.4715449412663777, "train/loss_math": 2.1086236953735353} +{"step": 2907, "train/loss": 2.352992832660675, "train/lm_loss": 2.352992832660675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.224796844438045e-05, "perf/tokens_per_sec": 26476.96376265487, "train/loss_math": 2.02529509862264, "train/loss_prose": 3.6301664113998413, "train/loss_code": 1.8292414744695027} +{"step": 2908, "train/loss": 2.5698265433311462, "train/lm_loss": 2.5698265433311462, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2230621894730536e-05, "perf/tokens_per_sec": 26551.84865021923, "train/loss_code": 1.7202704846858978, "train/loss_prose": 3.7353660265604653, "train/loss_math": 2.4714322090148926} +{"step": 2909, "train/loss": 2.0526764392852783, "train/lm_loss": 2.0526764392852783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2213276694841866e-05, "perf/tokens_per_sec": 27324.629864552782, "train/loss_code": 1.5693127711613972, "train/loss_math": 2.066519558429718, "train/loss_prose": 3.447394847869873} +{"step": 2910, "train/loss": 2.5050941109657288, "train/lm_loss": 2.5050941109657288, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2195932853168278e-05, "perf/tokens_per_sec": 27351.1226049675, "train/loss_prose": 3.6234822273254395, "train/loss_math": 2.0221975644429526, "train/loss_code": 1.551856279373169} +{"step": 2911, "train/loss": 2.1021804213523865, "train/lm_loss": 2.1021804213523865, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.217859037816296e-05, "perf/tokens_per_sec": 26310.349915003753, "train/loss_code": 1.5191872119903564, "train/loss_math": 1.9135395884513855, "train/loss_prose": 3.4568079710006714} +{"step": 2912, "train/loss": 2.298745334148407, "train/lm_loss": 2.298745334148407, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2161249278278405e-05, "perf/tokens_per_sec": 26751.96193032914, "train/loss_code": 1.6718269288539886, "train/loss_math": 2.397796392440796, "train/loss_prose": 3.101619243621826} +{"step": 2913, "train/loss": 2.8917436599731445, "train/lm_loss": 2.8917436599731445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.214390956196649e-05, "perf/tokens_per_sec": 26490.232128979733, "train/loss_math": 2.418662150700887, "train/loss_code": 1.482011079788208, "train/loss_prose": 3.598988115787506} +{"step": 2914, "train/loss": 2.2146356105804443, "train/lm_loss": 2.2146356105804443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.212657123767834e-05, "perf/tokens_per_sec": 26671.63855462837, "train/loss_math": 1.9960261285305023, "train/loss_prose": 3.75691020488739, "train/loss_code": 1.109579861164093} +{"step": 2915, "train/loss": 2.1768593788146973, "train/lm_loss": 2.1768593788146973, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2109234313864465e-05, "perf/tokens_per_sec": 26744.50734623348, "train/loss_code": 1.478620395064354, "train/loss_prose": 3.2591781616210938, "train/loss_math": 1.7228587865829468} +{"step": 2916, "train/loss": 2.9112855195999146, "train/lm_loss": 2.9112855195999146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.209189879897467e-05, "perf/tokens_per_sec": 26069.762462898107, "train/loss_math": 1.9667231639226277, "train/loss_prose": 3.9294722080230713, "train/loss_code": 1.6722239255905151} +{"step": 2917, "train/loss": 2.157595217227936, "train/lm_loss": 2.157595217227936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2074564701458065e-05, "perf/tokens_per_sec": 26970.39071884959, "train/loss_code": 1.5051757097244263, "train/loss_math": 2.0824708541234336, "train/loss_prose": 3.2489113807678223} +{"step": 2918, "train/loss": 2.58694326877594, "train/lm_loss": 2.58694326877594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.205723202976309e-05, "perf/tokens_per_sec": 26199.088947956745, "train/loss_prose": 3.518934726715088, "train/loss_math": 1.8966348767280579, "train/loss_code": 1.4132685661315918} +{"step": 2919, "train/loss": 2.4381422996520996, "train/lm_loss": 2.4381422996520996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2039900792337474e-05, "perf/tokens_per_sec": 26363.199860049936, "train/loss_prose": 3.3079139391581216, "train/loss_code": 1.7575533390045166, "train/loss_math": 2.1543679237365723} +{"step": 2920, "train/loss": 2.682823598384857, "train/lm_loss": 2.682823598384857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2022570997628256e-05, "perf/tokens_per_sec": 26971.61864541157, "train/loss_prose": 3.2624754309654236, "train/loss_code": 1.9165406227111816, "train/loss_math": 2.66306471824646} +{"step": 2921, "train/loss": 2.972193121910095, "train/lm_loss": 2.972193121910095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.200524265408176e-05, "perf/tokens_per_sec": 27043.455896635915, "train/loss_prose": 3.536722707748413, "train/loss_code": 1.3081395626068115, "train/loss_math": 2.392896056175232} +{"step": 2922, "train/loss": 2.7781803607940674, "train/lm_loss": 2.7781803607940674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1987915770143624e-05, "perf/tokens_per_sec": 26084.605841599343, "train/loss_prose": 3.504814028739929, "train/loss_code": 1.8298893769582112, "train/loss_math": 2.7165184020996094} +{"step": 2923, "train/loss": 2.5220996141433716, "train/lm_loss": 2.5220996141433716, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1970590354258745e-05, "perf/tokens_per_sec": 26788.921315130865, "train/loss_prose": 3.7725183963775635, "train/loss_math": 2.4058600664138794, "train/loss_code": 1.5041600167751312} +{"step": 2924, "train/loss": 2.8438221216201782, "train/lm_loss": 2.8438221216201782, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.195326641487132e-05, "perf/tokens_per_sec": 25973.239171813228, "train/loss_prose": 3.6988813877105713, "train/loss_code": 1.5661847591400146, "train/loss_math": 2.1296218633651733} +{"step": 2925, "train/loss": 2.3219353556632996, "train/lm_loss": 2.3219353556632996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1935943960424833e-05, "perf/tokens_per_sec": 26759.87916529725, "train/loss_code": 1.9022103548049927, "train/loss_math": 2.2429468035697937, "train/loss_prose": 3.897064447402954} +{"step": 2926, "train/loss": 1.9999931156635284, "train/lm_loss": 1.9999931156635284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1918622999362035e-05, "perf/tokens_per_sec": 26521.475432713978, "train/loss_math": 2.1368752479553224, "train/loss_prose": 3.4889204502105713, "train/loss_code": 0.9133239388465881} +{"step": 2927, "train/loss": 2.212340831756592, "train/lm_loss": 2.212340831756592, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1901303540124956e-05, "perf/tokens_per_sec": 27075.122507194348, "train/loss_math": 2.18762469291687, "train/loss_prose": 3.5916635990142822, "train/loss_code": 1.317508578300476} +{"step": 2928, "train/loss": 2.797265410423279, "train/lm_loss": 2.797265410423279, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1883985591154893e-05, "perf/tokens_per_sec": 26891.530174154508, "train/loss_prose": 3.3197587966918944, "train/loss_code": 1.7217462062835693, "train/loss_math": 2.0287910103797913} +{"step": 2929, "train/loss": 2.237251967191696, "train/lm_loss": 2.237251967191696, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.186666916089239e-05, "perf/tokens_per_sec": 26350.017997291357, "train/loss_code": 1.6681693196296692, "train/loss_prose": 3.2068541049957275, "train/loss_math": 2.405815005302429} +{"step": 2930, "train/loss": 2.4010356068611145, "train/lm_loss": 2.4010356068611145, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.184935425777728e-05, "perf/tokens_per_sec": 26729.652222483262, "train/loss_code": 1.8160800536473591, "train/loss_math": 1.8678778012593586, "train/loss_prose": 4.078205466270447} +{"step": 2931, "train/loss": 1.7967351078987122, "train/lm_loss": 1.7967351078987122, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.183204089024864e-05, "perf/tokens_per_sec": 26881.51574098414, "train/loss_code": 1.07826167345047, "train/loss_math": 2.3033403158187866, "train/loss_prose": 2.727076768875122} +{"step": 2932, "train/loss": 2.6434606313705444, "train/lm_loss": 2.6434606313705444, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1814729066744776e-05, "perf/tokens_per_sec": 27017.810477580395, "train/loss_prose": 3.043673038482666, "train/loss_code": 1.9638469219207764, "train/loss_math": 2.0016255378723145} +{"step": 2933, "train/loss": 2.0694161653518677, "train/lm_loss": 2.0694161653518677, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1797418795703267e-05, "perf/tokens_per_sec": 27080.45728805598, "train/loss_math": 1.9590243498484294, "train/loss_code": 1.811782032251358, "train/loss_prose": 3.431128740310669} +{"step": 2934, "train/loss": 2.7258809208869934, "train/lm_loss": 2.7258809208869934, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1780110085560935e-05, "perf/tokens_per_sec": 27066.889732306736, "train/loss_prose": 3.4813426733016968, "train/loss_code": 1.7931362390518188, "train/loss_math": 2.1477022767066956} +{"step": 2935, "train/loss": 2.4224498569965363, "train/lm_loss": 2.4224498569965363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.176280294475383e-05, "perf/tokens_per_sec": 27037.58250066493, "train/loss_math": 2.3820383548736572, "train/loss_code": 1.4239248037338257, "train/loss_prose": 3.447916110356649} +{"step": 2936, "train/loss": 2.360514849424362, "train/lm_loss": 2.360514849424362, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.174549738171724e-05, "perf/tokens_per_sec": 27113.02363648702, "train/loss_code": 1.1945725679397583, "train/loss_math": 2.136269211769104, "train/loss_prose": 3.675954262415568} +{"step": 2937, "train/loss": 2.560190051794052, "train/lm_loss": 2.560190051794052, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1728193404885697e-05, "perf/tokens_per_sec": 27151.331395743942, "train/loss_prose": 3.3818492889404297, "train/loss_code": 1.5973927974700928, "train/loss_math": 2.380395253499349} +{"step": 2938, "train/loss": 2.3398863673210144, "train/lm_loss": 2.3398863673210144, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.171089102269294e-05, "perf/tokens_per_sec": 26020.366899054447, "train/loss_prose": 3.293577035268148, "train/loss_code": 1.6494491497675579, "train/loss_math": 1.945005714893341} +{"step": 2939, "train/loss": 2.4708346724510193, "train/lm_loss": 2.4708346724510193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1693590243571938e-05, "perf/tokens_per_sec": 27150.988117006164, "train/loss_code": 1.8635298013687134, "train/loss_math": 2.0226526260375977, "train/loss_prose": 3.4299685955047607} +{"step": 2940, "train/loss": 2.6774197220802307, "train/lm_loss": 2.6774197220802307, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1676291075954894e-05, "perf/tokens_per_sec": 26408.263150047114, "train/loss_prose": 4.154527346293132, "train/loss_code": 1.6456149816513062, "train/loss_math": 2.009465456008911} +{"step": 2941, "train/loss": 3.176889657974243, "train/lm_loss": 3.176889657974243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1658993528273197e-05, "perf/tokens_per_sec": 26434.43146921473, "train/loss_math": 2.3699142932891846, "train/loss_prose": 3.4458812475204468} +{"step": 2942, "train/loss": 2.063665509223938, "train/lm_loss": 2.063665509223938, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1641697608957466e-05, "perf/tokens_per_sec": 26513.289438432526, "train/loss_code": 2.0059805512428284, "train/loss_math": 2.0828938086827598} +{"step": 2943, "train/loss": 3.131228446960449, "train/lm_loss": 3.131228446960449, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1624403326437523e-05, "perf/tokens_per_sec": 27456.205076080478, "train/loss_prose": 4.332103729248047, "train/loss_code": 0.797534704208374, "train/loss_math": 2.3079593181610107} +{"step": 2944, "train/loss": 2.062860369682312, "train/lm_loss": 2.062860369682312, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1607110689142393e-05, "perf/tokens_per_sec": 26621.216843703754, "train/loss_math": 2.3533974091211953, "train/loss_code": 1.1912491917610168} +{"step": 2945, "train/loss": 2.180965840816498, "train/lm_loss": 2.180965840816498, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.158981970550029e-05, "perf/tokens_per_sec": 27090.36421521708, "train/loss_code": 1.2073603719472885, "train/loss_math": 2.3071820735931396, "train/loss_prose": 3.4370340506235757} +{"step": 2946, "train/loss": 2.7218427062034607, "train/lm_loss": 2.7218427062034607, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1572530383938645e-05, "perf/tokens_per_sec": 27232.63182723157, "train/loss_math": 1.6001921892166138, "train/loss_prose": 3.693347454071045, "train/loss_code": 1.8003863096237183} +{"step": 2947, "train/loss": 2.460720181465149, "train/lm_loss": 2.460720181465149, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.155524273288405e-05, "perf/tokens_per_sec": 27398.79939396839, "train/loss_code": 0.7498598694801331, "train/loss_prose": 3.380376656850179, "train/loss_math": 2.1986928284168243} +{"step": 2948, "train/loss": 2.6331567764282227, "train/lm_loss": 2.6331567764282227, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1537956760762295e-05, "perf/tokens_per_sec": 26832.98011408008, "train/loss_prose": 3.4506507515907288, "train/loss_code": 1.8156625926494598} +{"step": 2949, "train/loss": 2.5630887746810913, "train/lm_loss": 2.5630887746810913, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1520672475998373e-05, "perf/tokens_per_sec": 27019.2127000703, "train/loss_math": 2.2823243141174316, "train/loss_code": 1.6748249530792236, "train/loss_prose": 3.4360293547312417} +{"step": 2950, "train/loss": 2.3524782359600067, "train/lm_loss": 2.3524782359600067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1503389887016404e-05, "perf/tokens_per_sec": 26413.21652391429, "train/loss_prose": 3.785094658533732, "train/loss_code": 1.4929084300994873} +{"step": 2951, "train/loss": 2.6506229639053345, "train/lm_loss": 2.6506229639053345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.148610900223973e-05, "perf/tokens_per_sec": 26209.280999804727, "train/loss_code": 1.7521557807922363, "train/loss_math": 2.190344989299774, "train/loss_prose": 3.5638161500295005} +{"step": 2952, "train/loss": 2.3934073448181152, "train/lm_loss": 2.3934073448181152, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1468829830090838e-05, "perf/tokens_per_sec": 26800.789964759344, "train/loss_math": 2.3117124438285828, "train/loss_prose": 3.579911231994629, "train/loss_code": 1.3702934384346008} +{"step": 2953, "train/loss": 2.587396949529648, "train/lm_loss": 2.587396949529648, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1451552378991392e-05, "perf/tokens_per_sec": 26235.817484060626, "train/loss_code": 1.738389492034912, "train/loss_math": 2.26317435503006, "train/loss_prose": 3.302696466445923} +{"step": 2954, "train/loss": 2.5094184279441833, "train/lm_loss": 2.5094184279441833, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1434276657362213e-05, "perf/tokens_per_sec": 25635.8946801383, "train/loss_prose": 3.4359143575032554, "train/loss_math": 2.415802240371704, "train/loss_code": 1.2600985169410706} +{"step": 2955, "train/loss": 2.5378296077251434, "train/lm_loss": 2.5378296077251434, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1417002673623264e-05, "perf/tokens_per_sec": 26964.125800062153, "train/loss_math": 1.9891011714935303, "train/loss_prose": 3.5598400235176086, "train/loss_code": 1.042537122964859} +{"step": 2956, "train/loss": 1.990120530128479, "train/lm_loss": 1.990120530128479, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1399730436193697e-05, "perf/tokens_per_sec": 26104.38366138801, "train/loss_math": 1.9852694272994995, "train/loss_code": 1.2112319022417068, "train/loss_prose": 3.552749276161194} +{"step": 2957, "train/loss": 2.2057849168777466, "train/lm_loss": 2.2057849168777466, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1382459953491774e-05, "perf/tokens_per_sec": 26491.743524662335, "train/loss_math": 2.114269882440567, "train/loss_prose": 3.4365122318267822, "train/loss_code": 1.9175622860590618} +{"step": 2958, "train/loss": 2.3673022389411926, "train/lm_loss": 2.3673022389411926, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.136519123393493e-05, "perf/tokens_per_sec": 26310.71256016822, "train/loss_code": 1.6180110772450764, "train/loss_math": 2.3479838371276855, "train/loss_prose": 3.129472494125366} +{"step": 2959, "train/loss": 1.7950250208377838, "train/lm_loss": 1.7950250208377838, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1347924285939714e-05, "perf/tokens_per_sec": 26789.339046772548, "train/loss_prose": 3.1747305393218994, "train/loss_code": 1.2140430808067322, "train/loss_math": 2.1097654501597085} +{"step": 2960, "train/loss": 2.15476855635643, "train/lm_loss": 2.15476855635643, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1330659117921837e-05, "perf/tokens_per_sec": 27059.30124728893, "train/loss_math": 2.385810057322184, "train/loss_prose": 3.7622156143188477, "train/loss_code": 1.57962566614151} +{"step": 2961, "train/loss": 2.491341233253479, "train/lm_loss": 2.491341233253479, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1313395738296134e-05, "perf/tokens_per_sec": 26655.623454644756, "train/loss_math": 2.4882365067799888, "train/loss_code": 1.7077978054682414, "train/loss_prose": 3.6713136434555054} +{"step": 2962, "train/loss": 2.3924350142478943, "train/lm_loss": 2.3924350142478943, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.129613415547655e-05, "perf/tokens_per_sec": 26130.74779529643, "train/loss_math": 2.2490053517477855, "train/loss_prose": 3.3964428901672363} +{"step": 2963, "train/loss": 3.0055431723594666, "train/lm_loss": 3.0055431723594666, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1278874377876197e-05, "perf/tokens_per_sec": 26090.27056940399, "train/loss_prose": 3.867705202102661, "train/loss_math": 2.25864315032959, "train/loss_code": 1.223587453365326} +{"step": 2964, "train/loss": 2.541571319103241, "train/lm_loss": 2.541571319103241, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1261616413907265e-05, "perf/tokens_per_sec": 27116.06202333451, "train/loss_prose": 3.454844057559967, "train/loss_code": 0.9691405594348907, "train/loss_math": 2.28745698928833} +{"step": 2965, "train/loss": 3.076503098011017, "train/lm_loss": 3.076503098011017, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1244360271981073e-05, "perf/tokens_per_sec": 26320.790034180518, "train/loss_math": 2.289170503616333, "train/loss_prose": 3.5489026069641114} +{"step": 2966, "train/loss": 2.6293246150016785, "train/lm_loss": 2.6293246150016785, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1227105960508063e-05, "perf/tokens_per_sec": 27000.82540800161, "train/loss_math": 2.2208567460378013, "train/loss_prose": 3.3939578533172607, "train/loss_code": 2.0950762033462524} +{"step": 2967, "train/loss": 2.8827120661735535, "train/lm_loss": 2.8827120661735535, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1209853487897784e-05, "perf/tokens_per_sec": 26554.680466549813, "train/loss_math": 2.356884479522705, "train/loss_prose": 3.51039981842041, "train/loss_code": 1.9494425058364868} +{"step": 2968, "train/loss": 2.1702943444252014, "train/lm_loss": 2.1702943444252014, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1192602862558864e-05, "perf/tokens_per_sec": 26285.150435437183, "train/loss_math": 2.2230937480926514, "train/loss_code": 1.4142757852872212, "train/loss_prose": 3.225123167037964} +{"step": 2969, "train/loss": 2.7050779461860657, "train/lm_loss": 2.7050779461860657, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.117535409289905e-05, "perf/tokens_per_sec": 26418.61863095769, "train/loss_prose": 3.5713945229848227, "train/loss_code": 1.4201728105545044, "train/loss_math": 2.3765665888786316} +{"step": 2970, "train/loss": 2.1757163405418396, "train/lm_loss": 2.1757163405418396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1158107187325198e-05, "perf/tokens_per_sec": 26440.615409240127, "train/loss_code": 1.6187750101089478, "train/loss_math": 2.035323917865753, "train/loss_prose": 3.429991126060486} +{"step": 2971, "train/loss": 2.282259702682495, "train/lm_loss": 2.282259702682495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.114086215424322e-05, "perf/tokens_per_sec": 26439.313286702523, "train/loss_prose": 3.7811463673909507, "train/loss_code": 0.8675552606582642, "train/loss_math": 2.1559863090515137} +{"step": 2972, "train/loss": 1.9984636008739471, "train/lm_loss": 1.9984636008739471, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1123619002058155e-05, "perf/tokens_per_sec": 26429.22618390143, "train/loss_prose": 3.5927462577819824, "train/loss_math": 1.9673105478286743, "train/loss_code": 0.9667614698410034} +{"step": 2973, "train/loss": 2.89993017911911, "train/lm_loss": 2.89993017911911, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1106377739174098e-05, "perf/tokens_per_sec": 26183.35680505748, "train/loss_prose": 3.4863043785095216, "train/loss_math": 2.0021101236343384, "train/loss_code": 1.763698697090149} +{"step": 2974, "train/loss": 2.24822598695755, "train/lm_loss": 2.24822598695755, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1089138373994223e-05, "perf/tokens_per_sec": 26238.301773313397, "train/loss_code": 2.032717009385427, "train/loss_math": 2.168983221054077, "train/loss_prose": 3.6205222606658936} +{"step": 2975, "train/loss": 1.9473887979984283, "train/lm_loss": 1.9473887979984283, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1071900914920816e-05, "perf/tokens_per_sec": 26277.149846664473, "train/loss_math": 2.2967156569163003, "train/loss_code": 1.737792694568634} +{"step": 2976, "train/loss": 2.2708407938480377, "train/lm_loss": 2.2708407938480377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1054665370355166e-05, "perf/tokens_per_sec": 26838.848801459746, "train/loss_math": 2.1110660433769226, "train/loss_code": 1.6310901492834091, "train/loss_prose": 3.710116386413574} +{"step": 2977, "train/loss": 2.57488751411438, "train/lm_loss": 2.57488751411438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1037431748697688e-05, "perf/tokens_per_sec": 27087.84409392206, "train/loss_math": 2.3408071994781494, "train/loss_prose": 3.1941917737325034, "train/loss_code": 1.6532951593399048} +{"step": 2978, "train/loss": 1.9547859728336334, "train/lm_loss": 1.9547859728336334, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1020200058347833e-05, "perf/tokens_per_sec": 27241.570483739768, "train/loss_code": 1.3307197451591493, "train/loss_prose": 3.346493363380432, "train/loss_math": 2.2917020320892334} +{"step": 2979, "train/loss": 2.7545540630817413, "train/lm_loss": 2.7545540630817413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1002970307704132e-05, "perf/tokens_per_sec": 26336.405197417236, "train/loss_math": 2.2153426011403403, "train/loss_prose": 4.111109733581543, "train/loss_code": 1.528537094593048} +{"step": 2980, "train/loss": 2.542935311794281, "train/lm_loss": 2.542935311794281, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0985742505164144e-05, "perf/tokens_per_sec": 26647.437117463665, "train/loss_code": 1.3238873183727264, "train/loss_math": 2.0520525376001992, "train/loss_prose": 3.8465160528818765} +{"step": 2981, "train/loss": 2.1789777278900146, "train/lm_loss": 2.1789777278900146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0968516659124486e-05, "perf/tokens_per_sec": 26277.511577182748, "train/loss_prose": 3.2643014589945474, "train/loss_code": 1.369174063205719, "train/loss_math": 2.1622207164764404} +{"step": 2982, "train/loss": 2.551850914955139, "train/lm_loss": 2.551850914955139, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.095129277798084e-05, "perf/tokens_per_sec": 26071.819735819736, "train/loss_math": 2.1730205416679382, "train/loss_prose": 3.5229795773824057, "train/loss_code": 1.153786063194275} +{"step": 2983, "train/loss": 2.1825429499149323, "train/lm_loss": 2.1825429499149323, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0934070870127912e-05, "perf/tokens_per_sec": 27249.82343629552, "train/loss_code": 1.677211582660675, "train/loss_math": 2.0963757038116455, "train/loss_prose": 2.885040283203125} +{"step": 2984, "train/loss": 1.6529155373573303, "train/lm_loss": 1.6529155373573303, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0916850943959452e-05, "perf/tokens_per_sec": 26687.714367824126, "train/loss_code": 1.467991163333257, "train/loss_math": 2.207688570022583} +{"step": 2985, "train/loss": 2.359969198703766, "train/lm_loss": 2.359969198703766, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0899633007868227e-05, "perf/tokens_per_sec": 26569.875878451945, "train/loss_code": 1.8859665791193645, "train/loss_prose": 3.3762755393981934, "train/loss_math": 2.156434178352356} +{"step": 2986, "train/loss": 1.9251737296581268, "train/lm_loss": 1.9251737296581268, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.088241707024607e-05, "perf/tokens_per_sec": 27188.800888786194, "train/loss_math": 2.124390125274658, "train/loss_code": 1.3754920214414597, "train/loss_prose": 3.526252031326294} +{"step": 2987, "train/loss": 2.235042244195938, "train/lm_loss": 2.235042244195938, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0865203139483812e-05, "perf/tokens_per_sec": 26322.322486145436, "train/loss_prose": 3.486300230026245, "train/loss_code": 1.7289122641086578, "train/loss_math": 1.9960448145866394} +{"step": 2988, "train/loss": 2.8600536584854126, "train/lm_loss": 2.8600536584854126, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0847991223971306e-05, "perf/tokens_per_sec": 27039.965600117102, "train/loss_math": 2.19289430975914, "train/loss_prose": 3.527213156223297} +{"step": 2989, "train/loss": 2.3984444439411163, "train/lm_loss": 2.3984444439411163, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0830781332097446e-05, "perf/tokens_per_sec": 26977.67522902933, "train/loss_code": 1.7193042039871216, "train/loss_prose": 3.506220976511637, "train/loss_math": 1.7916760444641113} +{"step": 2990, "train/loss": 2.445635676383972, "train/lm_loss": 2.445635676383972, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0813573472250114e-05, "perf/tokens_per_sec": 26787.709967037455, "train/loss_math": 1.9830644130706787, "train/loss_prose": 3.622422138849894, "train/loss_code": 0.7655611038208008} +{"step": 2991, "train/loss": 2.6649929881095886, "train/lm_loss": 2.6649929881095886, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0796367652816213e-05, "perf/tokens_per_sec": 26970.39071884959, "train/loss_math": 2.1791224002838137, "train/loss_prose": 4.2507898807525635, "train/loss_code": 1.9227519035339355} +{"step": 2992, "train/loss": 2.5238915383815765, "train/lm_loss": 2.5238915383815765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0779163882181655e-05, "perf/tokens_per_sec": 27195.342855469426, "train/loss_prose": 3.4887115160624185, "train/loss_math": 2.0152245362599692, "train/loss_code": 1.8396628499031067} +{"step": 2993, "train/loss": 1.8896295130252838, "train/lm_loss": 1.8896295130252838, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.076196216873135e-05, "perf/tokens_per_sec": 27125.952192908357, "train/loss_code": 1.676611840724945, "train/loss_prose": 2.9328696727752686, "train/loss_math": 2.124495029449463} +{"step": 2994, "train/loss": 3.068699359893799, "train/lm_loss": 3.068699359893799, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0744762520849193e-05, "perf/tokens_per_sec": 26972.08443990894, "train/loss_prose": 3.6108834743499756, "train/loss_math": 2.165058890978495} +{"step": 2995, "train/loss": 1.7675721645355225, "train/lm_loss": 1.7675721645355225, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0727564946918087e-05, "perf/tokens_per_sec": 26526.512329981208, "train/loss_code": 1.221362481514613, "train/loss_prose": 4.2947678565979, "train/loss_math": 2.517634868621826} +{"step": 2996, "train/loss": 2.799417018890381, "train/lm_loss": 2.799417018890381, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0710369455319928e-05, "perf/tokens_per_sec": 26525.03791812378, "train/loss_prose": 3.4815316200256348, "train/loss_math": 2.45894718170166, "train/loss_code": 1.7756571769714355} +{"step": 2997, "train/loss": 3.0300634503364563, "train/lm_loss": 3.0300634503364563, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0693176054435587e-05, "perf/tokens_per_sec": 25866.674471897255, "train/loss_prose": 3.431943694750468, "train/loss_math": 2.3542652130126953, "train/loss_code": 1.2945806980133057} +{"step": 2998, "train/loss": 2.2496285140514374, "train/lm_loss": 2.2496285140514374, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.067598475264491e-05, "perf/tokens_per_sec": 26691.48753196633, "train/loss_code": 1.5083714326222737, "train/loss_prose": 3.2613539695739746, "train/loss_math": 2.316401958465576} +{"step": 2999, "train/loss": 2.4998117685317993, "train/lm_loss": 2.4998117685317993, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0658795558326743e-05, "perf/tokens_per_sec": 26123.754526073775, "train/loss_prose": 3.7264721393585205, "train/loss_math": 2.302188754081726, "train/loss_code": 1.4049001932144165} +{"step": 3000, "train/loss": 2.3729266822338104, "train/lm_loss": 2.3729266822338104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0641608479858877e-05, "perf/tokens_per_sec": 26927.694645768024, "train/loss_prose": 3.4013512134552, "train/loss_code": 1.653860628604889, "train/loss_math": 2.163917303085327} +{"step": 3000, "eval/loss": 2.1747926272031908, "eval/lm_loss": 2.1747926272031908, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.800359977689986, "eval/loss_code": 1.5658974119517357, "eval/ppl_code": 4.786968891979585, "eval/loss_prose": 3.483038536289282, "eval/ppl_prose": 32.55850211670013, "eval/loss_math": 2.066090376209148, "eval/ppl_math": 7.893900528084005} +{"step": 3001, "train/loss": 2.2552728950977325, "train/lm_loss": 2.2552728950977325, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0624423525618098e-05, "perf/tokens_per_sec": 25203.28435037233, "train/loss_math": 2.006255586942037, "train/loss_code": 0.7553565800189972, "train/loss_prose": 3.5042343139648438} +{"step": 3002, "train/loss": 1.405886024236679, "train/lm_loss": 1.405886024236679, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0607240703980142e-05, "perf/tokens_per_sec": 23523.626751962143, "train/loss_code": 1.4058860391378403} +{"step": 3003, "train/loss": 1.8832325637340546, "train/lm_loss": 1.8832325637340546, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0590060023319696e-05, "perf/tokens_per_sec": 25853.167314004007, "train/loss_math": 2.143098384141922, "train/loss_code": 1.6233667731285095} +{"step": 3004, "train/loss": 2.386362463235855, "train/lm_loss": 2.386362463235855, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.057288149201042e-05, "perf/tokens_per_sec": 25993.831613763217, "train/loss_prose": 3.2001471519470215, "train/loss_math": 2.1314101219177246, "train/loss_code": 1.3863001664479573} +{"step": 3005, "train/loss": 2.0587069392204285, "train/lm_loss": 2.0587069392204285, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0555705118424927e-05, "perf/tokens_per_sec": 26936.011878275895, "train/loss_math": 1.957481821378072, "train/loss_prose": 3.523844361305237, "train/loss_code": 1.1831737558046977} +{"step": 3006, "train/loss": 2.816455602645874, "train/lm_loss": 2.816455602645874, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0538530910934768e-05, "perf/tokens_per_sec": 26397.347928299463, "train/loss_code": 0.8980675339698792, "train/loss_prose": 3.7089781761169434, "train/loss_math": 2.2658881346384683} +{"step": 3007, "train/loss": 1.9858740866184235, "train/lm_loss": 1.9858740866184235, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0521358877910444e-05, "perf/tokens_per_sec": 26501.91929656768, "train/loss_code": 1.6202311118443806, "train/loss_prose": 3.082803249359131} +{"step": 3008, "train/loss": 2.3512589633464813, "train/lm_loss": 2.3512589633464813, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0504189027721395e-05, "perf/tokens_per_sec": 26463.17875412624, "train/loss_code": 1.5926132202148438, "train/loss_math": 2.1957666079203286, "train/loss_prose": 3.0125149885813394} +{"step": 3009, "train/loss": 2.486221879720688, "train/lm_loss": 2.486221879720688, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0487021368736003e-05, "perf/tokens_per_sec": 26000.91289022476, "train/loss_prose": 3.5456011295318604, "train/loss_math": 2.165422797203064, "train/loss_code": 1.378351628780365} +{"step": 3010, "train/loss": 2.3614180088043213, "train/lm_loss": 2.3614180088043213, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0469855909321564e-05, "perf/tokens_per_sec": 26448.43083867412, "train/loss_math": 2.1033787727355957, "train/loss_prose": 3.281676451365153, "train/loss_code": 1.3680887818336487} +{"step": 3011, "train/loss": 2.135329395532608, "train/lm_loss": 2.135329395532608, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0452692657844333e-05, "perf/tokens_per_sec": 25706.0912288892, "train/loss_math": 2.161891520023346, "train/loss_code": 1.5737008651097615, "train/loss_prose": 3.7139675617218018} +{"step": 3012, "train/loss": 3.0567885041236877, "train/lm_loss": 3.0567885041236877, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0435531622669453e-05, "perf/tokens_per_sec": 25656.45356415357, "train/loss_prose": 3.480503797531128, "train/loss_math": 2.350596268971761} +{"step": 3013, "train/loss": 1.9348582029342651, "train/lm_loss": 1.9348582029342651, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0418372812161012e-05, "perf/tokens_per_sec": 26137.386916905907, "train/loss_code": 1.3387244939804077, "train/loss_math": 2.121870994567871, "train/loss_prose": 3.331685781478882} +{"step": 3014, "train/loss": 2.3952161073684692, "train/lm_loss": 2.3952161073684692, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0401216234681995e-05, "perf/tokens_per_sec": 25234.158297812337, "train/loss_prose": 3.5756945610046387, "train/loss_code": 0.9897792637348175, "train/loss_math": 2.1516950130462646} +{"step": 3015, "train/loss": 2.2009502351284027, "train/lm_loss": 2.2009502351284027, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.038406189859433e-05, "perf/tokens_per_sec": 26064.73933393312, "train/loss_math": 2.1249701023101806, "train/loss_prose": 3.34842848777771, "train/loss_code": 1.8171608448028564} +{"step": 3016, "train/loss": 2.294230669736862, "train/lm_loss": 2.294230669736862, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0366909812258817e-05, "perf/tokens_per_sec": 27058.576726926796, "train/loss_code": 1.7206257581710815, "train/loss_math": 1.9597189823786418, "train/loss_prose": 3.6564053297042847} +{"step": 3017, "train/loss": 2.3990540206432343, "train/lm_loss": 2.3990540206432343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.034975998403517e-05, "perf/tokens_per_sec": 26183.197184751447, "train/loss_code": 1.1668819387753804, "train/loss_prose": 3.8598203659057617, "train/loss_math": 2.0561622381210327} +{"step": 3018, "train/loss": 2.4118335843086243, "train/lm_loss": 2.4118335843086243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0332612422282027e-05, "perf/tokens_per_sec": 26744.215910594557, "train/loss_code": 1.4210007786750793, "train/loss_prose": 3.398602843284607, "train/loss_math": 1.4372551441192627} +{"step": 3019, "train/loss": 2.5951545536518097, "train/lm_loss": 2.5951545536518097, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.031546713535688e-05, "perf/tokens_per_sec": 27196.031666679857, "train/loss_math": 2.5146846771240234, "train/loss_prose": 3.540750821431478, "train/loss_code": 1.7032047112782795} +{"step": 3020, "train/loss": 2.533871114253998, "train/lm_loss": 2.533871114253998, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0298324131616158e-05, "perf/tokens_per_sec": 25556.38242635749, "train/loss_prose": 3.6521087487538657, "train/loss_code": 1.8520300388336182, "train/loss_math": 1.8792763948440552} +{"step": 3021, "train/loss": 2.355197161436081, "train/lm_loss": 2.355197161436081, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0281183419415125e-05, "perf/tokens_per_sec": 26176.57465789638, "train/loss_prose": 3.3151517709096274, "train/loss_math": 2.3342679738998413, "train/loss_code": 1.4091949462890625} +{"step": 3022, "train/loss": 2.4185337126255035, "train/lm_loss": 2.4185337126255035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0264045007107973e-05, "perf/tokens_per_sec": 26221.76206698658, "train/loss_math": 1.9699112176895142, "train/loss_prose": 3.3837029933929443, "train/loss_code": 1.2811823884646099} +{"step": 3023, "train/loss": 2.225131094455719, "train/lm_loss": 2.225131094455719, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.024690890304775e-05, "perf/tokens_per_sec": 26256.106247869924, "train/loss_code": 1.7482954859733582, "train/loss_math": 2.3840763568878174} +{"step": 3024, "train/loss": 2.0618919730186462, "train/lm_loss": 2.0618919730186462, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.022977511558638e-05, "perf/tokens_per_sec": 26899.951435666662, "train/loss_code": 1.587854653596878, "train/loss_math": 2.3167107105255127, "train/loss_prose": 3.19358491897583} +{"step": 3025, "train/loss": 2.4152306020259857, "train/lm_loss": 2.4152306020259857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.021264365307468e-05, "perf/tokens_per_sec": 25998.04057420091, "train/loss_math": 2.323167860507965, "train/loss_code": 1.5165712237358093, "train/loss_prose": 3.4980157613754272} +{"step": 3026, "train/loss": 2.3061589002609253, "train/lm_loss": 2.3061589002609253, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0195514523862293e-05, "perf/tokens_per_sec": 25669.295648886782, "train/loss_code": 1.9624653458595276, "train/loss_prose": 3.4903416633605957, "train/loss_math": 2.2067998170852663} +{"step": 3027, "train/loss": 2.6499403417110443, "train/lm_loss": 2.6499403417110443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0178387736297773e-05, "perf/tokens_per_sec": 26814.51051283532, "train/loss_prose": 3.522731065750122, "train/loss_math": 2.267920196056366, "train/loss_code": 1.5596487522125244} +{"step": 3028, "train/loss": 2.453892797231674, "train/lm_loss": 2.453892797231674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0161263298728495e-05, "perf/tokens_per_sec": 25952.248080002417, "train/loss_math": 2.2633564949035643, "train/loss_code": 1.4701614379882812, "train/loss_prose": 3.422099232673645} +{"step": 3029, "train/loss": 2.9177626371383667, "train/lm_loss": 2.9177626371383667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0144141219500705e-05, "perf/tokens_per_sec": 25770.02282119398, "train/loss_prose": 3.35350235303243, "train/loss_code": 1.352965235710144, "train/loss_math": 1.8681222200393677} +{"step": 3030, "train/loss": 2.5095062255859375, "train/lm_loss": 2.5095062255859375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0127021506959488e-05, "perf/tokens_per_sec": 26170.154804194568, "train/loss_code": 1.586065948009491, "train/loss_prose": 3.8005080223083496, "train/loss_math": 2.3302624225616455} +{"step": 3031, "train/loss": 2.635652005672455, "train/lm_loss": 2.635652005672455, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.01099041694488e-05, "perf/tokens_per_sec": 25958.757572369275, "train/loss_code": 1.1934561729431152, "train/loss_prose": 3.2669963240623474, "train/loss_math": 2.2745913664499917} +{"step": 3032, "train/loss": 2.3046153783798218, "train/lm_loss": 2.3046153783798218, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.009278921531141e-05, "perf/tokens_per_sec": 26641.610517778525, "train/loss_math": 2.355269527435303, "train/loss_prose": 2.883423089981079, "train/loss_code": 1.8885763883590698} +{"step": 3033, "train/loss": 1.9371827840805054, "train/lm_loss": 1.9371827840805054, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0075676652888936e-05, "perf/tokens_per_sec": 25948.56342946559, "train/loss_code": 1.4977160692214966, "train/loss_math": 2.184380571047465, "train/loss_prose": 2.953455924987793} +{"step": 3034, "train/loss": 2.34354966878891, "train/lm_loss": 2.34354966878891, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0058566490521847e-05, "perf/tokens_per_sec": 26470.966743090994, "train/loss_math": 2.0309085845947266, "train/loss_prose": 3.630669116973877, "train/loss_code": 1.2648575107256572} +{"step": 3035, "train/loss": 2.4299624264240265, "train/lm_loss": 2.4299624264240265, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.004145873654942e-05, "perf/tokens_per_sec": 26487.944939191355, "train/loss_math": 2.0739282608032226, "train/loss_code": 2.411320447921753, "train/loss_prose": 3.3293687105178833} +{"step": 3036, "train/loss": 2.2259716391563416, "train/lm_loss": 2.2259716391563416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0024353399309765e-05, "perf/tokens_per_sec": 26494.562534120876, "train/loss_math": 1.9750914573669434, "train/loss_code": 1.9424793124198914, "train/loss_prose": 4.047356605529785} +{"step": 3037, "train/loss": 2.370093733072281, "train/lm_loss": 2.370093733072281, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.000725048713983e-05, "perf/tokens_per_sec": 26573.328070186293, "train/loss_math": 2.326387047767639, "train/loss_prose": 3.6950899362564087, "train/loss_code": 1.1325106024742126} +{"step": 3038, "train/loss": 2.318286895751953, "train/lm_loss": 2.318286895751953, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9990150008375347e-05, "perf/tokens_per_sec": 26156.96610987532, "train/loss_math": 2.242576092481613, "train/loss_prose": 3.1510452032089233, "train/loss_code": 1.636949896812439} +{"step": 3039, "train/loss": 2.8272846341133118, "train/lm_loss": 2.8272846341133118, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9973051971350888e-05, "perf/tokens_per_sec": 26170.872395460432, "train/loss_prose": 3.7501625418663025, "train/loss_code": 1.0362399816513062, "train/loss_math": 2.1937955220540366} +{"step": 3040, "train/loss": 2.4794362783432007, "train/lm_loss": 2.4794362783432007, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9955956384399828e-05, "perf/tokens_per_sec": 26215.04001562538, "train/loss_prose": 3.658721685409546, "train/loss_code": 1.6431033909320831, "train/loss_math": 2.286911964416504} +{"step": 3041, "train/loss": 1.6695105880498886, "train/lm_loss": 1.6695105880498886, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9938863255854357e-05, "perf/tokens_per_sec": 26405.50305940361, "train/loss_code": 1.0639525453249614, "train/loss_prose": 3.4861844778060913} +{"step": 3042, "train/loss": 2.704816997051239, "train/lm_loss": 2.704816997051239, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.992177259404545e-05, "perf/tokens_per_sec": 25685.875178853097, "train/loss_code": 1.7041651407877605, "train/loss_math": 2.190967321395874, "train/loss_prose": 3.583768367767334} +{"step": 3043, "train/loss": 2.866152733564377, "train/lm_loss": 2.866152733564377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9904684407302883e-05, "perf/tokens_per_sec": 26103.47139915976, "train/loss_code": 1.3856610655784607, "train/loss_prose": 3.6187825202941895, "train/loss_math": 2.0639867782592773} +{"step": 3044, "train/loss": 2.129390835762024, "train/lm_loss": 2.129390835762024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9887598703955242e-05, "perf/tokens_per_sec": 25904.741574863463, "train/loss_math": 2.4527422189712524, "train/loss_code": 1.6770872831344605, "train/loss_prose": 3.7442054748535156} +{"step": 3045, "train/loss": 2.4151171445846558, "train/lm_loss": 2.4151171445846558, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9870515492329884e-05, "perf/tokens_per_sec": 26943.91167097438, "train/loss_prose": 3.4962581396102905, "train/loss_code": 1.0967787901560466, "train/loss_math": 2.0455687046051025} +{"step": 3046, "train/loss": 2.3784357607364655, "train/lm_loss": 2.3784357607364655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9853434780752973e-05, "perf/tokens_per_sec": 25910.7583011583, "train/loss_prose": 3.67080295085907, "train/loss_code": 1.9860824346542358, "train/loss_math": 1.9284289181232452} +{"step": 3047, "train/loss": 2.249690353870392, "train/lm_loss": 2.249690353870392, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.983635657754942e-05, "perf/tokens_per_sec": 26007.722420704606, "train/loss_prose": 3.7893991470336914, "train/loss_math": 2.0290051460266114, "train/loss_code": 2.031549036502838} +{"step": 3048, "train/loss": 1.9961645305156708, "train/lm_loss": 1.9961645305156708, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.981928089104294e-05, "perf/tokens_per_sec": 26271.443860800046, "train/loss_math": 1.8705893754959106, "train/loss_prose": 3.65863299369812, "train/loss_code": 1.3562922835350038} +{"step": 3049, "train/loss": 2.3789995312690735, "train/lm_loss": 2.3789995312690735, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.980220772955602e-05, "perf/tokens_per_sec": 25951.3072186875, "train/loss_code": 1.227123538653056, "train/loss_prose": 3.7300464312235513, "train/loss_math": 2.0802427530288696} +{"step": 3050, "train/loss": 2.276813954114914, "train/lm_loss": 2.276813954114914, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9785137101409908e-05, "perf/tokens_per_sec": 26226.525606168623, "train/loss_prose": 3.8958624601364136, "train/loss_code": 1.4845739901065826, "train/loss_math": 2.2422447204589844} +{"step": 3051, "train/loss": 2.875384360551834, "train/lm_loss": 2.875384360551834, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9768069014924622e-05, "perf/tokens_per_sec": 26359.72115560179, "train/loss_prose": 3.3427619457244875, "train/loss_math": 2.528170347213745, "train/loss_code": 1.880547046661377} +{"step": 3052, "train/loss": 2.2444156408309937, "train/lm_loss": 2.2444156408309937, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.975100347841894e-05, "perf/tokens_per_sec": 26636.777613773087, "train/loss_math": 2.267642617225647, "train/loss_prose": 3.6510814428329468, "train/loss_code": 1.5294690430164337} +{"step": 3053, "train/loss": 2.8256956934928894, "train/lm_loss": 2.8256956934928894, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9733940500210398e-05, "perf/tokens_per_sec": 27054.82688873421, "train/loss_prose": 3.2051903009414673, "train/loss_code": 1.14919114112854, "train/loss_math": 2.2252321243286133} +{"step": 3054, "train/loss": 2.197909653186798, "train/lm_loss": 2.197909653186798, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9716880088615285e-05, "perf/tokens_per_sec": 26001.10964742348, "train/loss_math": 2.2449096043904624, "train/loss_code": 1.6028075218200684, "train/loss_prose": 3.020062804222107} +{"step": 3055, "train/loss": 2.116752475500107, "train/lm_loss": 2.116752475500107, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.969982225194864e-05, "perf/tokens_per_sec": 26266.663992074074, "train/loss_math": 2.1892791986465454, "train/loss_prose": 3.619067351023356, "train/loss_code": 0.5660865704218546} +{"step": 3056, "train/loss": 2.638979583978653, "train/lm_loss": 2.638979583978653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.968276699852424e-05, "perf/tokens_per_sec": 26085.714825392923, "train/loss_math": 2.2246428728103638, "train/loss_prose": 3.4517822265625, "train/loss_code": 1.427711009979248} +{"step": 3057, "train/loss": 2.7035036087036133, "train/lm_loss": 2.7035036087036133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9665714336654602e-05, "perf/tokens_per_sec": 26106.96213853815, "train/loss_math": 2.318800608317057, "train/loss_code": 1.9174224734306335, "train/loss_prose": 3.6122605005900064} +{"step": 3058, "train/loss": 2.2790738940238953, "train/lm_loss": 2.2790738940238953, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9648664274651e-05, "perf/tokens_per_sec": 25997.961889542625, "train/loss_code": 1.3085362315177917, "train/loss_prose": 3.347869078318278, "train/loss_math": 2.131687641143799} +{"step": 3059, "train/loss": 2.2173796594142914, "train/lm_loss": 2.2173796594142914, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.963161682082342e-05, "perf/tokens_per_sec": 25935.127244620835, "train/loss_math": 2.2600408792495728, "train/loss_prose": 3.8858364820480347, "train/loss_code": 1.3618205785751343} +{"step": 3060, "train/loss": 2.291021078824997, "train/lm_loss": 2.291021078824997, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.961457198348059e-05, "perf/tokens_per_sec": 25882.69113337823, "train/loss_math": 2.3227227926254272, "train/loss_code": 1.6538152992725372, "train/loss_prose": 3.5337308645248413} +{"step": 3061, "train/loss": 2.4194765388965607, "train/lm_loss": 2.4194765388965607, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.959752977092995e-05, "perf/tokens_per_sec": 27057.255642614713, "train/loss_code": 1.5137679278850555, "train/loss_prose": 3.672126213709513, "train/loss_math": 2.2843616008758545} +{"step": 3062, "train/loss": 2.378282755613327, "train/lm_loss": 2.378282755613327, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.958049019147767e-05, "perf/tokens_per_sec": 26285.834128697526, "train/loss_prose": 3.290059725443522, "train/loss_code": 1.6298210223515828, "train/loss_math": 2.1333093643188477} +{"step": 3063, "train/loss": 2.44798544049263, "train/lm_loss": 2.44798544049263, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.956345325342863e-05, "perf/tokens_per_sec": 26012.29034661111, "train/loss_math": 1.9394015669822693, "train/loss_prose": 3.1102782487869263, "train/loss_code": 1.631983757019043} +{"step": 3064, "train/loss": 3.0136225819587708, "train/lm_loss": 3.0136225819587708, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9546418965086442e-05, "perf/tokens_per_sec": 26036.337765707547, "train/loss_prose": 3.4531931479771933, "train/loss_code": 1.5608868598937988, "train/loss_math": 1.8289343118667603} +{"step": 3065, "train/loss": 2.690570145845413, "train/lm_loss": 2.690570145845413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9529387334753395e-05, "perf/tokens_per_sec": 26071.701038171508, "train/loss_prose": 3.7919071912765503, "train/loss_code": 1.2749847173690796, "train/loss_math": 2.531979560852051} +{"step": 3066, "train/loss": 2.0931243896484375, "train/lm_loss": 2.0931243896484375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9512358370730493e-05, "perf/tokens_per_sec": 25911.618061651418, "train/loss_prose": 3.478913148244222, "train/loss_code": 1.2616510331630706} +{"step": 3067, "train/loss": 2.39110666513443, "train/lm_loss": 2.39110666513443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9495332081317464e-05, "perf/tokens_per_sec": 26122.165668705165, "train/loss_math": 2.3244128227233887, "train/loss_prose": 3.324394623438517, "train/loss_code": 1.502280871073405} +{"step": 3068, "train/loss": 2.3338755071163177, "train/lm_loss": 2.3338755071163177, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.947830847481271e-05, "perf/tokens_per_sec": 26004.21880643057, "train/loss_code": 1.3669239679972331, "train/loss_prose": 3.514206647872925, "train/loss_math": 2.0138062834739685} +{"step": 3069, "train/loss": 2.1931715607643127, "train/lm_loss": 2.1931715607643127, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.946128755951332e-05, "perf/tokens_per_sec": 27012.712713367462, "train/loss_code": 1.3959900935490925, "train/loss_math": 2.092349370320638, "train/loss_prose": 3.5401771068573} +{"step": 3070, "train/loss": 2.4300949573516846, "train/lm_loss": 2.4300949573516846, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9444269343715092e-05, "perf/tokens_per_sec": 26234.535454305293, "train/loss_code": 1.140872339407603, "train/loss_prose": 3.805421511332194, "train/loss_math": 2.300938844680786} +{"step": 3071, "train/loss": 2.874841183423996, "train/lm_loss": 2.874841183423996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.942725383571249e-05, "perf/tokens_per_sec": 26095.739563143663, "train/loss_math": 2.261467456817627, "train/loss_code": 1.5372899770736694, "train/loss_prose": 3.5325361251831056} +{"step": 3072, "train/loss": 2.219613879919052, "train/lm_loss": 2.219613879919052, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9410241043798676e-05, "perf/tokens_per_sec": 26314.01712108982, "train/loss_code": 2.0851466059684753, "train/loss_math": 2.0961855173110964, "train/loss_prose": 3.105689525604248} +{"step": 3073, "train/loss": 2.3527916073799133, "train/lm_loss": 2.3527916073799133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9393230976265473e-05, "perf/tokens_per_sec": 25779.496866821926, "train/loss_prose": 3.400958855946859, "train/loss_math": 2.150987188021342, "train/loss_code": 1.0832480192184448} +{"step": 3074, "train/loss": 2.8429654240608215, "train/lm_loss": 2.8429654240608215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.937622364140338e-05, "perf/tokens_per_sec": 26588.585987182283, "train/loss_code": 1.1876597702503204, "train/loss_math": 2.266125202178955, "train/loss_prose": 3.6204556465148925} +{"step": 3075, "train/loss": 2.174608886241913, "train/lm_loss": 2.174608886241913, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9359219047501565e-05, "perf/tokens_per_sec": 26009.100505198076, "train/loss_math": 1.9471015930175781, "train/loss_prose": 3.7658896446228027, "train/loss_code": 1.038341999053955} +{"step": 3076, "train/loss": 2.2723130583763123, "train/lm_loss": 2.2723130583763123, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9342217202847856e-05, "perf/tokens_per_sec": 26858.653592645864, "train/loss_code": 1.91383758187294, "train/loss_math": 2.236696720123291, "train/loss_prose": 3.8130645751953125} +{"step": 3077, "train/loss": 2.5124824941158295, "train/lm_loss": 2.5124824941158295, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9325218115728755e-05, "perf/tokens_per_sec": 27183.38032813343, "train/loss_prose": 3.501248836517334, "train/loss_code": 1.5237162113189697} +{"step": 3078, "train/loss": 2.1869620084762573, "train/lm_loss": 2.1869620084762573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9308221794429403e-05, "perf/tokens_per_sec": 26633.226340740464, "train/loss_prose": 3.4077290296554565, "train/loss_code": 1.3890292048454285, "train/loss_math": 2.171050270398458} +{"step": 3079, "train/loss": 1.9776138365268707, "train/lm_loss": 1.9776138365268707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9291228247233605e-05, "perf/tokens_per_sec": 26090.944162220694, "train/loss_code": 1.234610120455424, "train/loss_math": 2.2407559752464294, "train/loss_prose": 3.154055595397949} +{"step": 3080, "train/loss": 2.3585463166236877, "train/lm_loss": 2.3585463166236877, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9274237482423814e-05, "perf/tokens_per_sec": 26172.58680056733, "train/loss_prose": 3.51127552986145, "train/loss_code": 1.5941054821014404, "train/loss_math": 1.9581222534179688} +{"step": 3081, "train/loss": 2.8567194640636444, "train/lm_loss": 2.8567194640636444, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9257249508281107e-05, "perf/tokens_per_sec": 26438.41815185124, "train/loss_math": 2.2070757150650024, "train/loss_prose": 4.0051906903584795, "train/loss_code": 2.108478307723999} +{"step": 3082, "train/loss": 2.487465500831604, "train/lm_loss": 2.487465500831604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9240264333085245e-05, "perf/tokens_per_sec": 26003.82520089546, "train/loss_prose": 3.4010847409566245, "train/loss_code": 1.8605368435382843, "train/loss_math": 2.2543227672576904} +{"step": 3083, "train/loss": 1.9322901368141174, "train/lm_loss": 1.9322901368141174, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.922328196511456e-05, "perf/tokens_per_sec": 27047.28774723346, "train/loss_prose": 3.7642743587493896, "train/loss_code": 1.069307545820872, "train/loss_math": 2.1215307414531708} +{"step": 3084, "train/loss": 2.356186270713806, "train/lm_loss": 2.356186270713806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.920630241264607e-05, "perf/tokens_per_sec": 26316.758118352784, "train/loss_prose": 3.274980624516805, "train/loss_code": 1.585310419400533, "train/loss_math": 2.1343084573745728} +{"step": 3085, "train/loss": 2.0736735463142395, "train/lm_loss": 2.0736735463142395, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.91893256839554e-05, "perf/tokens_per_sec": 25991.825953293028, "train/loss_code": 1.7486350536346436, "train/loss_prose": 3.549346446990967, "train/loss_math": 2.015167514483134} +{"step": 3086, "train/loss": 2.377621829509735, "train/lm_loss": 2.377621829509735, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.917235178731681e-05, "perf/tokens_per_sec": 26533.84586776221, "train/loss_code": 1.720906400680542, "train/loss_prose": 3.4721478621164956} +{"step": 3087, "train/loss": 2.5347819328308105, "train/lm_loss": 2.5347819328308105, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.915538073100316e-05, "perf/tokens_per_sec": 26057.899917487746, "train/loss_math": 2.2780532836914062, "train/loss_code": 1.511075496673584, "train/loss_prose": 4.071945905685425} +{"step": 3088, "train/loss": 2.2043312788009644, "train/lm_loss": 2.2043312788009644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9138412523285936e-05, "perf/tokens_per_sec": 27067.52940583327, "train/loss_math": 1.9788592159748077, "train/loss_prose": 3.6403132677078247, "train/loss_code": 1.2192934155464172} +{"step": 3089, "train/loss": 2.311838150024414, "train/lm_loss": 2.311838150024414, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.912144717243525e-05, "perf/tokens_per_sec": 26965.73408256161, "train/loss_code": 1.3410202264785767, "train/loss_prose": 3.3423353830973306, "train/loss_math": 2.2223191261291504} +{"step": 3090, "train/loss": 2.4319498240947723, "train/lm_loss": 2.4319498240947723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9104484686719795e-05, "perf/tokens_per_sec": 26185.711430653064, "train/loss_math": 2.216925096511841, "train/loss_prose": 3.8540875911712646, "train/loss_code": 0.6627984046936035} +{"step": 3091, "train/loss": 2.0934876203536987, "train/lm_loss": 2.0934876203536987, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.908752507440689e-05, "perf/tokens_per_sec": 26389.88694965607, "train/loss_math": 2.1362348794937134, "train/loss_code": 1.7531520525614421, "train/loss_prose": 2.943504810333252} +{"step": 3092, "train/loss": 2.932631731033325, "train/lm_loss": 2.932631731033325, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.907056834376243e-05, "perf/tokens_per_sec": 26586.89894115863, "train/loss_prose": 3.517174482345581, "train/loss_math": 2.0764971375465393, "train/loss_code": 1.7221864461898804} +{"step": 3093, "train/loss": 2.232525557279587, "train/lm_loss": 2.232525557279587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9053614503050928e-05, "perf/tokens_per_sec": 26132.33769686866, "train/loss_code": 1.6889776587486267, "train/loss_prose": 2.859487295150757, "train/loss_math": 2.5258312225341797} +{"step": 3094, "train/loss": 2.572409838438034, "train/lm_loss": 2.572409838438034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9036663560535483e-05, "perf/tokens_per_sec": 26238.38191950956, "train/loss_code": 1.8003060817718506, "train/loss_math": 2.121089736620585, "train/loss_prose": 3.538465897242228} +{"step": 3095, "train/loss": 2.1460355520248413, "train/lm_loss": 2.1460355520248413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9019715524477767e-05, "perf/tokens_per_sec": 26113.073179480867, "train/loss_code": 1.1630466083685558, "train/loss_prose": 3.9243084192276, "train/loss_math": 1.9435091813405354} +{"step": 3096, "train/loss": 2.4222992956638336, "train/lm_loss": 2.4222992956638336, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9002770403138065e-05, "perf/tokens_per_sec": 27103.484960448804, "train/loss_code": 1.1687753796577454, "train/loss_prose": 3.8638030290603638, "train/loss_math": 2.32830947637558} +{"step": 3097, "train/loss": 2.812322586774826, "train/lm_loss": 2.812322586774826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8985828204775206e-05, "perf/tokens_per_sec": 27075.59188356461, "train/loss_math": 2.3105525970458984, "train/loss_code": 1.6057783961296082, "train/loss_prose": 3.6664796471595764} +{"step": 3098, "train/loss": 2.4883890748023987, "train/lm_loss": 2.4883890748023987, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8968888937646622e-05, "perf/tokens_per_sec": 26534.337646611828, "train/loss_math": 2.1460402250289916, "train/loss_prose": 3.05897057056427} +{"step": 3099, "train/loss": 2.842262178659439, "train/lm_loss": 2.842262178659439, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.895195261000831e-05, "perf/tokens_per_sec": 26855.084894290514, "train/loss_prose": 3.3815146923065185, "train/loss_code": 0.943771243095398, "train/loss_math": 2.443376064300537} +{"step": 3100, "train/loss": 2.1796995997428894, "train/lm_loss": 2.1796995997428894, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.893501923011482e-05, "perf/tokens_per_sec": 26683.19626743041, "train/loss_math": 2.2650643984476724, "train/loss_prose": 3.7173222303390503, "train/loss_code": 1.0692529280980427} +{"step": 3101, "train/loss": 2.687311589717865, "train/lm_loss": 2.687311589717865, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.891808880621928e-05, "perf/tokens_per_sec": 26197.21127800058, "train/loss_prose": 3.268427789211273, "train/loss_math": 2.0310904582341514, "train/loss_code": 2.3315110206604004} +{"step": 3102, "train/loss": 2.049353778362274, "train/lm_loss": 2.049353778362274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.890116134657336e-05, "perf/tokens_per_sec": 26050.78757073798, "train/loss_math": 2.1107531785964966, "train/loss_code": 1.278351326783498, "train/loss_prose": 4.116764068603516} +{"step": 3103, "train/loss": 2.5469207167625427, "train/lm_loss": 2.5469207167625427, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.888423685942732e-05, "perf/tokens_per_sec": 27177.48899603883, "train/loss_prose": 3.6550583839416504, "train/loss_math": 2.151830720901489, "train/loss_code": 2.3060953617095947} +{"step": 3104, "train/loss": 2.333479642868042, "train/lm_loss": 2.333479642868042, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8867315353029935e-05, "perf/tokens_per_sec": 26122.880630603235, "train/loss_code": 1.8094376921653748, "train/loss_prose": 3.2355209589004517, "train/loss_math": 2.4795223474502563} +{"step": 3105, "train/loss": 2.3617482781410217, "train/lm_loss": 2.3617482781410217, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.885039683562855e-05, "perf/tokens_per_sec": 25925.57760906406, "train/loss_prose": 3.3575405279795327, "train/loss_math": 2.068728526433309, "train/loss_code": 1.307589739561081} +{"step": 3106, "train/loss": 2.671254575252533, "train/lm_loss": 2.671254575252533, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8833481315469042e-05, "perf/tokens_per_sec": 26106.248047714926, "train/loss_prose": 3.5068909525871277, "train/loss_code": 1.5661314725875854, "train/loss_math": 2.644078254699707} +{"step": 3107, "train/loss": 2.8698317408561707, "train/lm_loss": 2.8698317408561707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8816568800795822e-05, "perf/tokens_per_sec": 25989.46674896223, "train/loss_prose": 3.4344953060150147, "train/loss_math": 2.1795710921287537, "train/loss_code": 1.4270350933074951} +{"step": 3108, "train/loss": 1.9106133580207825, "train/lm_loss": 1.9106133580207825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.879965929985187e-05, "perf/tokens_per_sec": 26119.465054687094, "train/loss_code": 1.7033228158950806, "train/loss_math": 2.2560977141062417} +{"step": 3109, "train/loss": 2.4890576601028442, "train/lm_loss": 2.4890576601028442, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8782752820878634e-05, "perf/tokens_per_sec": 26089.63663146513, "train/loss_math": 2.2944961071014403, "train/loss_code": 1.6926323175430298, "train/loss_prose": 3.373673677444458} +{"step": 3110, "train/loss": 2.4050092697143555, "train/lm_loss": 2.4050092697143555, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8765849372116153e-05, "perf/tokens_per_sec": 25850.833212705224, "train/loss_prose": 3.3064757585525513, "train/loss_code": 1.1789260109265645, "train/loss_math": 2.4773921966552734} +{"step": 3111, "train/loss": 2.3386171460151672, "train/lm_loss": 2.3386171460151672, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8748948961802948e-05, "perf/tokens_per_sec": 26780.694657226322, "train/loss_code": 1.5439660549163818, "train/loss_prose": 3.809351086616516, "train/loss_math": 2.4571856260299683} +{"step": 3112, "train/loss": 1.9465424418449402, "train/lm_loss": 1.9465424418449402, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8732051598176086e-05, "perf/tokens_per_sec": 26015.402180282963, "train/loss_code": 1.6374545097351074, "train/loss_math": 2.084237217903137, "train/loss_prose": 3.216592311859131} +{"step": 3113, "train/loss": 2.9178836345672607, "train/lm_loss": 2.9178836345672607, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.871515728947113e-05, "perf/tokens_per_sec": 26045.495344218554, "train/loss_math": 2.2138640880584717, "train/loss_prose": 3.6219033002853394} +{"step": 3114, "train/loss": 2.0668753683567047, "train/lm_loss": 2.0668753683567047, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.869826604392216e-05, "perf/tokens_per_sec": 25961.66044670112, "train/loss_math": 2.0543500781059265, "train/loss_code": 1.1865777373313904, "train/loss_prose": 2.9555229345957437} +{"step": 3115, "train/loss": 2.3423682749271393, "train/lm_loss": 2.3423682749271393, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.868137786976177e-05, "perf/tokens_per_sec": 26014.062818837476, "train/loss_code": 1.687744935353597, "train/loss_math": 2.104315439860026, "train/loss_prose": 3.6813825368881226} +{"step": 3116, "train/loss": 2.1174087822437286, "train/lm_loss": 2.1174087822437286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8664492775221042e-05, "perf/tokens_per_sec": 25854.645796180164, "train/loss_math": 2.0760874152183533, "train/loss_prose": 3.4855881929397583, "train/loss_code": 1.4539797604084015} +{"step": 3117, "train/loss": 2.2658024430274963, "train/lm_loss": 2.2658024430274963, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.864761076852958e-05, "perf/tokens_per_sec": 26398.929873274952, "train/loss_math": 2.3066724141438804, "train/loss_code": 1.6964687903722127, "train/loss_prose": 3.0584980249404907} +{"step": 3118, "train/loss": 2.5022256076335907, "train/lm_loss": 2.5022256076335907, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.863073185791545e-05, "perf/tokens_per_sec": 26221.722044577265, "train/loss_math": 2.0903164446353912, "train/loss_prose": 3.4118473529815674, "train/loss_code": 1.4209973812103271} +{"step": 3119, "train/loss": 1.9990712106227875, "train/lm_loss": 1.9990712106227875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8613856051605243e-05, "perf/tokens_per_sec": 27042.43425317137, "train/loss_code": 1.4603503147761028, "train/loss_math": 2.0834761261940002, "train/loss_prose": 3.277613878250122} +{"step": 3120, "train/loss": 2.37093585729599, "train/lm_loss": 2.37093585729599, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8596983357824012e-05, "perf/tokens_per_sec": 25779.419499473304, "train/loss_math": 2.2402679920196533, "train/loss_prose": 3.839191198348999, "train/loss_code": 1.9634778499603271} +{"step": 3121, "train/loss": 2.4993260502815247, "train/lm_loss": 2.4993260502815247, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8580113784795305e-05, "perf/tokens_per_sec": 26402.784122554112, "train/loss_code": 1.9512953281402587, "train/loss_prose": 4.004891395568848, "train/loss_math": 2.2283482551574707} +{"step": 3122, "train/loss": 2.3622669875621796, "train/lm_loss": 2.3622669875621796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.856324734074116e-05, "perf/tokens_per_sec": 26026.122038900106, "train/loss_math": 2.334275484085083, "train/loss_prose": 3.1575988133748374, "train/loss_code": 1.2112563848495483} +{"step": 3123, "train/loss": 2.3700388371944427, "train/lm_loss": 2.3700388371944427, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8546384033882062e-05, "perf/tokens_per_sec": 25876.141780647573, "train/loss_code": 1.4458629339933395, "train/loss_prose": 3.6775015195210776, "train/loss_math": 2.1443543434143066} +{"step": 3124, "train/loss": 2.4941804111003876, "train/lm_loss": 2.4941804111003876, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.852952387243698e-05, "perf/tokens_per_sec": 26351.553854672682, "train/loss_code": 1.4030504822731018, "train/loss_math": 2.188549200693766, "train/loss_prose": 3.527231534322103} +{"step": 3125, "train/loss": 2.282677471637726, "train/lm_loss": 2.282677471637726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8512666864623365e-05, "perf/tokens_per_sec": 25648.67789464255, "train/loss_prose": 3.154749790827433, "train/loss_math": 2.1819519996643066, "train/loss_code": 1.1256569027900696} +{"step": 3126, "train/loss": 2.4944079518318176, "train/lm_loss": 2.4944079518318176, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8495813018657116e-05, "perf/tokens_per_sec": 25949.70007174739, "train/loss_code": 1.3324376791715622, "train/loss_prose": 4.026025454203288, "train/loss_math": 2.547436475753784} +{"step": 3127, "train/loss": 2.2604037523269653, "train/lm_loss": 2.2604037523269653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8478962342752583e-05, "perf/tokens_per_sec": 25997.804521654914, "train/loss_prose": 3.2090033690134683, "train/loss_math": 2.1017932097117105, "train/loss_code": 1.07542023062706} +{"step": 3128, "train/loss": 2.2880789637565613, "train/lm_loss": 2.2880789637565613, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.846211484512258e-05, "perf/tokens_per_sec": 25384.455056022056, "train/loss_math": 2.30633544921875, "train/loss_prose": 3.3001471757888794, "train/loss_code": 1.2394973635673523} +{"step": 3129, "train/loss": 2.555876612663269, "train/lm_loss": 2.555876612663269, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8445270533978388e-05, "perf/tokens_per_sec": 25943.3910052159, "train/loss_prose": 3.8072335720062256, "train/loss_code": 1.1355881989002228, "train/loss_math": 2.251378575960795} +{"step": 3130, "train/loss": 1.8709959089756012, "train/lm_loss": 1.8709959089756012, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.84284294175297e-05, "perf/tokens_per_sec": 26925.50009090168, "train/loss_math": 2.1756083369255066, "train/loss_code": 1.451993703842163, "train/loss_prose": 3.3567817211151123} +{"step": 3131, "train/loss": 2.0575850307941437, "train/lm_loss": 2.0575850307941437, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.841159150398469e-05, "perf/tokens_per_sec": 25849.82189969049, "train/loss_code": 1.3227836191654205, "train/loss_math": 2.170309901237488, "train/loss_prose": 3.414462685585022} +{"step": 3132, "train/loss": 2.6474669575691223, "train/lm_loss": 2.6474669575691223, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.839475680154994e-05, "perf/tokens_per_sec": 26444.644663159157, "train/loss_prose": 3.281733989715576, "train/loss_code": 1.5903551975886028} +{"step": 3133, "train/loss": 2.403546631336212, "train/lm_loss": 2.403546631336212, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8377925318430477e-05, "perf/tokens_per_sec": 25511.790284953127, "train/loss_math": 2.13139271736145, "train/loss_prose": 3.077122370402018, "train/loss_code": 1.471435546875} +{"step": 3134, "train/loss": 2.303349256515503, "train/lm_loss": 2.303349256515503, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8361097062829778e-05, "perf/tokens_per_sec": 26016.465889094507, "train/loss_prose": 3.4216564893722534, "train/loss_code": 1.6587844689687092, "train/loss_math": 2.2023757696151733} +{"step": 3135, "train/loss": 2.762899398803711, "train/lm_loss": 2.762899398803711, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8344272042949724e-05, "perf/tokens_per_sec": 25958.561455624575, "train/loss_code": 1.6696547269821167, "train/loss_prose": 3.2270321369171144, "train/loss_math": 2.1491901874542236} +{"step": 3136, "train/loss": 2.1443839371204376, "train/lm_loss": 2.1443839371204376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8327450266990616e-05, "perf/tokens_per_sec": 25722.525773628073, "train/loss_code": 1.377032470703125, "train/loss_prose": 3.4233030478159585} +{"step": 3137, "train/loss": 2.3592832386493683, "train/lm_loss": 2.3592832386493683, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8310631743151185e-05, "perf/tokens_per_sec": 25981.959466019784, "train/loss_math": 2.1638576984405518, "train/loss_code": 1.664325475692749, "train/loss_prose": 3.3510354359944663} +{"step": 3138, "train/loss": 2.2598969638347626, "train/lm_loss": 2.2598969638347626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8293816479628583e-05, "perf/tokens_per_sec": 26022.14047215852, "train/loss_math": 2.52367901802063, "train/loss_code": 1.4888231456279755, "train/loss_prose": 3.538262963294983} +{"step": 3139, "train/loss": 2.1046349108219147, "train/lm_loss": 2.1046349108219147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.827700448461836e-05, "perf/tokens_per_sec": 25954.561320761844, "train/loss_code": 1.2955155074596405, "train/loss_math": 2.3414372205734253, "train/loss_prose": 3.4860711097717285} +{"step": 3140, "train/loss": 2.3956870436668396, "train/lm_loss": 2.3956870436668396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8260195766314476e-05, "perf/tokens_per_sec": 26411.71407487578, "train/loss_code": 1.3348796665668488, "train/loss_math": 2.128652811050415, "train/loss_prose": 3.3699264526367188} +{"step": 3141, "train/loss": 2.9533435106277466, "train/lm_loss": 2.9533435106277466, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8243390332909303e-05, "perf/tokens_per_sec": 26694.017393195925, "train/loss_math": 1.983970820903778, "train/loss_prose": 3.679900646209717, "train/loss_code": 1.2593026161193848} +{"step": 3142, "train/loss": 2.0993726551532745, "train/lm_loss": 2.0993726551532745, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8226588192593605e-05, "perf/tokens_per_sec": 26398.68648478376, "train/loss_code": 1.2269273002942402, "train/loss_math": 2.1658384005228677, "train/loss_prose": 3.308342218399048} +{"step": 3143, "train/loss": 2.188073843717575, "train/lm_loss": 2.188073843717575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.820978935355653e-05, "perf/tokens_per_sec": 26046.16662699574, "train/loss_code": 1.1718258062998455, "train/loss_math": 2.2672301133473716, "train/loss_prose": 3.593711495399475} +{"step": 3144, "train/loss": 2.02606400847435, "train/lm_loss": 2.02606400847435, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8192993823985643e-05, "perf/tokens_per_sec": 25871.699464340305, "train/loss_math": 2.200059096018473, "train/loss_code": 1.2911734382311504, "train/loss_prose": 2.867406964302063} +{"step": 3145, "train/loss": 2.451147735118866, "train/lm_loss": 2.451147735118866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.817620161206687e-05, "perf/tokens_per_sec": 26476.718933155793, "train/loss_code": 1.9641571640968323, "train/loss_math": 2.0527327060699463, "train/loss_prose": 3.174223264058431} +{"step": 3146, "train/loss": 2.515717387199402, "train/lm_loss": 2.515717387199402, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8159412725984543e-05, "perf/tokens_per_sec": 26005.832713709628, "train/loss_math": 2.2465742429097495, "train/loss_prose": 3.27264674504598, "train/loss_code": 1.784037709236145} +{"step": 3147, "train/loss": 3.169552445411682, "train/lm_loss": 3.169552445411682, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8142627173921338e-05, "perf/tokens_per_sec": 26100.814302990526, "train/loss_prose": 4.14759761095047, "train/loss_math": 2.1915072798728943} +{"step": 3148, "train/loss": 2.5287596583366394, "train/lm_loss": 2.5287596583366394, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8125844964058354e-05, "perf/tokens_per_sec": 25937.86819295626, "train/loss_prose": 3.7434047857920327, "train/loss_math": 2.0249908566474915, "train/loss_code": 0.8998998999595642} +{"step": 3149, "train/loss": 2.055145412683487, "train/lm_loss": 2.055145412683487, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8109066104575023e-05, "perf/tokens_per_sec": 26376.111449898825, "train/loss_math": 2.229248325030009, "train/loss_code": 1.4200956672430038, "train/loss_prose": 4.07303524017334} +{"step": 3150, "train/loss": 2.321408659219742, "train/lm_loss": 2.321408659219742, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.809229060364916e-05, "perf/tokens_per_sec": 26153.461986612612, "train/loss_prose": 3.4841465950012207, "train/loss_math": 2.3856921195983887, "train/loss_code": 1.4332841485738754} +{"step": 3151, "train/loss": 2.448353350162506, "train/lm_loss": 2.448353350162506, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.807551846945694e-05, "perf/tokens_per_sec": 25920.218805729648, "train/loss_math": 2.3053730130195618, "train/loss_code": 2.272843599319458, "train/loss_prose": 3.481745958328247} +{"step": 3152, "train/loss": 2.0163762867450714, "train/lm_loss": 2.0163762867450714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.80587497101729e-05, "perf/tokens_per_sec": 26368.21728313204, "train/loss_prose": 3.4879984855651855, "train/loss_code": 1.316178560256958, "train/loss_math": 2.1736190915107727} +{"step": 3153, "train/loss": 2.3878234028816223, "train/lm_loss": 2.3878234028816223, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.804198433396994e-05, "perf/tokens_per_sec": 26063.82984042989, "train/loss_code": 1.748786300420761, "train/loss_prose": 3.3180948893229165, "train/loss_math": 2.1531574726104736} +{"step": 3154, "train/loss": 2.3963460326194763, "train/lm_loss": 2.3963460326194763, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.802522234901927e-05, "perf/tokens_per_sec": 25798.89202676017, "train/loss_math": 1.937944730122884, "train/loss_code": 1.120829999446869, "train/loss_prose": 3.7050909996032715} +{"step": 3155, "train/loss": 2.673927366733551, "train/lm_loss": 2.673927366733551, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.800846376349051e-05, "perf/tokens_per_sec": 26626.3742351446, "train/loss_prose": 3.61043373743693, "train/loss_math": 2.2210590839385986, "train/loss_code": 1.6758813858032227} +{"step": 3156, "train/loss": 2.689918875694275, "train/lm_loss": 2.689918875694275, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7991708585551563e-05, "perf/tokens_per_sec": 25962.641294212764, "train/loss_prose": 3.3514108180999758, "train/loss_code": 1.5874325434366863} +{"step": 3157, "train/loss": 2.2605739533901215, "train/lm_loss": 2.2605739533901215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7974956823368727e-05, "perf/tokens_per_sec": 25928.12088210614, "train/loss_code": 1.8436379830042522, "train/loss_prose": 3.2257009744644165, "train/loss_math": 2.0340917110443115} +{"step": 3158, "train/loss": 2.2721477448940277, "train/lm_loss": 2.2721477448940277, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7958208485106586e-05, "perf/tokens_per_sec": 26245.47679747748, "train/loss_prose": 3.522415558497111, "train/loss_code": 1.4082640707492828, "train/loss_math": 1.9768784046173096} +{"step": 3159, "train/loss": 2.2639114260673523, "train/lm_loss": 2.2639114260673523, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7941463578928086e-05, "perf/tokens_per_sec": 26267.025433951996, "train/loss_code": 2.0421336591243744, "train/loss_prose": 2.875881552696228, "train/loss_math": 2.095496654510498} +{"step": 3160, "train/loss": 1.7979672849178314, "train/lm_loss": 1.7979672849178314, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7924722112994495e-05, "perf/tokens_per_sec": 26799.284906022396, "train/loss_math": 1.9960682392120361, "train/loss_code": 1.214504897594452, "train/loss_prose": 3.5375144481658936} +{"step": 3161, "train/loss": 2.7256113290786743, "train/lm_loss": 2.7256113290786743, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7907984095465395e-05, "perf/tokens_per_sec": 25892.09212711637, "train/loss_prose": 3.786555290222168, "train/loss_math": 2.1498105227947235, "train/loss_code": 1.8459820747375488} +{"step": 3162, "train/loss": 2.6512808799743652, "train/lm_loss": 2.6512808799743652, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.789124953449869e-05, "perf/tokens_per_sec": 25839.32447850943, "train/loss_math": 1.9179044167200725, "train/loss_prose": 3.605306029319763, "train/loss_code": 1.035308599472046} +{"step": 3163, "train/loss": 1.9912963211536407, "train/lm_loss": 1.9912963211536407, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7874518438250597e-05, "perf/tokens_per_sec": 26223.763343234736, "train/loss_math": 2.1069508492946625, "train/loss_code": 1.528834859530131, "train/loss_prose": 2.916062831878662} +{"step": 3164, "train/loss": 2.0878531634807587, "train/lm_loss": 2.0878531634807587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7857790814875663e-05, "perf/tokens_per_sec": 26004.80923707545, "train/loss_prose": 3.8893463611602783, "train/loss_code": 1.404470403989156, "train/loss_math": 2.150016814470291} +{"step": 3165, "train/loss": 2.2173566818237305, "train/lm_loss": 2.2173566818237305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7841066672526723e-05, "perf/tokens_per_sec": 26055.96339397432, "train/loss_math": 2.1042831242084503, "train/loss_code": 1.226759672164917, "train/loss_prose": 3.43410062789917} +{"step": 3166, "train/loss": 1.8891026079654694, "train/lm_loss": 1.8891026079654694, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.782434601935491e-05, "perf/tokens_per_sec": 26556.527628737535, "train/loss_code": 1.5607723951339723, "train/loss_math": 2.4363198280334473} +{"step": 3167, "train/loss": 2.6642587184906006, "train/lm_loss": 2.6642587184906006, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7807628863509685e-05, "perf/tokens_per_sec": 26115.137294007305, "train/loss_prose": 3.6008715629577637, "train/loss_code": 1.5104091167449951, "train/loss_math": 1.9448829293251038} +{"step": 3168, "train/loss": 2.866144299507141, "train/lm_loss": 2.866144299507141, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7790915213138776e-05, "perf/tokens_per_sec": 26323.25010955336, "train/loss_math": 2.336469888687134, "train/loss_prose": 3.414941740036011, "train/loss_code": 1.1815074682235718} +{"step": 3169, "train/loss": 2.350565731525421, "train/lm_loss": 2.350565731525421, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7774205076388206e-05, "perf/tokens_per_sec": 26229.92919391974, "train/loss_code": 1.750127136707306, "train/loss_math": 2.3600441217422485, "train/loss_prose": 3.541964530944824} +{"step": 3170, "train/loss": 2.5646047592163086, "train/lm_loss": 2.5646047592163086, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7757498461402318e-05, "perf/tokens_per_sec": 26192.697981256373, "train/loss_math": 2.1227750380833945, "train/loss_prose": 3.7890512943267822, "train/loss_code": 1.3906795382499695} +{"step": 3171, "train/loss": 2.709346503019333, "train/lm_loss": 2.709346503019333, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7740795376323692e-05, "perf/tokens_per_sec": 26193.29700192716, "train/loss_prose": 3.241431772708893, "train/loss_code": 2.3728016018867493, "train/loss_math": 1.9817212224006653} +{"step": 3172, "train/loss": 2.548182964324951, "train/lm_loss": 2.548182964324951, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.772409582929321e-05, "perf/tokens_per_sec": 26007.68304900775, "train/loss_code": 1.9364179372787476, "train/loss_prose": 3.5226687590281167, "train/loss_math": 2.0717854499816895} +{"step": 3173, "train/loss": 2.1262764036655426, "train/lm_loss": 2.1262764036655426, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7707399828450027e-05, "perf/tokens_per_sec": 26082.229765242962, "train/loss_prose": 3.659148335456848, "train/loss_math": 2.0584826866785684, "train/loss_code": 1.1721556584040325} +{"step": 3174, "train/loss": 2.3216520845890045, "train/lm_loss": 2.3216520845890045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7690707381931583e-05, "perf/tokens_per_sec": 26087.378477900726, "train/loss_code": 1.70615953207016, "train/loss_prose": 3.709836483001709, "train/loss_math": 2.1644526720046997} +{"step": 3175, "train/loss": 2.532801568508148, "train/lm_loss": 2.532801568508148, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.767401849787357e-05, "perf/tokens_per_sec": 26735.267765549524, "train/loss_math": 2.331981360912323, "train/loss_prose": 3.194813330968221, "train/loss_code": 1.3500468730926514} +{"step": 3176, "train/loss": 2.60990771651268, "train/lm_loss": 2.60990771651268, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7657333184409936e-05, "perf/tokens_per_sec": 26303.381011508965, "train/loss_prose": 3.805779536565145, "train/loss_math": 2.123094399770101, "train/loss_code": 1.5463197231292725} +{"step": 3177, "train/loss": 2.3606438636779785, "train/lm_loss": 2.3606438636779785, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7640651449672913e-05, "perf/tokens_per_sec": 25937.633231826534, "train/loss_prose": 3.4224425554275513, "train/loss_math": 2.18343768119812, "train/loss_code": 1.123077154159546} +{"step": 3178, "train/loss": 2.980959951877594, "train/lm_loss": 2.980959951877594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7623973301792966e-05, "perf/tokens_per_sec": 26024.939268114966, "train/loss_prose": 3.5581849098205565, "train/loss_math": 2.0254244804382324, "train/loss_code": 2.0156655311584473} +{"step": 3179, "train/loss": 2.034958094358444, "train/lm_loss": 2.034958094358444, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7607298748898842e-05, "perf/tokens_per_sec": 26163.379493724093, "train/loss_code": 1.452018916606903, "train/loss_math": 2.295494556427002, "train/loss_prose": 3.5851054191589355} +{"step": 3180, "train/loss": 2.800759494304657, "train/lm_loss": 2.800759494304657, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.759062779911748e-05, "perf/tokens_per_sec": 25853.556371866664, "train/loss_prose": 3.416892242431641, "train/loss_code": 1.4991729855537415, "train/loss_math": 2.3232691287994385} +{"step": 3181, "train/loss": 2.432161271572113, "train/lm_loss": 2.432161271572113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7573960460574133e-05, "perf/tokens_per_sec": 26341.65475916675, "train/loss_code": 1.4514865279197693, "train/loss_prose": 3.627607822418213, "train/loss_math": 2.1100035905838013} +{"step": 3182, "train/loss": 2.0202485024929047, "train/lm_loss": 2.0202485024929047, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.755729674139224e-05, "perf/tokens_per_sec": 25994.185585978164, "train/loss_code": 1.3794860045115154, "train/loss_math": 2.404706120491028} +{"step": 3183, "train/loss": 2.565106689929962, "train/lm_loss": 2.565106689929962, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7540636649693496e-05, "perf/tokens_per_sec": 26157.96177071565, "train/loss_math": 2.0993874073028564, "train/loss_prose": 4.4789122343063354, "train/loss_code": 1.5827394723892212} +{"step": 3184, "train/loss": 2.701711058616638, "train/lm_loss": 2.701711058616638, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7523980193597836e-05, "perf/tokens_per_sec": 26022.613465820446, "train/loss_prose": 3.979596217473348, "train/loss_code": 1.577836513519287, "train/loss_math": 2.4706950187683105} +{"step": 3185, "train/loss": 2.2311769127845764, "train/lm_loss": 2.2311769127845764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7507327381223405e-05, "perf/tokens_per_sec": 25960.44429870756, "train/loss_math": 2.179722309112549, "train/loss_code": 1.3653562664985657, "train/loss_prose": 4.220091342926025} +{"step": 3186, "train/loss": 2.3316039741039276, "train/lm_loss": 2.3316039741039276, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.749067822068659e-05, "perf/tokens_per_sec": 26018.51475857724, "train/loss_prose": 3.4948020776112876, "train/loss_code": 1.5682609379291534, "train/loss_math": 1.8953819274902344} +{"step": 3187, "train/loss": 2.7595000863075256, "train/lm_loss": 2.7595000863075256, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.747403272010199e-05, "perf/tokens_per_sec": 27104.46845965616, "train/loss_math": 2.1856587529182434, "train/loss_prose": 3.622474193572998, "train/loss_code": 2.465942621231079} +{"step": 3188, "train/loss": 2.4872854351997375, "train/lm_loss": 2.4872854351997375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.745739088758242e-05, "perf/tokens_per_sec": 26108.62850238976, "train/loss_code": 1.8074549436569214, "train/loss_math": 2.2346071004867554, "train/loss_prose": 3.3355680306752524} +{"step": 3189, "train/loss": 2.715379476547241, "train/lm_loss": 2.715379476547241, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.744075273123889e-05, "perf/tokens_per_sec": 26022.14047215852, "train/loss_prose": 3.563922643661499, "train/loss_math": 2.078528563181559, "train/loss_code": 1.23175847530365} +{"step": 3190, "train/loss": 2.046313062310219, "train/lm_loss": 2.046313062310219, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7424118259180654e-05, "perf/tokens_per_sec": 25986.40042353012, "train/loss_prose": 3.1710368394851685, "train/loss_code": 1.3723960369825363, "train/loss_math": 2.269423484802246} +{"step": 3191, "train/loss": 2.6838337779045105, "train/lm_loss": 2.6838337779045105, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7407487479515147e-05, "perf/tokens_per_sec": 25310.96657394748, "train/loss_math": 2.3770105044047036, "train/loss_prose": 3.6507793267567954, "train/loss_code": 1.693650245666504} +{"step": 3192, "train/loss": 2.294462740421295, "train/lm_loss": 2.294462740421295, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7390860400348002e-05, "perf/tokens_per_sec": 23571.717543363782, "train/loss_code": 1.6146994233131409, "train/loss_math": 2.1700517535209656, "train/loss_prose": 3.778400421142578} +{"step": 3193, "train/loss": 2.4912429451942444, "train/lm_loss": 2.4912429451942444, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7374237029783062e-05, "perf/tokens_per_sec": 25158.330551935214, "train/loss_code": 1.542155424753825, "train/loss_math": 2.1302977800369263, "train/loss_prose": 3.680960496266683} +{"step": 3194, "train/loss": 2.2168851494789124, "train/lm_loss": 2.2168851494789124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.735761737592236e-05, "perf/tokens_per_sec": 26350.098827427013, "train/loss_code": 1.6046900153160095, "train/loss_math": 2.082792949676514, "train/loss_prose": 4.111736297607422} +{"step": 3195, "train/loss": 2.0581494867801666, "train/lm_loss": 2.0581494867801666, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.73410014468661e-05, "perf/tokens_per_sec": 26184.673746845012, "train/loss_math": 2.196664810180664, "train/loss_code": 1.443829337755839, "train/loss_prose": 3.3470489978790283} +{"step": 3196, "train/loss": 1.9252424240112305, "train/lm_loss": 1.9252424240112305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7324389250712702e-05, "perf/tokens_per_sec": 26248.36394749065, "train/loss_prose": 3.1472105979919434, "train/loss_math": 2.109215348958969, "train/loss_code": 1.2726223468780518} +{"step": 3197, "train/loss": 2.7754820585250854, "train/lm_loss": 2.7754820585250854, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.730778079555874e-05, "perf/tokens_per_sec": 26282.85831167301, "train/loss_prose": 3.260741710662842, "train/loss_code": 1.8869505524635315, "train/loss_math": 2.126246213912964} +{"step": 3198, "train/loss": 2.7851746678352356, "train/lm_loss": 2.7851746678352356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7291176089498967e-05, "perf/tokens_per_sec": 25947.7012155337, "train/loss_code": 2.360791862010956, "train/loss_prose": 3.3146692514419556, "train/loss_math": 2.1505682468414307} +{"step": 3199, "train/loss": 2.693039655685425, "train/lm_loss": 2.693039655685425, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7274575140626318e-05, "perf/tokens_per_sec": 26921.744855760717, "train/loss_math": 2.1930339336395264, "train/loss_prose": 3.2975950717926024, "train/loss_code": 1.4316542148590088} +{"step": 3200, "train/loss": 2.3474684953689575, "train/lm_loss": 2.3474684953689575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.72579779570319e-05, "perf/tokens_per_sec": 26093.084040722533, "train/loss_code": 1.9050428867340088, "train/loss_math": 2.3170795142650604, "train/loss_prose": 2.8506717681884766} +{"step": 3200, "eval/loss": 2.1687353356646097, "eval/lm_loss": 2.1687353356646097, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.747214752217827, "eval/loss_code": 1.5617693651756739, "eval/ppl_code": 4.767248791230902, "eval/loss_prose": 3.4808924104038037, "eval/ppl_prose": 32.4887023987964, "eval/loss_math": 2.05407785375913, "eval/ppl_math": 7.7996421447052064} +{"step": 3201, "train/loss": 2.8262126445770264, "train/lm_loss": 2.8262126445770264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7241384546804972e-05, "perf/tokens_per_sec": 26462.689609387395, "train/loss_math": 2.2941282987594604, "train/loss_prose": 3.4148201942443848, "train/loss_code": 2.1810821294784546} +{"step": 3202, "train/loss": 2.0377206802368164, "train/lm_loss": 2.0377206802368164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.722479491803296e-05, "perf/tokens_per_sec": 25834.117812293705, "train/loss_code": 1.2328122407197952, "train/loss_math": 2.1189849376678467, "train/loss_prose": 3.566272735595703} +{"step": 3203, "train/loss": 2.5971534848213196, "train/lm_loss": 2.5971534848213196, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7208209078801454e-05, "perf/tokens_per_sec": 27111.098250863208, "train/loss_math": 2.259376287460327, "train/loss_prose": 3.5275776386260986, "train/loss_code": 1.1569888591766357} +{"step": 3204, "train/loss": 2.604064106941223, "train/lm_loss": 2.604064106941223, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7191627037194186e-05, "perf/tokens_per_sec": 26031.13029301154, "train/loss_prose": 3.5372337102890015, "train/loss_code": 1.6708946824073792} +{"step": 3205, "train/loss": 2.3968652486801147, "train/lm_loss": 2.3968652486801147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.717504880129304e-05, "perf/tokens_per_sec": 26195.49364548597, "train/loss_prose": 3.719085216522217, "train/loss_code": 0.9619058966636658, "train/loss_math": 2.0312846501668296} +{"step": 3206, "train/loss": 2.6178784370422363, "train/lm_loss": 2.6178784370422363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7158474379178064e-05, "perf/tokens_per_sec": 25801.294255639008, "train/loss_prose": 3.488813638687134, "train/loss_math": 2.1763238310813904, "train/loss_code": 1.771290898323059} +{"step": 3207, "train/loss": 1.7962162792682648, "train/lm_loss": 1.7962162792682648, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7141903778927406e-05, "perf/tokens_per_sec": 25793.198206773544, "train/loss_code": 1.2738438606262208, "train/loss_math": 2.2411786317825317, "train/loss_prose": 3.518153429031372} +{"step": 3208, "train/loss": 1.815682828426361, "train/lm_loss": 1.815682828426361, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7125337008617386e-05, "perf/tokens_per_sec": 26554.105846275597, "train/loss_math": 1.8746054768562317, "train/loss_prose": 2.902286648750305, "train/loss_code": 1.242919459939003} +{"step": 3209, "train/loss": 2.3595825731754303, "train/lm_loss": 2.3595825731754303, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7108774076322443e-05, "perf/tokens_per_sec": 25954.44368839927, "train/loss_prose": 3.537908951441447, "train/loss_math": 2.28684663772583, "train/loss_code": 1.494021624326706} +{"step": 3210, "train/loss": 2.0133632123470306, "train/lm_loss": 2.0133632123470306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7092214990115152e-05, "perf/tokens_per_sec": 25539.8936533327, "train/loss_math": 2.1275402307510376, "train/loss_prose": 3.1499264240264893, "train/loss_code": 1.387993037700653} +{"step": 3211, "train/loss": 2.820166915655136, "train/lm_loss": 2.820166915655136, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7075659758066208e-05, "perf/tokens_per_sec": 26605.01501228052, "train/loss_prose": 3.819190800189972, "train/loss_code": 1.7177064816157024, "train/loss_math": 2.1314518451690674} +{"step": 3212, "train/loss": 2.101274847984314, "train/lm_loss": 2.101274847984314, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7059108388244432e-05, "perf/tokens_per_sec": 25837.031732568143, "train/loss_math": 2.2440253496170044, "train/loss_prose": 3.390729308128357, "train/loss_code": 1.3851723372936249} +{"step": 3213, "train/loss": 2.3873044550418854, "train/lm_loss": 2.3873044550418854, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7042560888716766e-05, "perf/tokens_per_sec": 25937.39827495361, "train/loss_code": 1.3874321381251018, "train/loss_prose": 3.649203618367513, "train/loss_math": 1.9942637085914612} +{"step": 3214, "train/loss": 2.4704201221466064, "train/lm_loss": 2.4704201221466064, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.702601726754825e-05, "perf/tokens_per_sec": 26251.372070955647, "train/loss_prose": 3.80734646320343, "train/loss_math": 2.5771365960439048, "train/loss_code": 1.4724191029866536} +{"step": 3215, "train/loss": 2.244493752717972, "train/lm_loss": 2.244493752717972, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7009477532802054e-05, "perf/tokens_per_sec": 26298.18694816248, "train/loss_code": 1.7460570335388184, "train/loss_prose": 2.944636662801107, "train/loss_math": 2.1378116607666016} +{"step": 3216, "train/loss": 1.6262870728969574, "train/lm_loss": 1.6262870728969574, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6992941692539437e-05, "perf/tokens_per_sec": 26181.28189286928, "train/loss_math": 1.895632117986679, "train/loss_code": 0.809055874745051, "train/loss_prose": 3.000600576400757} +{"step": 3217, "train/loss": 2.43952414393425, "train/lm_loss": 2.43952414393425, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6976409754819767e-05, "perf/tokens_per_sec": 26011.660192469582, "train/loss_math": 2.107296864191691, "train/loss_prose": 3.3205415407816568, "train/loss_code": 1.6163394451141357} +{"step": 3218, "train/loss": 2.496419847011566, "train/lm_loss": 2.496419847011566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6959881727700508e-05, "perf/tokens_per_sec": 26044.863580271245, "train/loss_code": 1.011296957731247, "train/loss_prose": 3.400650382041931, "train/loss_math": 2.173081874847412} +{"step": 3219, "train/loss": 2.408432960510254, "train/lm_loss": 2.408432960510254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6943357619237226e-05, "perf/tokens_per_sec": 26187.387862861186, "train/loss_prose": 4.011220932006836, "train/loss_math": 2.2896395126978555, "train/loss_code": 1.5184048414230347} +{"step": 3220, "train/loss": 2.553321450948715, "train/lm_loss": 2.553321450948715, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6926837437483566e-05, "perf/tokens_per_sec": 26065.53024944508, "train/loss_prose": 3.3655107021331787, "train/loss_math": 2.0699198246002197, "train/loss_code": 1.6315364440282185} +{"step": 3221, "train/loss": 2.2415299117565155, "train/lm_loss": 2.2415299117565155, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6910321190491263e-05, "perf/tokens_per_sec": 26434.43146921473, "train/loss_math": 2.0141304433345795, "train/loss_prose": 3.268752336502075, "train/loss_code": 1.6691065728664398} +{"step": 3222, "train/loss": 2.2240779995918274, "train/lm_loss": 2.2240779995918274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.689380888631014e-05, "perf/tokens_per_sec": 26424.59195228156, "train/loss_math": 1.9631322622299194, "train/loss_prose": 3.391446908315023, "train/loss_code": 1.4137876629829407} +{"step": 3223, "train/loss": 2.3827700912952423, "train/lm_loss": 2.3827700912952423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6877300532988094e-05, "perf/tokens_per_sec": 26078.11225817224, "train/loss_code": 1.427017370859782, "train/loss_math": 2.1128475069999695, "train/loss_prose": 3.5184711615244546} +{"step": 3224, "train/loss": 2.7041823267936707, "train/lm_loss": 2.7041823267936707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.686079613857109e-05, "perf/tokens_per_sec": 25024.35331509659, "train/loss_prose": 3.3603282451629637, "train/loss_code": 1.3979570865631104, "train/loss_math": 2.0359020233154297} +{"step": 3225, "train/loss": 2.125452369451523, "train/lm_loss": 2.125452369451523, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6844295711103167e-05, "perf/tokens_per_sec": 25485.185227380614, "train/loss_code": 1.4200682242711384, "train/loss_math": 2.03376575311025, "train/loss_prose": 3.3210586309432983} +{"step": 3226, "train/loss": 2.6053813695907593, "train/lm_loss": 2.6053813695907593, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6827799258626442e-05, "perf/tokens_per_sec": 25656.33861845986, "train/loss_prose": 3.4577555656433105, "train/loss_math": 2.2026093006134033, "train/loss_code": 1.603139599164327} +{"step": 3227, "train/loss": 2.2512558698654175, "train/lm_loss": 2.2512558698654175, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.681130678918108e-05, "perf/tokens_per_sec": 25914.197685503626, "train/loss_math": 2.1231780767440798, "train/loss_code": 0.8384193778038025, "train/loss_prose": 3.277868628501892} +{"step": 3228, "train/loss": 2.607533246278763, "train/lm_loss": 2.607533246278763, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.679481831080531e-05, "perf/tokens_per_sec": 25614.070229007633, "train/loss_math": 2.222729245821635, "train/loss_code": 1.689035415649414, "train/loss_prose": 3.6046690940856934} +{"step": 3229, "train/loss": 2.567226767539978, "train/lm_loss": 2.567226767539978, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.677833383153542e-05, "perf/tokens_per_sec": 24962.431376765584, "train/loss_math": 2.1962424516677856, "train/loss_prose": 3.138551950454712, "train/loss_code": 1.7955603003501892} +{"step": 3230, "train/loss": 2.5109704732894897, "train/lm_loss": 2.5109704732894897, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6761853359405737e-05, "perf/tokens_per_sec": 25746.927630800034, "train/loss_math": 2.2734382152557373, "train/loss_prose": 3.3048534393310547, "train/loss_code": 1.1607365608215332} +{"step": 3231, "train/loss": 2.7060971558094025, "train/lm_loss": 2.7060971558094025, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6745376902448656e-05, "perf/tokens_per_sec": 25965.89823950474, "train/loss_prose": 3.9221904277801514, "train/loss_code": 1.8786574006080627, "train/loss_math": 2.367576837539673} +{"step": 3232, "train/loss": 2.2133454084396362, "train/lm_loss": 2.2133454084396362, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.67289044686946e-05, "perf/tokens_per_sec": 26048.25967189253, "train/loss_prose": 3.4490950107574463, "train/loss_code": 1.269211620092392, "train/loss_math": 2.2826311588287354} +{"step": 3233, "train/loss": 2.3053155541419983, "train/lm_loss": 2.3053155541419983, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.671243606617202e-05, "perf/tokens_per_sec": 26111.28718986911, "train/loss_prose": 3.7839648723602295, "train/loss_code": 1.4913806915283203, "train/loss_math": 2.133484125137329} +{"step": 3234, "train/loss": 2.2474564909934998, "train/lm_loss": 2.2474564909934998, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6695971702907426e-05, "perf/tokens_per_sec": 26148.088167519505, "train/loss_prose": 3.770365357398987, "train/loss_math": 2.3007436990737915, "train/loss_code": 1.4593584686517715} +{"step": 3235, "train/loss": 2.5396822094917297, "train/lm_loss": 2.5396822094917297, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6679511386925337e-05, "perf/tokens_per_sec": 26006.108278876007, "train/loss_code": 1.286448359489441, "train/loss_math": 1.9634936451911926, "train/loss_prose": 3.45439350605011} +{"step": 3236, "train/loss": 1.8717669546604156, "train/lm_loss": 1.8717669546604156, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6663055126248326e-05, "perf/tokens_per_sec": 26176.734197466718, "train/loss_math": 2.0514773527781167, "train/loss_code": 1.345742866396904, "train/loss_prose": 3.436732292175293} +{"step": 3237, "train/loss": 2.1734375059604645, "train/lm_loss": 2.1734375059604645, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6646602928896963e-05, "perf/tokens_per_sec": 24396.29250781028, "train/loss_math": 2.162209709485372, "train/loss_code": 1.3563214341799419, "train/loss_prose": 3.415953040122986} +{"step": 3238, "train/loss": 2.428213596343994, "train/lm_loss": 2.428213596343994, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.663015480288986e-05, "perf/tokens_per_sec": 25244.837382628244, "train/loss_prose": 3.8132258653640747, "train/loss_math": 2.003549963235855, "train/loss_code": 1.8925284147262573} +{"step": 3239, "train/loss": 2.796321153640747, "train/lm_loss": 2.796321153640747, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6613710756243626e-05, "perf/tokens_per_sec": 26117.439960595508, "train/loss_prose": 3.6899149417877197, "train/loss_code": 0.8493477404117584, "train/loss_math": 2.222299337387085} +{"step": 3240, "train/loss": 2.1863995492458344, "train/lm_loss": 2.1863995492458344, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.659727079697289e-05, "perf/tokens_per_sec": 25958.836019896888, "train/loss_code": 1.5673387944698334, "train/loss_math": 2.064685344696045, "train/loss_prose": 3.052385171254476} +{"step": 3241, "train/loss": 2.9251280426979065, "train/lm_loss": 2.9251280426979065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.65808349330903e-05, "perf/tokens_per_sec": 26555.8297817088, "train/loss_code": 0.7316487431526184, "train/loss_prose": 3.427414059638977, "train/loss_math": 2.1048905849456787} +{"step": 3242, "train/loss": 2.35443052649498, "train/lm_loss": 2.35443052649498, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6564403172606475e-05, "perf/tokens_per_sec": 26436.302619491304, "train/loss_prose": 3.028655529022217, "train/loss_math": 2.1071771383285522, "train/loss_code": 1.3207695484161377} +{"step": 3243, "train/loss": 2.3233139514923096, "train/lm_loss": 2.3233139514923096, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6547975523530075e-05, "perf/tokens_per_sec": 26270.43954235873, "train/loss_code": 1.11839559674263, "train/loss_prose": 3.167439858118693, "train/loss_math": 2.2824671268463135} +{"step": 3244, "train/loss": 2.5143097043037415, "train/lm_loss": 2.5143097043037415, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6531551993867717e-05, "perf/tokens_per_sec": 25695.28744241699, "train/loss_code": 1.3937249779701233, "train/loss_prose": 3.769420941670736, "train/loss_math": 2.006254514058431} +{"step": 3245, "train/loss": 2.1191625595092773, "train/lm_loss": 2.1191625595092773, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6515132591624048e-05, "perf/tokens_per_sec": 25618.348102020853, "train/loss_code": 1.7517475605010986, "train/loss_prose": 3.377092123031616, "train/loss_math": 2.4087352752685547} +{"step": 3246, "train/loss": 2.2330999076366425, "train/lm_loss": 2.2330999076366425, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6498717324801683e-05, "perf/tokens_per_sec": 25753.875004309833, "train/loss_code": 1.0973623394966125, "train/loss_math": 2.165801763534546, "train/loss_prose": 3.4137027263641357} +{"step": 3247, "train/loss": 2.6343268156051636, "train/lm_loss": 2.6343268156051636, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.648230620140121e-05, "perf/tokens_per_sec": 26017.056876021266, "train/loss_prose": 3.2938719391822815, "train/loss_math": 1.9912253618240356, "train/loss_code": 1.9583382606506348} +{"step": 3248, "train/loss": 2.0092890858650208, "train/lm_loss": 2.0092890858650208, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6465899229421223e-05, "perf/tokens_per_sec": 25758.508669948227, "train/loss_prose": 3.6186084747314453, "train/loss_code": 1.428983251253764, "train/loss_math": 2.0421886444091797} +{"step": 3249, "train/loss": 2.3486696779727936, "train/lm_loss": 2.3486696779727936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6449496416858284e-05, "perf/tokens_per_sec": 25913.92406468575, "train/loss_math": 2.2571394443511963, "train/loss_prose": 3.5416845083236694, "train/loss_code": 1.3387150168418884} +{"step": 3250, "train/loss": 2.882534682750702, "train/lm_loss": 2.882534682750702, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.643309777170692e-05, "perf/tokens_per_sec": 25944.135393778495, "train/loss_code": 1.0442938804626465, "train/loss_prose": 3.5758236408233643, "train/loss_math": 2.0684324502944946} +{"step": 3251, "train/loss": 1.9732906818389893, "train/lm_loss": 1.9732906818389893, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6416703301959622e-05, "perf/tokens_per_sec": 26485.20899887306, "train/loss_math": 2.2425819635391235, "train/loss_code": 1.495665979385376, "train/loss_prose": 3.822831153869629} +{"step": 3252, "train/loss": 2.491076499223709, "train/lm_loss": 2.491076499223709, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6400313015606865e-05, "perf/tokens_per_sec": 26292.673017971898, "train/loss_code": 1.6909332275390625, "train/loss_math": 2.123469054698944, "train/loss_prose": 3.536291519800822} +{"step": 3253, "train/loss": 1.6052134931087494, "train/lm_loss": 1.6052134931087494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6383926920637077e-05, "perf/tokens_per_sec": 26641.073441842, "train/loss_math": 1.9592912594477336, "train/loss_code": 1.392766797542572} +{"step": 3254, "train/loss": 3.0337374210357666, "train/lm_loss": 3.0337374210357666, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6367545025036636e-05, "perf/tokens_per_sec": 25802.030512109624, "train/loss_math": 2.050392746925354, "train/loss_prose": 3.361518700917562} +{"step": 3255, "train/loss": 2.1893293261528015, "train/lm_loss": 2.1893293261528015, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.635116733678988e-05, "perf/tokens_per_sec": 25910.601987198435, "train/loss_prose": 3.1032594442367554, "train/loss_math": 1.980373054742813, "train/loss_code": 1.693311631679535} +{"step": 3256, "train/loss": 2.1995275914669037, "train/lm_loss": 2.1995275914669037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.63347938638791e-05, "perf/tokens_per_sec": 26021.667495690792, "train/loss_code": 1.3686447143554688, "train/loss_prose": 3.6785378456115723, "train/loss_math": 2.044403910636902} +{"step": 3257, "train/loss": 2.3078084588050842, "train/lm_loss": 2.3078084588050842, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6318424614284524e-05, "perf/tokens_per_sec": 26097.919873610976, "train/loss_code": 1.1312452405691147, "train/loss_prose": 3.4843714237213135} +{"step": 3258, "train/loss": 1.9804587960243225, "train/lm_loss": 1.9804587960243225, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6302059595984327e-05, "perf/tokens_per_sec": 25830.427790023183, "train/loss_math": 2.0189725955327353, "train/loss_code": 1.0634477138519287, "train/loss_prose": 3.2982044219970703} +{"step": 3259, "train/loss": 2.566733181476593, "train/lm_loss": 2.566733181476593, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6285698816954624e-05, "perf/tokens_per_sec": 26415.328362867578, "train/loss_prose": 3.397542119026184, "train/loss_code": 1.3948911428451538, "train/loss_math": 2.0769574642181396} +{"step": 3260, "train/loss": 3.065113663673401, "train/lm_loss": 3.065113663673401, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6269342285169453e-05, "perf/tokens_per_sec": 25848.266188013717, "train/loss_prose": 3.6403971195220945, "train/loss_math": 2.1063075065612793} +{"step": 3261, "train/loss": 2.1409415304660797, "train/lm_loss": 2.1409415304660797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6252990008600784e-05, "perf/tokens_per_sec": 26965.69175687999, "train/loss_math": 2.2256155014038086, "train/loss_code": 1.4747420946757, "train/loss_prose": 3.800844669342041} +{"step": 3262, "train/loss": 2.917198896408081, "train/lm_loss": 2.917198896408081, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.623664199521853e-05, "perf/tokens_per_sec": 26089.398912680335, "train/loss_prose": 3.73752224445343, "train/loss_math": 2.2196110486984253, "train/loss_code": 1.9741399884223938} +{"step": 3263, "train/loss": 3.0656684041023254, "train/lm_loss": 3.0656684041023254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6220298252990502e-05, "perf/tokens_per_sec": 26062.841332522214, "train/loss_prose": 3.4784992933273315, "train/loss_code": 1.680951714515686, "train/loss_math": 1.9733994007110596} +{"step": 3264, "train/loss": 2.2939632534980774, "train/lm_loss": 2.2939632534980774, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6203958789882456e-05, "perf/tokens_per_sec": 27080.542661637235, "train/loss_code": 1.48480490843455, "train/loss_math": 1.9051151275634766, "train/loss_prose": 4.090973258018494} +{"step": 3265, "train/loss": 2.656494230031967, "train/lm_loss": 2.656494230031967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6187623613858038e-05, "perf/tokens_per_sec": 26118.631154382354, "train/loss_math": 2.2835681438446045, "train/loss_code": 1.3704453706741333, "train/loss_prose": 3.5824121634165444} +{"step": 3266, "train/loss": 2.421464294195175, "train/lm_loss": 2.421464294195175, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6171292732878812e-05, "perf/tokens_per_sec": 26566.05485514586, "train/loss_code": 1.5206419229507446, "train/loss_math": 2.1400938034057617, "train/loss_prose": 3.167423725128174} +{"step": 3267, "train/loss": 2.7478906512260437, "train/lm_loss": 2.7478906512260437, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6154966154904265e-05, "perf/tokens_per_sec": 25939.59137132098, "train/loss_prose": 3.2238385677337646, "train/loss_code": 2.1350032091140747, "train/loss_math": 1.5939263105392456} +{"step": 3268, "train/loss": 2.026191860437393, "train/lm_loss": 2.026191860437393, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6138643887891763e-05, "perf/tokens_per_sec": 25972.610908277282, "train/loss_math": 2.1600695848464966, "train/loss_code": 1.6109364827473958, "train/loss_prose": 2.736447334289551} +{"step": 3269, "train/loss": 2.134750932455063, "train/lm_loss": 2.134750932455063, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.612232593979658e-05, "perf/tokens_per_sec": 25400.25545932232, "train/loss_code": 1.6396470467249553, "train/loss_prose": 3.0460400581359863, "train/loss_math": 2.0223286151885986} +{"step": 3270, "train/loss": 2.3838785886764526, "train/lm_loss": 2.3838785886764526, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6106012318571877e-05, "perf/tokens_per_sec": 26325.91234844863, "train/loss_math": 2.1144923865795135, "train/loss_prose": 3.2182834148406982, "train/loss_code": 0.9582086205482483} +{"step": 3271, "train/loss": 2.0385743975639343, "train/lm_loss": 2.0385743975639343, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6089703032168733e-05, "perf/tokens_per_sec": 26829.292538339007, "train/loss_prose": 3.2359808683395386, "train/loss_code": 1.6110679626464843, "train/loss_math": 1.7812937498092651} +{"step": 3272, "train/loss": 2.266844719648361, "train/lm_loss": 2.266844719648361, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.607339808853609e-05, "perf/tokens_per_sec": 27187.854285917754, "train/loss_prose": 3.452487309773763, "train/loss_code": 1.4180457890033722, "train/loss_math": 2.1051127910614014} +{"step": 3273, "train/loss": 2.3088045716285706, "train/lm_loss": 2.3088045716285706, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6057097495620767e-05, "perf/tokens_per_sec": 26100.140200446956, "train/loss_math": 2.0226235389709473, "train/loss_prose": 3.8291958570480347, "train/loss_code": 1.5813916524251301} +{"step": 3274, "train/loss": 2.750531703233719, "train/lm_loss": 2.750531703233719, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6040801261367493e-05, "perf/tokens_per_sec": 25982.784661873375, "train/loss_prose": 3.613420009613037, "train/loss_math": 2.051667888959249, "train/loss_code": 1.395569920539856} +{"step": 3275, "train/loss": 3.1107259392738342, "train/lm_loss": 3.1107259392738342, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6024509393718844e-05, "perf/tokens_per_sec": 25163.12093036881, "train/loss_math": 2.229807734489441, "train/loss_prose": 3.4043654998143515} +{"step": 3276, "train/loss": 2.2612437903881073, "train/lm_loss": 2.2612437903881073, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.600822190061528e-05, "perf/tokens_per_sec": 25852.894980467223, "train/loss_prose": 3.0022263526916504, "train/loss_math": 2.11067271232605, "train/loss_code": 1.5321342945098877} +{"step": 3277, "train/loss": 2.480314314365387, "train/lm_loss": 2.480314314365387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5991938789995137e-05, "perf/tokens_per_sec": 26052.683816883724, "train/loss_prose": 3.4712835550308228, "train/loss_code": 1.4893450438976288} +{"step": 3278, "train/loss": 2.283135563135147, "train/lm_loss": 2.283135563135147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.597566006979459e-05, "perf/tokens_per_sec": 26225.08435290867, "train/loss_math": 2.2286996046702066, "train/loss_code": 1.728956659634908, "train/loss_prose": 3.196057438850403} +{"step": 3279, "train/loss": 2.1042736172676086, "train/lm_loss": 2.1042736172676086, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5959385747947698e-05, "perf/tokens_per_sec": 25879.72792034165, "train/loss_code": 1.4579291343688965, "train/loss_math": 1.9939656257629395, "train/loss_prose": 3.239252209663391} +{"step": 3280, "train/loss": 2.7087098360061646, "train/lm_loss": 2.7087098360061646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.594311583238636e-05, "perf/tokens_per_sec": 26267.54753422605, "train/loss_prose": 3.1620117664337157, "train/loss_code": 1.6743889451026917, "train/loss_math": 2.510841131210327} +{"step": 3281, "train/loss": 2.2512625455856323, "train/lm_loss": 2.2512625455856323, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5926850331040345e-05, "perf/tokens_per_sec": 25363.206217133138, "train/loss_prose": 3.2467993895212808, "train/loss_math": 2.002842585245768, "train/loss_code": 1.1305869221687317} +{"step": 3282, "train/loss": 2.3265450298786163, "train/lm_loss": 2.3265450298786163, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5910589251837257e-05, "perf/tokens_per_sec": 25541.982066953806, "train/loss_math": 2.0222120583057404, "train/loss_code": 2.4524845480918884, "train/loss_prose": 2.8092708587646484} +{"step": 3283, "train/loss": 2.1821157038211823, "train/lm_loss": 2.1821157038211823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5894332602702545e-05, "perf/tokens_per_sec": 26604.23221738208, "train/loss_math": 2.1389094829559325, "train/loss_code": 1.3681225776672363, "train/loss_prose": 4.026133060455322} +{"step": 3284, "train/loss": 2.345664769411087, "train/lm_loss": 2.345664769411087, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5878080391559508e-05, "perf/tokens_per_sec": 25688.141175204026, "train/loss_math": 2.284998595714569, "train/loss_code": 1.4836350679397583, "train/loss_prose": 3.3290261030197144} +{"step": 3285, "train/loss": 2.343283772468567, "train/lm_loss": 2.343283772468567, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5861832626329282e-05, "perf/tokens_per_sec": 26126.575223970936, "train/loss_code": 1.385746419429779, "train/loss_prose": 3.38165815671285, "train/loss_math": 1.9432675043741863} +{"step": 3286, "train/loss": 2.6564626693725586, "train/lm_loss": 2.6564626693725586, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5845589314930813e-05, "perf/tokens_per_sec": 25601.626690271605, "train/loss_math": 2.280584732691447, "train/loss_prose": 3.6381000677744546, "train/loss_code": 1.7478238940238953} +{"step": 3287, "train/loss": 2.482243686914444, "train/lm_loss": 2.482243686914444, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.58293504652809e-05, "perf/tokens_per_sec": 26016.347694931013, "train/loss_code": 1.5381143887837727, "train/loss_math": 2.074209690093994, "train/loss_prose": 3.292349100112915} +{"step": 3288, "train/loss": 2.1119206845760345, "train/lm_loss": 2.1119206845760345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.581311608529417e-05, "perf/tokens_per_sec": 26699.949931773182, "train/loss_code": 1.3019688526789348, "train/loss_prose": 3.0173158645629883, "train/loss_math": 1.968755841255188} +{"step": 3289, "train/loss": 1.989860326051712, "train/lm_loss": 1.989860326051712, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5796886182883053e-05, "perf/tokens_per_sec": 25937.829032472357, "train/loss_math": 2.237178365389506, "train/loss_code": 1.2479058504104614} +{"step": 3290, "train/loss": 1.9602191150188446, "train/lm_loss": 1.9602191150188446, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.578066076595781e-05, "perf/tokens_per_sec": 26007.171227796094, "train/loss_code": 1.1780126492182414, "train/loss_math": 2.2192631363868713, "train/loss_prose": 3.270662546157837} +{"step": 3291, "train/loss": 2.0630274415016174, "train/lm_loss": 2.0630274415016174, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5764439842426515e-05, "perf/tokens_per_sec": 25923.347764810977, "train/loss_math": 2.183937221765518, "train/loss_prose": 3.4845023155212402, "train/loss_code": 1.4279892841974895} +{"step": 3292, "train/loss": 1.980395883321762, "train/lm_loss": 1.980395883321762, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.574822342019504e-05, "perf/tokens_per_sec": 26211.000440921816, "train/loss_code": 1.5749014218648274, "train/loss_prose": 3.196879267692566} +{"step": 3293, "train/loss": 2.0598516762256622, "train/lm_loss": 2.0598516762256622, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5732011507167084e-05, "perf/tokens_per_sec": 25764.49628976431, "train/loss_math": 2.209583115577698, "train/loss_code": 1.2434857487678528, "train/loss_prose": 2.9439263343811035} +{"step": 3294, "train/loss": 2.158362478017807, "train/lm_loss": 2.158362478017807, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5715804111244137e-05, "perf/tokens_per_sec": 26846.98537000053, "train/loss_math": 1.9363097846508026, "train/loss_prose": 3.1136181354522705, "train/loss_code": 1.647212266921997} +{"step": 3295, "train/loss": 2.2490333914756775, "train/lm_loss": 2.2490333914756775, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5699601240325474e-05, "perf/tokens_per_sec": 25966.408437484413, "train/loss_math": 2.0941365162531533, "train/loss_prose": 3.3858695030212402, "train/loss_code": 1.646039605140686} +{"step": 3296, "train/loss": 2.275721251964569, "train/lm_loss": 2.275721251964569, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5683402902308175e-05, "perf/tokens_per_sec": 26118.47232226609, "train/loss_prose": 3.19354510307312, "train/loss_code": 1.6223768591880798, "train/loss_math": 2.143481284379959} +{"step": 3297, "train/loss": 2.1307079195976257, "train/lm_loss": 2.1307079195976257, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5667209105087132e-05, "perf/tokens_per_sec": 26843.08423448972, "train/loss_math": 2.2352403163909913, "train/loss_code": 1.2302331924438477, "train/loss_prose": 3.4089949131011963} +{"step": 3298, "train/loss": 2.3280175924301147, "train/lm_loss": 2.3280175924301147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5651019856554995e-05, "perf/tokens_per_sec": 25978.10924498088, "train/loss_code": 1.1381811499595642, "train/loss_math": 2.022285282611847, "train/loss_prose": 3.075801968574524} +{"step": 3299, "train/loss": 2.216046988964081, "train/lm_loss": 2.216046988964081, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.56348351646022e-05, "perf/tokens_per_sec": 25998.31597425572, "train/loss_code": 1.359886904557546, "train/loss_prose": 2.875502347946167, "train/loss_math": 2.5111039876937866} +{"step": 3300, "train/loss": 2.2373680770397186, "train/lm_loss": 2.2373680770397186, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.561865503711698e-05, "perf/tokens_per_sec": 26182.638529006104, "train/loss_math": 2.2735140919685364, "train/loss_code": 1.0913170874118805, "train/loss_prose": 3.3111270666122437} +{"step": 3301, "train/loss": 2.1810395419597626, "train/lm_loss": 2.1810395419597626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5602479481985333e-05, "perf/tokens_per_sec": 26125.581951008837, "train/loss_code": 1.7562360366185505, "train/loss_prose": 3.985201597213745, "train/loss_math": 2.0486017763614655} +{"step": 3302, "train/loss": 1.8966412246227264, "train/lm_loss": 1.8966412246227264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5586308507091018e-05, "perf/tokens_per_sec": 26942.939788814674, "train/loss_prose": 3.7847604751586914, "train/loss_code": 1.571109930674235, "train/loss_math": 1.9617091417312622} +{"step": 3303, "train/loss": 1.9554933309555054, "train/lm_loss": 1.9554933309555054, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.557014212031559e-05, "perf/tokens_per_sec": 24832.788179618456, "train/loss_code": 0.8097120424111685, "train/loss_math": 2.016565481821696, "train/loss_prose": 3.582556962966919} +{"step": 3304, "train/loss": 2.3277547359466553, "train/lm_loss": 2.3277547359466553, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5553980329538326e-05, "perf/tokens_per_sec": 25527.14275695241, "train/loss_math": 2.2327504754066467, "train/loss_prose": 3.424410343170166, "train/loss_code": 1.4211077988147736} +{"step": 3305, "train/loss": 2.8979973793029785, "train/lm_loss": 2.8979973793029785, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5537823142636305e-05, "perf/tokens_per_sec": 25658.944307121084, "train/loss_math": 2.2976645628611245, "train/loss_prose": 3.581481456756592, "train/loss_code": 1.965059518814087} +{"step": 3306, "train/loss": 2.37058624625206, "train/lm_loss": 2.37058624625206, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5521670567484325e-05, "perf/tokens_per_sec": 25658.905984335703, "train/loss_math": 1.9277478456497192, "train/loss_code": 2.1421185731887817, "train/loss_prose": 3.4847309589385986} +{"step": 3307, "train/loss": 2.261648654937744, "train/lm_loss": 2.261648654937744, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5505522611954975e-05, "perf/tokens_per_sec": 26615.484170018157, "train/loss_code": 1.3538530270258586, "train/loss_math": 2.049091418584188, "train/loss_prose": 3.9421780109405518} +{"step": 3308, "train/loss": 2.4596380591392517, "train/lm_loss": 2.4596380591392517, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5489379283918566e-05, "perf/tokens_per_sec": 27536.077960711904, "train/loss_code": 1.8024612069129944, "train/loss_prose": 3.3282418251037598, "train/loss_math": 2.029152234395345} +{"step": 3309, "train/loss": 2.086123675107956, "train/lm_loss": 2.086123675107956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.547324059124315e-05, "perf/tokens_per_sec": 26245.637178037825, "train/loss_code": 1.451534390449524, "train/loss_math": 2.119980216026306, "train/loss_prose": 3.1860201358795166} +{"step": 3310, "train/loss": 1.920573502779007, "train/lm_loss": 1.920573502779007, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5457106541794543e-05, "perf/tokens_per_sec": 26127.80698734054, "train/loss_prose": 3.5564719438552856, "train/loss_code": 1.255597722530365, "train/loss_math": 1.973655104637146} +{"step": 3311, "train/loss": 1.872815489768982, "train/lm_loss": 1.872815489768982, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.544097714343627e-05, "perf/tokens_per_sec": 26287.20162192351, "train/loss_math": 2.207584778467814, "train/loss_prose": 2.59450101852417, "train/loss_code": 1.4413171708583832} +{"step": 3312, "train/loss": 2.223990321159363, "train/lm_loss": 2.223990321159363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5424852404029634e-05, "perf/tokens_per_sec": 26323.61311042365, "train/loss_math": 2.111724396546682, "train/loss_prose": 3.2412447929382324, "train/loss_code": 1.8803318738937378} +{"step": 3313, "train/loss": 2.0873590111732483, "train/lm_loss": 2.0873590111732483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5408732331433595e-05, "perf/tokens_per_sec": 26821.79480077874, "train/loss_prose": 3.141428311665853, "train/loss_code": 1.4180417209863663, "train/loss_math": 1.6024202108383179} +{"step": 3314, "train/loss": 2.0614725947380066, "train/lm_loss": 2.0614725947380066, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.539261693350491e-05, "perf/tokens_per_sec": 27011.396103600026, "train/loss_math": 1.9764938751856487, "train/loss_code": 1.5277148485183716, "train/loss_prose": 2.9895769357681274} +{"step": 3315, "train/loss": 2.14615261554718, "train/lm_loss": 2.14615261554718, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5376506218098015e-05, "perf/tokens_per_sec": 27241.354505005915, "train/loss_math": 2.109495004018148, "train/loss_code": 1.4807917674382527, "train/loss_prose": 3.199179768562317} +{"step": 3316, "train/loss": 2.831713378429413, "train/lm_loss": 2.831713378429413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.536040019306509e-05, "perf/tokens_per_sec": 26019.184651290292, "train/loss_math": 2.3535850048065186, "train/loss_prose": 3.309841811656952} +{"step": 3317, "train/loss": 1.9451375007629395, "train/lm_loss": 1.9451375007629395, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5344298866256e-05, "perf/tokens_per_sec": 26460.447927351808, "train/loss_code": 1.1241456270217896, "train/loss_math": 2.241354465484619, "train/loss_prose": 3.223245620727539} +{"step": 3318, "train/loss": 2.3346362709999084, "train/lm_loss": 2.3346362709999084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5328202245518347e-05, "perf/tokens_per_sec": 26315.468166159397, "train/loss_prose": 3.373433510462443, "train/loss_code": 1.7113578081130982} +{"step": 3319, "train/loss": 2.4699195325374603, "train/lm_loss": 2.4699195325374603, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5312110338697426e-05, "perf/tokens_per_sec": 25771.569128485302, "train/loss_prose": 3.189584652582804, "train/loss_math": 2.225103735923767, "train/loss_code": 1.913464903831482} +{"step": 3320, "train/loss": 1.523147851228714, "train/lm_loss": 1.523147851228714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5296023153636235e-05, "perf/tokens_per_sec": 26463.790210479616, "train/loss_code": 1.1473857641220093, "train/loss_math": 2.1494179566701255} +{"step": 3321, "train/loss": 1.9442119002342224, "train/lm_loss": 1.9442119002342224, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.527994069817548e-05, "perf/tokens_per_sec": 26960.655949279684, "train/loss_math": 2.121759076913198, "train/loss_code": 1.4115702509880066} +{"step": 3322, "train/loss": 2.411363184452057, "train/lm_loss": 2.411363184452057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.526386298015354e-05, "perf/tokens_per_sec": 25909.39061795423, "train/loss_code": 1.3530606826146443, "train/loss_prose": 3.7275622685750327, "train/loss_math": 2.0245180130004883} +{"step": 3323, "train/loss": 2.6918825507164, "train/lm_loss": 2.6918825507164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.524779000740651e-05, "perf/tokens_per_sec": 26130.310606279534, "train/loss_prose": 3.2432469844818117, "train/loss_code": 1.696879506111145, "train/loss_math": 1.9250658750534058} +{"step": 3324, "train/loss": 2.2566832304000854, "train/lm_loss": 2.2566832304000854, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.523172178776816e-05, "perf/tokens_per_sec": 26027.50207403502, "train/loss_code": 1.5491929352283478, "train/loss_prose": 3.311666488647461, "train/loss_math": 1.9216949939727783} +{"step": 3325, "train/loss": 2.0408395528793335, "train/lm_loss": 2.0408395528793335, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.521565832906994e-05, "perf/tokens_per_sec": 26091.736668889087, "train/loss_code": 1.1415916979312897, "train/loss_prose": 3.2095239957173667, "train/loss_math": 2.1317780017852783} +{"step": 3326, "train/loss": 2.0118719041347504, "train/lm_loss": 2.0118719041347504, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5199599639140994e-05, "perf/tokens_per_sec": 25908.4137897753, "train/loss_code": 1.242092776298523, "train/loss_math": 2.7956981658935547, "train/loss_prose": 3.5444061756134033} +{"step": 3327, "train/loss": 2.3041224479675293, "train/lm_loss": 2.3041224479675293, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5183545725808127e-05, "perf/tokens_per_sec": 26327.848860446014, "train/loss_math": 2.040236383676529, "train/loss_code": 1.8999210596084595, "train/loss_prose": 3.236096143722534} +{"step": 3328, "train/loss": 2.3827998340129852, "train/lm_loss": 2.3827998340129852, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5167496596895814e-05, "perf/tokens_per_sec": 26094.986449642216, "train/loss_prose": 3.3601683974266052, "train/loss_math": 1.909713625907898, "train/loss_code": 1.2373372713724773} +{"step": 3329, "train/loss": 2.600175380706787, "train/lm_loss": 2.600175380706787, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5151452260226224e-05, "perf/tokens_per_sec": 25318.20298928021, "train/loss_code": 1.2040013670921326, "train/loss_prose": 3.437879753112793} +{"step": 3330, "train/loss": 3.1515864729881287, "train/lm_loss": 3.1515864729881287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5135412723619158e-05, "perf/tokens_per_sec": 26124.1914957354, "train/loss_prose": 3.601715326309204, "train/loss_code": 1.4968688488006592, "train/loss_math": 2.105531692504883} +{"step": 3331, "train/loss": 2.184157073497772, "train/lm_loss": 2.184157073497772, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5119377994892094e-05, "perf/tokens_per_sec": 26273.894720099834, "train/loss_prose": 3.255599617958069, "train/loss_math": 1.925963580608368, "train/loss_code": 1.629101276397705} +{"step": 3332, "train/loss": 2.419030249118805, "train/lm_loss": 2.419030249118805, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5103348081860159e-05, "perf/tokens_per_sec": 25725.761272701136, "train/loss_prose": 3.150449752807617, "train/loss_math": 2.067557772000631, "train/loss_code": 1.8491098880767822} +{"step": 3333, "train/loss": 2.179052859544754, "train/lm_loss": 2.179052859544754, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5087322992336147e-05, "perf/tokens_per_sec": 26070.197628170597, "train/loss_math": 2.073362171649933, "train/loss_code": 1.5882275998592377, "train/loss_prose": 3.4663941860198975} +{"step": 3334, "train/loss": 2.1971117854118347, "train/lm_loss": 2.1971117854118347, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5071302734130489e-05, "perf/tokens_per_sec": 26969.755628291772, "train/loss_prose": 3.083237051963806, "train/loss_math": 2.15582337975502, "train/loss_code": 1.3935632109642029} +{"step": 3335, "train/loss": 2.0330474376678467, "train/lm_loss": 2.0330474376678467, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.505528731505126e-05, "perf/tokens_per_sec": 26170.872395460432, "train/loss_code": 1.4621256291866302, "train/loss_prose": 2.994785785675049, "train/loss_math": 2.2131530046463013} +{"step": 3336, "train/loss": 2.4089079797267914, "train/lm_loss": 2.4089079797267914, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5039276742904185e-05, "perf/tokens_per_sec": 26228.00705932643, "train/loss_prose": 3.216251254081726, "train/loss_math": 2.139793574810028} +{"step": 3337, "train/loss": 2.3958652317523956, "train/lm_loss": 2.3958652317523956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5023271025492618e-05, "perf/tokens_per_sec": 26394.83343166175, "train/loss_prose": 3.2614471912384033, "train/loss_math": 2.3263206481933594, "train/loss_code": 1.576646089553833} +{"step": 3338, "train/loss": 2.2473383247852325, "train/lm_loss": 2.2473383247852325, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.500727017061756e-05, "perf/tokens_per_sec": 26549.83801693763, "train/loss_code": 1.5194511413574219, "train/loss_math": 2.187957445780436, "train/loss_prose": 3.428240418434143} +{"step": 3339, "train/loss": 2.302953362464905, "train/lm_loss": 2.302953362464905, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4991274186077632e-05, "perf/tokens_per_sec": 26295.97304871833, "train/loss_math": 2.1733948230743407, "train/loss_code": 0.9785112142562866, "train/loss_prose": 3.2890708446502686} +{"step": 3340, "train/loss": 2.5694066882133484, "train/lm_loss": 2.5694066882133484, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4975283079669072e-05, "perf/tokens_per_sec": 27113.836659196364, "train/loss_math": 2.2603278756141663, "train/loss_prose": 3.1729020277659097, "train/loss_code": 1.995235562324524} +{"step": 3341, "train/loss": 2.267264187335968, "train/lm_loss": 2.267264187335968, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4959296859185754e-05, "perf/tokens_per_sec": 26666.33944118227, "train/loss_prose": 3.3842581113179526, "train/loss_code": 1.1286953687667847, "train/loss_math": 2.299626588821411} +{"step": 3342, "train/loss": 2.5115974247455597, "train/lm_loss": 2.5115974247455597, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4943315532419177e-05, "perf/tokens_per_sec": 26418.90301359246, "train/loss_math": 2.0270005067189536, "train/loss_code": 1.7621706128120422, "train/loss_prose": 3.495812018712362} +{"step": 3343, "train/loss": 2.9839707016944885, "train/lm_loss": 2.9839707016944885, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4927339107158437e-05, "perf/tokens_per_sec": 26678.058269795176, "train/loss_prose": 3.4447898467381797, "train/loss_code": 1.311698317527771, "train/loss_math": 1.8913288116455078} +{"step": 3344, "train/loss": 2.134123355150223, "train/lm_loss": 2.134123355150223, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4911367591190248e-05, "perf/tokens_per_sec": 25992.76975495803, "train/loss_math": 1.928126573562622, "train/loss_prose": 3.282110333442688, "train/loss_code": 1.5747951070467632} +{"step": 3345, "train/loss": 2.861855685710907, "train/lm_loss": 2.861855685710907, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4895400992298942e-05, "perf/tokens_per_sec": 26075.97483451951, "train/loss_code": 1.1944489479064941, "train/loss_prose": 3.4855865478515624, "train/loss_math": 2.136232018470764} +{"step": 3346, "train/loss": 2.5902591347694397, "train/lm_loss": 2.5902591347694397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4879439318266442e-05, "perf/tokens_per_sec": 26060.232031879277, "train/loss_code": 1.6007471879323323, "train/loss_math": 1.9907093048095703, "train/loss_prose": 3.4822805523872375} +{"step": 3347, "train/loss": 2.466510981321335, "train/lm_loss": 2.466510981321335, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4863482576872275e-05, "perf/tokens_per_sec": 26362.795312358536, "train/loss_prose": 3.3492120107014975, "train/loss_math": 2.1159759163856506, "train/loss_code": 1.817499836285909} +{"step": 3348, "train/loss": 2.68817138671875, "train/lm_loss": 2.68817138671875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4847530775893554e-05, "perf/tokens_per_sec": 26415.450210033643, "train/loss_prose": 3.5734359423319497, "train/loss_math": 2.1570125579833985} +{"step": 3349, "train/loss": 2.303788185119629, "train/lm_loss": 2.303788185119629, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4831583923104999e-05, "perf/tokens_per_sec": 26122.880630603235, "train/loss_math": 2.1489081382751465, "train/loss_prose": 3.8430910110473633, "train/loss_code": 1.0742457509040833} +{"step": 3350, "train/loss": 2.3008662462234497, "train/lm_loss": 2.3008662462234497, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4815642026278913e-05, "perf/tokens_per_sec": 26123.079238076123, "train/loss_code": 1.4917685985565186, "train/loss_math": 2.2310417890548706, "train/loss_prose": 3.1565133730570474} +{"step": 3351, "train/loss": 2.2564509510993958, "train/lm_loss": 2.2564509510993958, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4799705093185181e-05, "perf/tokens_per_sec": 26963.23709466554, "train/loss_math": 2.092063920838492, "train/loss_prose": 3.4071595668792725} +{"step": 3352, "train/loss": 2.4297415912151337, "train/lm_loss": 2.4297415912151337, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4783773131591278e-05, "perf/tokens_per_sec": 26587.968750386906, "train/loss_math": 2.13689390818278, "train/loss_prose": 3.3764464060465493, "train/loss_code": 1.448955774307251} +{"step": 3353, "train/loss": 2.5062058866024017, "train/lm_loss": 2.5062058866024017, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4767846149262237e-05, "perf/tokens_per_sec": 26561.331453308598, "train/loss_prose": 3.236249566078186, "train/loss_math": 2.280340313911438, "train/loss_code": 1.271983653306961} +{"step": 3354, "train/loss": 2.830877363681793, "train/lm_loss": 2.830877363681793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.475192415396068e-05, "perf/tokens_per_sec": 26398.727049220633, "train/loss_math": 2.220532864332199, "train/loss_prose": 3.4412218928337097} +{"step": 3355, "train/loss": 2.192287713289261, "train/lm_loss": 2.192287713289261, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4736007153446801e-05, "perf/tokens_per_sec": 26137.864109457325, "train/loss_prose": 3.552198052406311, "train/loss_math": 1.9873247146606445, "train/loss_code": 1.242303043603897} +{"step": 3356, "train/loss": 1.782255232334137, "train/lm_loss": 1.782255232334137, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.472009515547835e-05, "perf/tokens_per_sec": 25931.173468494442, "train/loss_math": 1.9787556727727253, "train/loss_code": 1.192753791809082} +{"step": 3357, "train/loss": 2.8334582448005676, "train/lm_loss": 2.8334582448005676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4704188167810635e-05, "perf/tokens_per_sec": 26216.280134836332, "train/loss_prose": 3.4360234260559084, "train/loss_code": 1.7304506301879883, "train/loss_math": 2.0266475677490234} +{"step": 3358, "train/loss": 2.526433438062668, "train/lm_loss": 2.526433438062668, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4688286198196524e-05, "perf/tokens_per_sec": 26120.85500686476, "train/loss_math": 2.2199564576148987, "train/loss_code": 1.5771186351776123, "train/loss_prose": 3.2515080769856772} +{"step": 3359, "train/loss": 1.9945899844169617, "train/lm_loss": 1.9945899844169617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.467238925438646e-05, "perf/tokens_per_sec": 26826.31807386194, "train/loss_code": 1.5095699429512024, "train/loss_prose": 3.2436704635620117, "train/loss_math": 1.7155495882034302} +{"step": 3360, "train/loss": 2.16871777176857, "train/lm_loss": 2.16871777176857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4656497344128412e-05, "perf/tokens_per_sec": 26035.390797525557, "train/loss_code": 1.5839467346668243, "train/loss_prose": 3.2882553339004517, "train/loss_math": 2.218722105026245} +{"step": 3361, "train/loss": 2.132462829351425, "train/lm_loss": 2.132462829351425, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4640610475167898e-05, "perf/tokens_per_sec": 26190.18239359967, "train/loss_math": 2.0224629342556, "train/loss_prose": 3.729663133621216, "train/loss_code": 1.7467292944590251} +{"step": 3362, "train/loss": 2.5004425644874573, "train/lm_loss": 2.5004425644874573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4624728655247995e-05, "perf/tokens_per_sec": 26285.11021929415, "train/loss_prose": 3.0835196375846863, "train/loss_code": 0.7949473261833191, "train/loss_math": 2.2915046215057373} +{"step": 3363, "train/loss": 2.066473126411438, "train/lm_loss": 2.066473126411438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4608851892109304e-05, "perf/tokens_per_sec": 26414.150564879674, "train/loss_math": 2.1677117347717285, "train/loss_prose": 3.216157555580139, "train/loss_code": 1.441011592745781} +{"step": 3364, "train/loss": 2.2744064033031464, "train/lm_loss": 2.2744064033031464, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4592980193489975e-05, "perf/tokens_per_sec": 26423.901021896014, "train/loss_math": 2.1805164217948914, "train/loss_code": 1.7886554598808289, "train/loss_prose": 2.947937250137329} +{"step": 3365, "train/loss": 2.3560305535793304, "train/lm_loss": 2.3560305535793304, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4577113567125669e-05, "perf/tokens_per_sec": 26236.338344440846, "train/loss_prose": 3.372559150060018, "train/loss_code": 1.5572794675827026, "train/loss_math": 2.029363989830017} +{"step": 3366, "train/loss": 2.2583294212818146, "train/lm_loss": 2.2583294212818146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4561252020749591e-05, "perf/tokens_per_sec": 27208.048423726614, "train/loss_math": 2.1902409553527833, "train/loss_code": 1.334810197353363, "train/loss_prose": 4.445810317993164} +{"step": 3367, "train/loss": 2.40399432182312, "train/lm_loss": 2.40399432182312, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4545395562092468e-05, "perf/tokens_per_sec": 26706.134193905422, "train/loss_code": 1.6243496735890706, "train/loss_math": 2.0896575450897217, "train/loss_prose": 3.3931965827941895} +{"step": 3368, "train/loss": 2.1337466537952423, "train/lm_loss": 2.1337466537952423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4529544198882544e-05, "perf/tokens_per_sec": 25322.38259787809, "train/loss_code": 1.4569242000579834, "train/loss_math": 2.242109696070353, "train/loss_prose": 2.986435890197754} +{"step": 3369, "train/loss": 2.4045269191265106, "train/lm_loss": 2.4045269191265106, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4513697938845572e-05, "perf/tokens_per_sec": 26155.6519536198, "train/loss_code": 1.5491920510927837, "train/loss_prose": 3.4199822743733725, "train/loss_math": 2.164346158504486} +{"step": 3370, "train/loss": 2.7009106278419495, "train/lm_loss": 2.7009106278419495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4497856789704844e-05, "perf/tokens_per_sec": 26324.21813412276, "train/loss_math": 2.2295374870300293, "train/loss_prose": 3.093868350982666, "train/loss_code": 1.6788685321807861} +{"step": 3371, "train/loss": 2.648312360048294, "train/lm_loss": 2.648312360048294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4482020759181135e-05, "perf/tokens_per_sec": 26229.568772223927, "train/loss_math": 2.1034412384033203, "train/loss_prose": 3.7160089015960693, "train/loss_code": 1.8640745878219604} +{"step": 3372, "train/loss": 2.5096763372421265, "train/lm_loss": 2.5096763372421265, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4466189854992735e-05, "perf/tokens_per_sec": 25715.941736033878, "train/loss_math": 2.3273184299468994, "train/loss_prose": 3.9224859476089478, "train/loss_code": 1.461582511663437} +{"step": 3373, "train/loss": 2.757964313030243, "train/lm_loss": 2.757964313030243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4450364084855433e-05, "perf/tokens_per_sec": 26834.99194631711, "train/loss_code": 1.7668362259864807, "train/loss_prose": 3.4558494091033936, "train/loss_math": 2.3533215522766113} +{"step": 3374, "train/loss": 1.7423327565193176, "train/lm_loss": 1.7423327565193176, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.443454345648252e-05, "perf/tokens_per_sec": 25968.135410195366, "train/loss_code": 1.350589966773987, "train/loss_math": 2.1446694135665894, "train/loss_prose": 2.8963735103607178} +{"step": 3375, "train/loss": 2.268029034137726, "train/lm_loss": 2.268029034137726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4418727977584774e-05, "perf/tokens_per_sec": 25966.84015912746, "train/loss_math": 2.0518569946289062, "train/loss_code": 1.3177644610404968, "train/loss_prose": 3.6071058909098306} +{"step": 3376, "train/loss": 2.1354525089263916, "train/lm_loss": 2.1354525089263916, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4402917655870466e-05, "perf/tokens_per_sec": 27020.615068118263, "train/loss_code": 1.325680911540985, "train/loss_math": 2.0880018870035806, "train/loss_prose": 2.7227508227030435} +{"step": 3377, "train/loss": 2.0927881598472595, "train/lm_loss": 2.0927881598472595, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.438711249904536e-05, "perf/tokens_per_sec": 26290.25888603744, "train/loss_prose": 3.33604896068573, "train/loss_math": 2.3229289054870605, "train/loss_code": 1.5494556665420531} +{"step": 3378, "train/loss": 1.8478718400001526, "train/lm_loss": 1.8478718400001526, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4371312514812685e-05, "perf/tokens_per_sec": 26162.423261891203, "train/loss_code": 1.4056640148162842, "train/loss_prose": 3.5264506340026855, "train/loss_math": 2.1141018867492676} +{"step": 3379, "train/loss": 1.8675094246864319, "train/lm_loss": 1.8675094246864319, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4355517710873184e-05, "perf/tokens_per_sec": 26096.849493857015, "train/loss_prose": 3.024606704711914, "train/loss_code": 1.3650363981723785, "train/loss_math": 2.1517740885416665} +{"step": 3380, "train/loss": 1.9292969107627869, "train/lm_loss": 1.9292969107627869, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4339728094925037e-05, "perf/tokens_per_sec": 26258.273305159226, "train/loss_math": 1.9689624309539795, "train/loss_prose": 3.7907581329345703, "train/loss_code": 1.2559224764506023} +{"step": 3381, "train/loss": 2.4796386063098907, "train/lm_loss": 2.4796386063098907, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4323943674663914e-05, "perf/tokens_per_sec": 26432.92660575867, "train/loss_code": 1.5354421138763428, "train/loss_prose": 3.4885261058807373, "train/loss_math": 2.3826019763946533} +{"step": 3382, "train/loss": 2.2306994199752808, "train/lm_loss": 2.2306994199752808, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4308164457782952e-05, "perf/tokens_per_sec": 26062.99948874868, "train/loss_math": 2.0020480155944824, "train/loss_code": 1.2608944177627563, "train/loss_prose": 3.352938652038574} +{"step": 3383, "train/loss": 2.3465373814105988, "train/lm_loss": 2.3465373814105988, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4292390451972745e-05, "perf/tokens_per_sec": 26350.826320964097, "train/loss_code": 1.4079791009426117, "train/loss_prose": 3.653247515360514, "train/loss_math": 2.1806397438049316} +{"step": 3384, "train/loss": 2.643857568502426, "train/lm_loss": 2.643857568502426, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4276621664921357e-05, "perf/tokens_per_sec": 25924.91252765295, "train/loss_prose": 3.059549617767334, "train/loss_math": 1.9510375658671062} +{"step": 3385, "train/loss": 2.372700810432434, "train/lm_loss": 2.372700810432434, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4260858104314297e-05, "perf/tokens_per_sec": 26049.207576465164, "train/loss_prose": 3.3531299432118735, "train/loss_math": 2.1037891507148743, "train/loss_code": 1.5715463161468506} +{"step": 3386, "train/loss": 2.346826523542404, "train/lm_loss": 2.346826523542404, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4245099777834539e-05, "perf/tokens_per_sec": 26560.592290257846, "train/loss_prose": 3.130430062611898, "train/loss_math": 2.197242538134257, "train/loss_code": 1.3957975506782532} +{"step": 3387, "train/loss": 2.1349963545799255, "train/lm_loss": 2.1349963545799255, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.42293466931625e-05, "perf/tokens_per_sec": 26249.24625891916, "train/loss_math": 2.176648050546646, "train/loss_code": 1.061227411031723, "train/loss_prose": 3.125462055206299} +{"step": 3388, "train/loss": 2.4864664673805237, "train/lm_loss": 2.4864664673805237, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4213598857976024e-05, "perf/tokens_per_sec": 26491.416721793288, "train/loss_prose": 3.513261000315348, "train/loss_math": 1.9912753105163574, "train/loss_code": 1.6890606880187988} +{"step": 3389, "train/loss": 2.1639294922351837, "train/lm_loss": 2.1639294922351837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4197856279950438e-05, "perf/tokens_per_sec": 26189.942839023617, "train/loss_code": 0.8935476541519165, "train/loss_prose": 3.115790843963623, "train/loss_math": 2.0372613430023194} +{"step": 3390, "train/loss": 1.6327175945043564, "train/lm_loss": 1.6327175945043564, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4182118966758481e-05, "perf/tokens_per_sec": 26140.56852974159, "train/loss_code": 0.8164891600608826, "train/loss_math": 1.8338226974010468, "train/loss_prose": 3.2769827842712402} +{"step": 3391, "train/loss": 2.742593288421631, "train/lm_loss": 2.742593288421631, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4166386926070322e-05, "perf/tokens_per_sec": 26145.421980888434, "train/loss_prose": 3.271095609664917, "train/loss_math": 1.8617560863494873} +{"step": 3392, "train/loss": 2.556024670600891, "train/lm_loss": 2.556024670600891, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4150660165553564e-05, "perf/tokens_per_sec": 25972.139730571012, "train/loss_code": 1.5988922119140625, "train/loss_prose": 3.194948196411133, "train/loss_math": 2.2353100776672363} +{"step": 3393, "train/loss": 2.302493840456009, "train/lm_loss": 2.302493840456009, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4134938692873245e-05, "perf/tokens_per_sec": 26092.727369930257, "train/loss_prose": 3.672441840171814, "train/loss_math": 2.168051600456238, "train/loss_code": 1.684741050004959} +{"step": 3394, "train/loss": 2.0643556714057922, "train/lm_loss": 2.0643556714057922, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4119222515691816e-05, "perf/tokens_per_sec": 25935.166396946934, "train/loss_code": 1.844104677438736, "train/loss_math": 1.905533234278361, "train/loss_prose": 3.421826124191284} +{"step": 3395, "train/loss": 1.9452587068080902, "train/lm_loss": 1.9452587068080902, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4103511641669152e-05, "perf/tokens_per_sec": 26007.68304900775, "train/loss_math": 1.979901671409607, "train/loss_code": 1.5379465520381927, "train/loss_prose": 3.47057843208313} +{"step": 3396, "train/loss": 2.2778941094875336, "train/lm_loss": 2.2778941094875336, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.408780607846254e-05, "perf/tokens_per_sec": 26501.59224259477, "train/loss_code": 1.5006588697433472, "train/loss_math": 2.10513174533844, "train/loss_prose": 3.7028898000717163} +{"step": 3397, "train/loss": 2.6060895323753357, "train/lm_loss": 2.6060895323753357, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4072105833726684e-05, "perf/tokens_per_sec": 26415.61267467034, "train/loss_math": 1.9922226071357727, "train/loss_prose": 3.345291256904602, "train/loss_code": 1.7415526509284973} +{"step": 3398, "train/loss": 2.6258549094200134, "train/lm_loss": 2.6258549094200134, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.405641091511368e-05, "perf/tokens_per_sec": 26372.34595009479, "train/loss_prose": 3.2388048648834227, "train/loss_code": 1.604271690050761} +{"step": 3399, "train/loss": 1.9501088559627533, "train/lm_loss": 1.9501088559627533, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4040721330273062e-05, "perf/tokens_per_sec": 26053.197431056316, "train/loss_code": 1.2229860723018646, "train/loss_prose": 3.590336799621582, "train/loss_math": 2.372863213221232} +{"step": 3400, "train/loss": 1.7496216595172882, "train/lm_loss": 1.7496216595172882, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4025037086851733e-05, "perf/tokens_per_sec": 26541.92154469598, "train/loss_math": 2.126392126083374, "train/loss_code": 1.208836515744527, "train/loss_prose": 1.8648943901062012} +{"step": 3400, "eval/loss": 2.168118363382423, "eval/lm_loss": 2.168118363382423, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.74181962766167, "eval/loss_code": 1.5622560643850396, "eval/ppl_code": 4.769569572163754, "eval/loss_prose": 3.4883792996406555, "eval/ppl_prose": 32.732854544677465, "eval/loss_math": 2.045463075854934, "eval/ppl_math": 7.732738553453149} +{"step": 3401, "train/loss": 2.4826058745384216, "train/lm_loss": 2.4826058745384216, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4009358192494016e-05, "perf/tokens_per_sec": 26528.27364786614, "train/loss_prose": 3.18070387840271, "train/loss_code": 1.319109320640564} +{"step": 3402, "train/loss": 1.886261373758316, "train/lm_loss": 1.886261373758316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3993684654841574e-05, "perf/tokens_per_sec": 25986.12527585889, "train/loss_math": 1.6704603433609009, "train/loss_code": 1.2575387060642242, "train/loss_prose": 3.359507918357849} +{"step": 3403, "train/loss": 2.1154327988624573, "train/lm_loss": 2.1154327988624573, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.397801648153354e-05, "perf/tokens_per_sec": 26733.354056249398, "train/loss_math": 2.06448432803154, "train/loss_code": 1.1293847858905792, "train/loss_prose": 3.2033777236938477} +{"step": 3404, "train/loss": 2.399383306503296, "train/lm_loss": 2.399383306503296, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3962353680206373e-05, "perf/tokens_per_sec": 26136.27353537628, "train/loss_code": 1.5385677019755046, "train/loss_prose": 3.0302597284317017, "train/loss_math": 2.458324670791626} +{"step": 3405, "train/loss": 2.104892075061798, "train/lm_loss": 2.104892075061798, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3946696258493936e-05, "perf/tokens_per_sec": 25739.71405027523, "train/loss_math": 2.1052515506744385, "train/loss_code": 1.5986260573069255, "train/loss_prose": 3.6222524642944336} +{"step": 3406, "train/loss": 2.2063505351543427, "train/lm_loss": 2.2063505351543427, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3931044224027468e-05, "perf/tokens_per_sec": 26226.725792761492, "train/loss_prose": 3.1975185871124268, "train/loss_math": 1.8227723439534504, "train/loss_code": 1.2949653267860413} +{"step": 3407, "train/loss": 2.056045174598694, "train/lm_loss": 2.056045174598694, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3915397584435563e-05, "perf/tokens_per_sec": 26541.839533428603, "train/loss_math": 2.059081753094991, "train/loss_code": 1.0583362579345703, "train/loss_prose": 3.0355353355407715} +{"step": 3408, "train/loss": 2.242357522249222, "train/lm_loss": 2.242357522249222, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3899756347344234e-05, "perf/tokens_per_sec": 26383.84826868115, "train/loss_prose": 3.3602675596872964, "train/loss_code": 1.4286674559116364, "train/loss_math": 2.1433866024017334} +{"step": 3409, "train/loss": 2.0180719196796417, "train/lm_loss": 2.0180719196796417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.388412052037682e-05, "perf/tokens_per_sec": 26409.35922843501, "train/loss_math": 2.052635610103607, "train/loss_code": 1.193296104669571, "train/loss_prose": 2.773720383644104} +{"step": 3410, "train/loss": 1.8903547525405884, "train/lm_loss": 1.8903547525405884, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.386849011115402e-05, "perf/tokens_per_sec": 26267.868837018712, "train/loss_code": 1.174543485045433, "train/loss_math": 2.1251167456309, "train/loss_prose": 4.049313545227051} +{"step": 3411, "train/loss": 2.2390976548194885, "train/lm_loss": 2.2390976548194885, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3852865127293902e-05, "perf/tokens_per_sec": 26615.113074812583, "train/loss_math": 1.9253888924916585, "train/loss_code": 1.0545278787612915, "train/loss_prose": 3.3425196011861167} +{"step": 3412, "train/loss": 2.4676182866096497, "train/lm_loss": 2.4676182866096497, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3837245576411911e-05, "perf/tokens_per_sec": 25947.15256377339, "train/loss_prose": 3.162339925765991, "train/loss_code": 2.0430290699005127, "train/loss_math": 2.0527242720127106} +{"step": 3413, "train/loss": 2.8845545649528503, "train/lm_loss": 2.8845545649528503, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3821631466120821e-05, "perf/tokens_per_sec": 26720.007316174127, "train/loss_math": 2.3310202757517495, "train/loss_code": 1.9914029240608215, "train/loss_prose": 4.033523241678874} +{"step": 3414, "train/loss": 2.870074838399887, "train/lm_loss": 2.870074838399887, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.380602280403076e-05, "perf/tokens_per_sec": 26252.77609787256, "train/loss_prose": 3.8016945719718933, "train/loss_math": 2.111820101737976, "train/loss_code": 1.418360710144043} +{"step": 3415, "train/loss": 2.139887511730194, "train/lm_loss": 2.139887511730194, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3790419597749199e-05, "perf/tokens_per_sec": 26570.944318136124, "train/loss_math": 2.1196773052215576, "train/loss_code": 1.3700255900621414, "train/loss_prose": 3.699821949005127} +{"step": 3416, "train/loss": 2.599144756793976, "train/lm_loss": 2.599144756793976, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3774821854880948e-05, "perf/tokens_per_sec": 26158.997338400724, "train/loss_code": 1.6067040115594864, "train/loss_prose": 3.5915855765342712} +{"step": 3417, "train/loss": 2.218507766723633, "train/lm_loss": 2.218507766723633, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.375922958302815e-05, "perf/tokens_per_sec": 26378.176857963645, "train/loss_code": 1.404756247997284, "train/loss_math": 2.2276426951090493, "train/loss_prose": 3.4254329204559326} +{"step": 3418, "train/loss": 2.120516777038574, "train/lm_loss": 2.120516777038574, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3743642789790318e-05, "perf/tokens_per_sec": 26113.23194593699, "train/loss_math": 2.0370216369628906, "train/loss_prose": 3.6364702383677163, "train/loss_code": 1.0044254511594772} +{"step": 3419, "train/loss": 2.7208133041858673, "train/lm_loss": 2.7208133041858673, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3728061482764238e-05, "perf/tokens_per_sec": 26231.01051844048, "train/loss_math": 2.8423774242401123, "train/loss_prose": 3.4575666785240173, "train/loss_code": 1.6979540189107258} +{"step": 3420, "train/loss": 2.8717291355133057, "train/lm_loss": 2.8717291355133057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.371248566954406e-05, "perf/tokens_per_sec": 25685.606358357305, "train/loss_prose": 3.4555434703826906, "train/loss_math": 1.9566582441329956, "train/loss_code": 1.869728684425354} +{"step": 3421, "train/loss": 2.8062602877616882, "train/lm_loss": 2.8062602877616882, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.369691535772123e-05, "perf/tokens_per_sec": 26881.51574098414, "train/loss_math": 2.1968605518341064, "train/loss_prose": 3.8950042724609375, "train/loss_code": 1.2381721138954163} +{"step": 3422, "train/loss": 1.9371931552886963, "train/lm_loss": 1.9371931552886963, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3681350554884559e-05, "perf/tokens_per_sec": 25785.57165306585, "train/loss_math": 2.146927332878113, "train/loss_code": 1.5876362323760986} +{"step": 3423, "train/loss": 2.165674164891243, "train/lm_loss": 2.165674164891243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.366579126862012e-05, "perf/tokens_per_sec": 26105.37532309067, "train/loss_code": 1.7792866230010986, "train/loss_math": 2.3608657121658325, "train/loss_prose": 3.7072277069091797} +{"step": 3424, "train/loss": 2.5459100902080536, "train/lm_loss": 2.5459100902080536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3650237506511331e-05, "perf/tokens_per_sec": 26060.706411316318, "train/loss_code": 1.3894882798194885, "train/loss_prose": 3.5156758427619934, "train/loss_math": 2.1361136436462402} +{"step": 3425, "train/loss": 2.502798467874527, "train/lm_loss": 2.502798467874527, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3634689276138904e-05, "perf/tokens_per_sec": 25960.915052156448, "train/loss_code": 1.1858184337615967, "train/loss_prose": 3.3631606698036194, "train/loss_math": 2.099053919315338} +{"step": 3426, "train/loss": 1.7875301241874695, "train/lm_loss": 1.7875301241874695, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3619146585080845e-05, "perf/tokens_per_sec": 26331.117370546628, "train/loss_math": 2.070425868034363, "train/loss_code": 1.421616268157959, "train/loss_prose": 3.051307201385498} +{"step": 3427, "train/loss": 3.1895960569381714, "train/lm_loss": 3.1895960569381714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3603609440912507e-05, "perf/tokens_per_sec": 26305.032267547795, "train/loss_prose": 3.5331939458847046, "train/loss_math": 2.2107272148132324, "train/loss_code": 2.106877326965332} +{"step": 3428, "train/loss": 2.2083955109119415, "train/lm_loss": 2.2083955109119415, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3588077851206468e-05, "perf/tokens_per_sec": 26229.248405698418, "train/loss_code": 1.8433221975962322, "train/loss_math": 2.0346109867095947, "train/loss_prose": 3.9987525939941406} +{"step": 3429, "train/loss": 2.6347486674785614, "train/lm_loss": 2.6347486674785614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3572551823532654e-05, "perf/tokens_per_sec": 26480.18777339692, "train/loss_math": 2.403035879135132, "train/loss_code": 1.2483700513839722, "train/loss_prose": 3.4437944293022156} +{"step": 3430, "train/loss": 2.2202245593070984, "train/lm_loss": 2.2202245593070984, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3557031365458256e-05, "perf/tokens_per_sec": 25923.425998466915, "train/loss_code": 1.2454948127269745, "train/loss_prose": 3.6173341274261475, "train/loss_math": 1.9278141260147095} +{"step": 3431, "train/loss": 2.6663402915000916, "train/lm_loss": 2.6663402915000916, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3541516484547753e-05, "perf/tokens_per_sec": 26182.877949977825, "train/loss_code": 1.9598417282104492, "train/loss_math": 2.0262093544006348, "train/loss_prose": 3.3396549820899963} +{"step": 3432, "train/loss": 2.5392073690891266, "train/lm_loss": 2.5392073690891266, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3526007188362924e-05, "perf/tokens_per_sec": 26048.062200456374, "train/loss_math": 2.1158021688461304, "train/loss_prose": 3.239931106567383, "train/loss_code": 2.1207537253697715} +{"step": 3433, "train/loss": 2.3315823078155518, "train/lm_loss": 2.3315823078155518, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3510503484462805e-05, "perf/tokens_per_sec": 26140.528754800584, "train/loss_code": 1.9029994010925293, "train/loss_prose": 3.6403201818466187, "train/loss_math": 1.8876731793085735} +{"step": 3434, "train/loss": 2.376053810119629, "train/lm_loss": 2.376053810119629, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.349500538040371e-05, "perf/tokens_per_sec": 25988.32662038753, "train/loss_code": 1.4902335007985432, "train/loss_math": 2.042846918106079, "train/loss_prose": 3.123720645904541} +{"step": 3435, "train/loss": 2.3372121155261993, "train/lm_loss": 2.3372121155261993, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3479512883739232e-05, "perf/tokens_per_sec": 25973.042836193214, "train/loss_code": 1.464915931224823, "train/loss_math": 2.029821276664734, "train/loss_prose": 3.226133187611898} +{"step": 3436, "train/loss": 2.281323581933975, "train/lm_loss": 2.281323581933975, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3464026002020219e-05, "perf/tokens_per_sec": 26260.92241657355, "train/loss_math": 2.0556352138519287, "train/loss_prose": 3.674914836883545, "train/loss_code": 1.3391090631484985} +{"step": 3437, "train/loss": 2.0267456769943237, "train/lm_loss": 2.0267456769943237, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3448544742794791e-05, "perf/tokens_per_sec": 26328.292684571472, "train/loss_code": 1.300524612267812, "train/loss_math": 2.2258263429005942, "train/loss_prose": 2.8174561262130737} +{"step": 3438, "train/loss": 1.394727811217308, "train/lm_loss": 1.394727811217308, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.343306911360833e-05, "perf/tokens_per_sec": 25127.531506917436, "train/loss_code": 1.0300651788711548, "train/loss_math": 2.002499222755432} +{"step": 3439, "train/loss": 2.7106425762176514, "train/lm_loss": 2.7106425762176514, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3417599122003464e-05, "perf/tokens_per_sec": 26938.334865809065, "train/loss_code": 2.047126531600952, "train/loss_prose": 3.585016409556071, "train/loss_math": 2.2786126136779785} +{"step": 3440, "train/loss": 2.4852574467658997, "train/lm_loss": 2.4852574467658997, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.340213477552007e-05, "perf/tokens_per_sec": 26379.756505930116, "train/loss_math": 2.122455596923828, "train/loss_prose": 3.3267482121785483, "train/loss_code": 1.4119924306869507} +{"step": 3441, "train/loss": 2.1969840228557587, "train/lm_loss": 2.1969840228557587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3386676081695309e-05, "perf/tokens_per_sec": 26034.40444558268, "train/loss_code": 1.3965539783239365, "train/loss_math": 2.346956491470337, "train/loss_prose": 3.647871971130371} +{"step": 3442, "train/loss": 2.750533938407898, "train/lm_loss": 2.750533938407898, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.337122304806354e-05, "perf/tokens_per_sec": 26253.217394493506, "train/loss_prose": 3.1944138407707214, "train/loss_code": 1.512739658355713, "train/loss_math": 2.571292002995809} +{"step": 3443, "train/loss": 2.1081672310829163, "train/lm_loss": 2.1081672310829163, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3355775682156393e-05, "perf/tokens_per_sec": 26398.68648478376, "train/loss_math": 2.109677731990814, "train/loss_prose": 3.5486886501312256, "train/loss_code": 1.6259793837865193} +{"step": 3444, "train/loss": 2.3808647990226746, "train/lm_loss": 2.3808647990226746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3340333991502724e-05, "perf/tokens_per_sec": 26129.197827518605, "train/loss_code": 0.8672511279582977, "train/loss_math": 2.1331307093302407, "train/loss_prose": 3.637674649556478} +{"step": 3445, "train/loss": 1.7879144549369812, "train/lm_loss": 1.7879144549369812, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.332489798362862e-05, "perf/tokens_per_sec": 26246.198525435175, "train/loss_math": 2.1048263907432556, "train/loss_code": 1.417244291305542, "train/loss_prose": 3.0074410438537598} +{"step": 3446, "train/loss": 2.4006143510341644, "train/lm_loss": 2.4006143510341644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3309467666057409e-05, "perf/tokens_per_sec": 26303.058839380174, "train/loss_code": 1.6213516592979431, "train/loss_math": 2.226238399744034, "train/loss_prose": 3.528628945350647} +{"step": 3447, "train/loss": 2.3657368421554565, "train/lm_loss": 2.3657368421554565, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.329404304630964e-05, "perf/tokens_per_sec": 26484.47406644601, "train/loss_prose": 3.305797259012858, "train/loss_code": 1.2361568808555603, "train/loss_math": 2.178729852040609} +{"step": 3448, "train/loss": 1.727604016661644, "train/lm_loss": 1.727604016661644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3278624131903087e-05, "perf/tokens_per_sec": 27437.04313940935, "train/loss_math": 1.9851760268211365, "train/loss_code": 1.3021878600120544, "train/loss_prose": 3.33954119682312} +{"step": 3449, "train/loss": 1.7077657580375671, "train/lm_loss": 1.7077657580375671, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3263210930352737e-05, "perf/tokens_per_sec": 26587.145812570026, "train/loss_code": 1.0836843401193619, "train/loss_math": 1.940133770306905, "train/loss_prose": 3.5069875717163086} +{"step": 3450, "train/loss": 2.2351993024349213, "train/lm_loss": 2.2351993024349213, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.324780344917079e-05, "perf/tokens_per_sec": 26260.72170760191, "train/loss_math": 1.9868411779403687, "train/loss_prose": 3.064818024635315, "train/loss_code": 1.8177523612976074} +{"step": 3451, "train/loss": 2.238052397966385, "train/lm_loss": 2.238052397966385, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3232401695866687e-05, "perf/tokens_per_sec": 26315.347239624658, "train/loss_prose": 3.4661715030670166, "train/loss_code": 1.2960483034451802, "train/loss_math": 1.808879792690277} +{"step": 3452, "train/loss": 2.5358435213565826, "train/lm_loss": 2.5358435213565826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3217005677947037e-05, "perf/tokens_per_sec": 26502.369008961203, "train/loss_code": 1.1084955930709839, "train/loss_math": 2.179483413696289, "train/loss_prose": 3.427697718143463} +{"step": 3453, "train/loss": 2.9221190214157104, "train/lm_loss": 2.9221190214157104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3201615402915684e-05, "perf/tokens_per_sec": 26628.39647783301, "train/loss_math": 2.160916010538737, "train/loss_code": 1.8844481706619263, "train/loss_prose": 3.7524391412734985} +{"step": 3454, "train/loss": 2.1920672953128815, "train/lm_loss": 2.1920672953128815, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3186230878273653e-05, "perf/tokens_per_sec": 26451.85154208694, "train/loss_code": 1.7445147335529327, "train/loss_prose": 3.082653045654297, "train/loss_math": 2.1965869665145874} +{"step": 3455, "train/loss": 2.326373189687729, "train/lm_loss": 2.326373189687729, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3170852111519175e-05, "perf/tokens_per_sec": 26066.439861625295, "train/loss_prose": 3.503614902496338, "train/loss_math": 2.558965802192688, "train/loss_code": 0.9940698345502218} +{"step": 3456, "train/loss": 2.282408505678177, "train/lm_loss": 2.282408505678177, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3155479110147679e-05, "perf/tokens_per_sec": 26077.637244856913, "train/loss_code": 1.6068538427352905, "train/loss_prose": 3.10953958829244, "train/loss_math": 2.5032339096069336} +{"step": 3457, "train/loss": 2.407315194606781, "train/lm_loss": 2.407315194606781, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3140111881651773e-05, "perf/tokens_per_sec": 25667.953337158157, "train/loss_math": 2.1528178453445435, "train/loss_prose": 3.34078041712443, "train/loss_code": 0.6249083280563354} +{"step": 3458, "train/loss": 2.2970163226127625, "train/lm_loss": 2.2970163226127625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3124750433521261e-05, "perf/tokens_per_sec": 26033.536517692482, "train/loss_math": 2.126693367958069, "train/loss_code": 1.692059318224589, "train/loss_prose": 3.4599359035491943} +{"step": 3459, "train/loss": 2.229592114686966, "train/lm_loss": 2.229592114686966, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3109394773243117e-05, "perf/tokens_per_sec": 26180.164768666462, "train/loss_math": 2.0488246083259583, "train/loss_code": 1.305921236673991, "train/loss_prose": 3.2737749417622886} +{"step": 3460, "train/loss": 2.0955893099308014, "train/lm_loss": 2.0955893099308014, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.309404490830152e-05, "perf/tokens_per_sec": 26353.817549812622, "train/loss_math": 2.2490189552307127, "train/loss_prose": 3.0396201610565186, "train/loss_code": 1.239999234676361} +{"step": 3461, "train/loss": 2.746190667152405, "train/lm_loss": 2.746190667152405, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3078700846177799e-05, "perf/tokens_per_sec": 25908.140291084244, "train/loss_prose": 3.2000560760498047, "train/loss_code": 0.6455011367797852, "train/loss_math": 2.1236884593963623} +{"step": 3462, "train/loss": 2.668922543525696, "train/lm_loss": 2.668922543525696, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3063362594350465e-05, "perf/tokens_per_sec": 25687.79548890168, "train/loss_prose": 3.34362131357193, "train/loss_math": 2.0221298933029175, "train/loss_code": 1.9663175344467163} +{"step": 3463, "train/loss": 2.0884013175964355, "train/lm_loss": 2.0884013175964355, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3048030160295196e-05, "perf/tokens_per_sec": 27035.965636679808, "train/loss_code": 1.5543406903743744, "train/loss_math": 1.4453879594802856, "train/loss_prose": 3.7995359897613525} +{"step": 3464, "train/loss": 2.509749561548233, "train/lm_loss": 2.509749561548233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3032703551484832e-05, "perf/tokens_per_sec": 26117.162031012467, "train/loss_prose": 3.107863426208496, "train/loss_math": 2.2111417651176453, "train/loss_code": 1.9098385572433472} +{"step": 3465, "train/loss": 2.4245555996894836, "train/lm_loss": 2.4245555996894836, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3017382775389375e-05, "perf/tokens_per_sec": 26689.911795228716, "train/loss_prose": 3.843326687812805, "train/loss_math": 2.178140640258789, "train/loss_code": 1.4986141324043274} +{"step": 3466, "train/loss": 2.444365471601486, "train/lm_loss": 2.444365471601486, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.300206783947599e-05, "perf/tokens_per_sec": 26246.35891481632, "train/loss_code": 1.4226738810539246, "train/loss_math": 1.9914605021476746, "train/loss_prose": 3.1816635727882385} +{"step": 3467, "train/loss": 2.218455582857132, "train/lm_loss": 2.218455582857132, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2986758751208983e-05, "perf/tokens_per_sec": 26250.449506463345, "train/loss_prose": 3.1434365113576255, "train/loss_math": 1.8962302803993225, "train/loss_code": 1.508291244506836} +{"step": 3468, "train/loss": 2.8063297271728516, "train/lm_loss": 2.8063297271728516, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2971455518049828e-05, "perf/tokens_per_sec": 26629.015593127864, "train/loss_code": 1.8686188062032063, "train/loss_prose": 3.368956279754639} +{"step": 3469, "train/loss": 2.260664641857147, "train/lm_loss": 2.260664641857147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2956158147457115e-05, "perf/tokens_per_sec": 26167.364545540397, "train/loss_prose": 3.2864830493927, "train/loss_code": 1.5359426140785217, "train/loss_math": 2.1101165413856506} +{"step": 3470, "train/loss": 2.1905875504016876, "train/lm_loss": 2.1905875504016876, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2940866646886621e-05, "perf/tokens_per_sec": 26387.6981150623, "train/loss_math": 2.097736805677414, "train/loss_code": 1.3447919487953186, "train/loss_prose": 3.222084641456604} +{"step": 3471, "train/loss": 1.9074948132038116, "train/lm_loss": 1.9074948132038116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.292558102379124e-05, "perf/tokens_per_sec": 26809.907544128226, "train/loss_code": 1.1784865458806355, "train/loss_math": 1.8803364435831706, "train/loss_prose": 3.0417442321777344} +{"step": 3472, "train/loss": 2.3102203011512756, "train/lm_loss": 2.3102203011512756, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2910301285620974e-05, "perf/tokens_per_sec": 26684.10805437511, "train/loss_prose": 3.358394682407379, "train/loss_code": 1.2620458155870438} +{"step": 3473, "train/loss": 2.114610880613327, "train/lm_loss": 2.114610880613327, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2895027439822983e-05, "perf/tokens_per_sec": 26917.9063161985, "train/loss_math": 2.260229468345642, "train/loss_prose": 3.3660008907318115, "train/loss_code": 1.134731948375702} +{"step": 3474, "train/loss": 2.5899940133094788, "train/lm_loss": 2.5899940133094788, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2879759493841575e-05, "perf/tokens_per_sec": 27115.848030140172, "train/loss_prose": 3.5542640686035156, "train/loss_math": 2.0882436335086823, "train/loss_code": 1.7041858434677124} +{"step": 3475, "train/loss": 1.9556421637535095, "train/lm_loss": 1.9556421637535095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2864497455118152e-05, "perf/tokens_per_sec": 27061.81606016023, "train/loss_code": 1.1492083072662354, "train/loss_math": 1.7001543045043945, "train/loss_prose": 4.099470496177673} +{"step": 3476, "train/loss": 2.0322669744491577, "train/lm_loss": 2.0322669744491577, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2849241331091246e-05, "perf/tokens_per_sec": 26618.824492603846, "train/loss_code": 1.3714109460512798, "train/loss_math": 2.1328652799129486, "train/loss_prose": 3.612440586090088} +{"step": 3477, "train/loss": 2.3314324021339417, "train/lm_loss": 2.3314324021339417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2833991129196507e-05, "perf/tokens_per_sec": 25913.72862478204, "train/loss_code": 1.3639789074659348, "train/loss_prose": 3.599435647328695, "train/loss_math": 2.3972368240356445} +{"step": 3478, "train/loss": 2.578692704439163, "train/lm_loss": 2.578692704439163, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2818746856866687e-05, "perf/tokens_per_sec": 26534.583542872675, "train/loss_code": 1.5554462373256683, "train/loss_prose": 3.601939082145691} +{"step": 3479, "train/loss": 2.843527853488922, "train/lm_loss": 2.843527853488922, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.280350852153168e-05, "perf/tokens_per_sec": 26995.776470754616, "train/loss_math": 2.1386099755764008, "train/loss_prose": 3.5484455823898315} +{"step": 3480, "train/loss": 1.9867787063121796, "train/lm_loss": 1.9867787063121796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2788276130618466e-05, "perf/tokens_per_sec": 27083.232205014378, "train/loss_code": 1.4179660081863403, "train/loss_math": 2.180077393849691, "train/loss_prose": 3.682133436203003} +{"step": 3481, "train/loss": 2.4975580275058746, "train/lm_loss": 2.4975580275058746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2773049691551104e-05, "perf/tokens_per_sec": 26066.202565370833, "train/loss_code": 1.6811475157737732, "train/loss_math": 2.285857836405436, "train/loss_prose": 3.253531535466512} +{"step": 3482, "train/loss": 2.4634898602962494, "train/lm_loss": 2.4634898602962494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2757829211750786e-05, "perf/tokens_per_sec": 26905.00672473682, "train/loss_code": 1.5540539026260376, "train/loss_math": 2.0848024686177573, "train/loss_prose": 3.4484676520029702} +{"step": 3483, "train/loss": 2.2690297663211823, "train/lm_loss": 2.2690297663211823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2742614698635782e-05, "perf/tokens_per_sec": 26962.68697306222, "train/loss_code": 1.8081594705581665, "train/loss_math": 2.060516655445099, "train/loss_prose": 3.399283766746521} +{"step": 3484, "train/loss": 2.154267966747284, "train/lm_loss": 2.154267966747284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.272740615962148e-05, "perf/tokens_per_sec": 27042.17885436621, "train/loss_prose": 3.2648184299468994, "train/loss_math": 2.3229064345359802, "train/loss_code": 1.5592333873112996} +{"step": 3485, "train/loss": 2.253076434135437, "train/lm_loss": 2.253076434135437, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2712203602120326e-05, "perf/tokens_per_sec": 26488.108296792416, "train/loss_code": 0.8152347207069397, "train/loss_prose": 3.293152411778768, "train/loss_math": 2.171561598777771} +{"step": 3486, "train/loss": 2.297447830438614, "train/lm_loss": 2.297447830438614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2697007033541864e-05, "perf/tokens_per_sec": 27102.501532612383, "train/loss_math": 2.023407220840454, "train/loss_code": 1.605451504389445, "train/loss_prose": 3.746502995491028} +{"step": 3487, "train/loss": 2.772513210773468, "train/lm_loss": 2.772513210773468, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2681816461292715e-05, "perf/tokens_per_sec": 25860.055369238642, "train/loss_code": 1.3947259187698364, "train/loss_prose": 3.6755616068840027, "train/loss_math": 2.0277109146118164} +{"step": 3488, "train/loss": 2.550435572862625, "train/lm_loss": 2.550435572862625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.266663189277657e-05, "perf/tokens_per_sec": 26187.108442586843, "train/loss_prose": 3.4439051747322083, "train/loss_code": 1.5139268239339192, "train/loss_math": 2.08608341217041} +{"step": 3489, "train/loss": 2.594157040119171, "train/lm_loss": 2.594157040119171, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2651453335394231e-05, "perf/tokens_per_sec": 25956.44358358779, "train/loss_math": 2.2195757031440735, "train/loss_prose": 2.9687381982803345} +{"step": 3490, "train/loss": 2.109574407339096, "train/lm_loss": 2.109574407339096, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2636280796543515e-05, "perf/tokens_per_sec": 26950.463140490163, "train/loss_math": 2.345115804672241, "train/loss_code": 1.2579599618911743, "train/loss_prose": 2.635096311569214} +{"step": 3491, "train/loss": 2.2125325798988342, "train/lm_loss": 2.2125325798988342, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2621114283619345e-05, "perf/tokens_per_sec": 26856.974087043523, "train/loss_math": 1.968031883239746, "train/loss_code": 1.5605580012003581, "train/loss_prose": 3.5572457313537598} +{"step": 3492, "train/loss": 2.143299549818039, "train/lm_loss": 2.143299549818039, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2605953804013681e-05, "perf/tokens_per_sec": 26077.280996226506, "train/loss_prose": 3.295788288116455, "train/loss_code": 1.367352843284607, "train/loss_math": 1.9550285637378693} +{"step": 3493, "train/loss": 2.115975081920624, "train/lm_loss": 2.115975081920624, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.259079936511558e-05, "perf/tokens_per_sec": 27049.970688819052, "train/loss_code": 1.4102859646081924, "train/loss_math": 2.1621227264404297, "train/loss_prose": 3.481205463409424} +{"step": 3494, "train/loss": 2.228185147047043, "train/lm_loss": 2.228185147047043, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2575650974311119e-05, "perf/tokens_per_sec": 26438.62158626782, "train/loss_math": 2.124663829803467, "train/loss_prose": 3.7436673641204834, "train/loss_code": 1.8610527118047078} +{"step": 3495, "train/loss": 2.026412546634674, "train/lm_loss": 2.026412546634674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2560508638983437e-05, "perf/tokens_per_sec": 25910.406597400797, "train/loss_prose": 3.497169613838196, "train/loss_code": 1.2843571305274963, "train/loss_math": 2.0397667288780212} +{"step": 3496, "train/loss": 1.908240407705307, "train/lm_loss": 1.908240407705307, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.254537236651273e-05, "perf/tokens_per_sec": 26319.096479066353, "train/loss_code": 1.3886672258377075, "train/loss_math": 2.0201178789138794, "train/loss_prose": 3.6509008407592773} +{"step": 3497, "train/loss": 2.5481080412864685, "train/lm_loss": 2.5481080412864685, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2530242164276235e-05, "perf/tokens_per_sec": 27003.49913943374, "train/loss_math": 2.294231653213501, "train/loss_prose": 3.368614355723063, "train/loss_code": 1.6981629729270935} +{"step": 3498, "train/loss": 1.4995082020759583, "train/lm_loss": 1.4995082020759583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2515118039648222e-05, "perf/tokens_per_sec": 26187.787045025656, "train/loss_code": 1.3153263131777446, "train/loss_math": 2.052053928375244} +{"step": 3499, "train/loss": 2.1375678181648254, "train/lm_loss": 2.1375678181648254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2500000000000006e-05, "perf/tokens_per_sec": 26243.271763521574, "train/loss_prose": 3.2867352962493896, "train/loss_math": 2.1269978682200112, "train/loss_code": 1.3820263147354126} +{"step": 3500, "train/loss": 2.3506704568862915, "train/lm_loss": 2.3506704568862915, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2484888052699939e-05, "perf/tokens_per_sec": 26097.880228350594, "train/loss_math": 2.048859794934591, "train/loss_code": 1.4304649233818054, "train/loss_prose": 3.265951156616211} +{"step": 3501, "train/loss": 2.260823369026184, "train/lm_loss": 2.260823369026184, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.24697822051134e-05, "perf/tokens_per_sec": 25524.374156300095, "train/loss_code": 1.7654702186584472, "train/loss_math": 2.2579588890075684, "train/loss_prose": 3.500638723373413} +{"step": 3502, "train/loss": 2.1665433049201965, "train/lm_loss": 2.1665433049201965, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2454682464602788e-05, "perf/tokens_per_sec": 26667.41565317815, "train/loss_prose": 3.1309704780578613, "train/loss_math": 2.0544349253177643, "train/loss_code": 1.4263327717781067} +{"step": 3503, "train/loss": 2.8388906717300415, "train/lm_loss": 2.8388906717300415, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.243958883852755e-05, "perf/tokens_per_sec": 27159.358009858384, "train/loss_prose": 3.602462720870972, "train/loss_code": 1.4334348440170288, "train/loss_math": 1.8319419622421265} +{"step": 3504, "train/loss": 2.2090576589107513, "train/lm_loss": 2.2090576589107513, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2424501334244123e-05, "perf/tokens_per_sec": 26238.301773313397, "train/loss_math": 2.1291534900665283, "train/loss_prose": 3.082057237625122, "train/loss_code": 1.019414335489273} +{"step": 3505, "train/loss": 2.2946030497550964, "train/lm_loss": 2.2946030497550964, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2409419959105981e-05, "perf/tokens_per_sec": 26060.192501039077, "train/loss_code": 1.245689570903778, "train/loss_prose": 3.65614652633667, "train/loss_math": 2.4056265354156494} +{"step": 3506, "train/loss": 3.093836784362793, "train/lm_loss": 3.093836784362793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2394344720463597e-05, "perf/tokens_per_sec": 26478.84093925079, "train/loss_prose": 3.577981424331665, "train/loss_math": 2.286929210027059} +{"step": 3507, "train/loss": 2.2452996969223022, "train/lm_loss": 2.2452996969223022, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2379275625664461e-05, "perf/tokens_per_sec": 26427.965837209875, "train/loss_code": 1.932373285293579, "train/loss_math": 2.1801992416381837, "train/loss_prose": 3.1966545581817627} +{"step": 3508, "train/loss": 2.9014371037483215, "train/lm_loss": 2.9014371037483215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2364212682053068e-05, "perf/tokens_per_sec": 26064.54161255477, "train/loss_math": 2.3597359657287598, "train/loss_prose": 3.4555995941162108, "train/loss_code": 1.214028000831604} +{"step": 3509, "train/loss": 2.046770989894867, "train/lm_loss": 2.046770989894867, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.234915589697091e-05, "perf/tokens_per_sec": 25981.645119481695, "train/loss_prose": 3.8496508598327637, "train/loss_math": 2.0505725741386414, "train/loss_code": 1.4407422542572021} +{"step": 3510, "train/loss": 2.291222423315048, "train/lm_loss": 2.291222423315048, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2334105277756486e-05, "perf/tokens_per_sec": 26032.9053337556, "train/loss_code": 1.4228486220041912, "train/loss_prose": 3.3682099978129068, "train/loss_math": 1.978301227092743} +{"step": 3511, "train/loss": 2.746929883956909, "train/lm_loss": 2.746929883956909, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2319060831745272e-05, "perf/tokens_per_sec": 26199.92799387545, "train/loss_math": 2.4314736127853394, "train/loss_prose": 3.5765397548675537, "train/loss_code": 1.4031664729118347} +{"step": 3512, "train/loss": 1.7132816314697266, "train/lm_loss": 1.7132816314697266, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2304022566269768e-05, "perf/tokens_per_sec": 26170.23453461413, "train/loss_math": 2.1822431484858194, "train/loss_code": 1.4319046020507813} +{"step": 3513, "train/loss": 2.593346267938614, "train/lm_loss": 2.593346267938614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2288990488659433e-05, "perf/tokens_per_sec": 26316.354994707584, "train/loss_code": 1.8877756595611572, "train/loss_prose": 3.5951383113861084, "train/loss_math": 2.0183947384357452} +{"step": 3514, "train/loss": 2.1403229236602783, "train/lm_loss": 2.1403229236602783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2273964606240718e-05, "perf/tokens_per_sec": 26972.973728625955, "train/loss_prose": 3.4143519401550293, "train/loss_code": 0.8878773152828217, "train/loss_math": 2.1295311748981476} +{"step": 3515, "train/loss": 2.6720008552074432, "train/lm_loss": 2.6720008552074432, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2258944926337057e-05, "perf/tokens_per_sec": 26047.667266566095, "train/loss_prose": 3.5213274359703064, "train/loss_math": 1.9964423179626465, "train/loss_code": 1.301370620727539} +{"step": 3516, "train/loss": 1.9079580307006836, "train/lm_loss": 1.9079580307006836, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2243931456268859e-05, "perf/tokens_per_sec": 25743.725382187844, "train/loss_code": 1.4768541653951008, "train/loss_math": 2.1666203498840333} +{"step": 3517, "train/loss": 2.2399606704711914, "train/lm_loss": 2.2399606704711914, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2228924203353506e-05, "perf/tokens_per_sec": 25984.43521084155, "train/loss_prose": 3.1613197326660156, "train/loss_code": 1.364705463250478, "train/loss_math": 2.170804738998413} +{"step": 3518, "train/loss": 2.4803807139396667, "train/lm_loss": 2.4803807139396667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2213923174905354e-05, "perf/tokens_per_sec": 25977.245066478365, "train/loss_prose": 3.306557059288025, "train/loss_math": 2.0222620964050293, "train/loss_code": 1.5315184195836384} +{"step": 3519, "train/loss": 2.1436803340911865, "train/lm_loss": 2.1436803340911865, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2198928378235716e-05, "perf/tokens_per_sec": 26065.213877480426, "train/loss_prose": 3.204357385635376, "train/loss_math": 2.125452011823654, "train/loss_code": 1.1194594502449036} +{"step": 3520, "train/loss": 2.187246710062027, "train/lm_loss": 2.187246710062027, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2183939820652879e-05, "perf/tokens_per_sec": 26166.16890253561, "train/loss_prose": 3.3768726189931235, "train/loss_code": 1.1444013714790344, "train/loss_math": 1.9670757055282593} +{"step": 3521, "train/loss": 2.73833367228508, "train/lm_loss": 2.73833367228508, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2168957509462073e-05, "perf/tokens_per_sec": 26192.857717422958, "train/loss_prose": 3.626427412033081, "train/loss_code": 1.8502395749092102} +{"step": 3522, "train/loss": 2.3187226057052612, "train/lm_loss": 2.3187226057052612, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2153981451965512e-05, "perf/tokens_per_sec": 26234.134845701494, "train/loss_prose": 3.049321949481964, "train/loss_math": 2.362638235092163, "train/loss_code": 1.3299514055252075} +{"step": 3523, "train/loss": 2.6731643676757812, "train/lm_loss": 2.6731643676757812, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2139011655462337e-05, "perf/tokens_per_sec": 26114.899110292965, "train/loss_prose": 3.5657419562339783, "train/loss_math": 1.9107071161270142, "train/loss_code": 1.650466799736023} +{"step": 3524, "train/loss": 2.4862890243530273, "train/lm_loss": 2.4862890243530273, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2124048127248644e-05, "perf/tokens_per_sec": 25935.792850241545, "train/loss_math": 2.209409157435099, "train/loss_code": 1.6615302562713623, "train/loss_prose": 3.3130077521006265} +{"step": 3525, "train/loss": 2.4607934057712555, "train/lm_loss": 2.4607934057712555, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2109090874617476e-05, "perf/tokens_per_sec": 26938.250386515094, "train/loss_code": 1.138059099515279, "train/loss_prose": 4.050745010375977, "train/loss_math": 2.0599673986434937} +{"step": 3526, "train/loss": 2.4438998699188232, "train/lm_loss": 2.4438998699188232, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2094139904858814e-05, "perf/tokens_per_sec": 26108.866571733823, "train/loss_prose": 3.4211934407552085, "train/loss_code": 1.7188351551691692, "train/loss_math": 2.065556585788727} +{"step": 3527, "train/loss": 2.1290323436260223, "train/lm_loss": 2.1290323436260223, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2079195225259579e-05, "perf/tokens_per_sec": 25906.147832040027, "train/loss_code": 1.5940327942371368, "train/loss_prose": 3.763803243637085, "train/loss_math": 2.297441562016805} +{"step": 3528, "train/loss": 2.030297964811325, "train/lm_loss": 2.030297964811325, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.206425684310363e-05, "perf/tokens_per_sec": 25877.077199530653, "train/loss_code": 1.4872075319290161, "train/loss_math": 1.9197040796279907, "train/loss_prose": 3.227072238922119} +{"step": 3529, "train/loss": 2.430368721485138, "train/lm_loss": 2.430368721485138, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2049324765671749e-05, "perf/tokens_per_sec": 26136.94950570666, "train/loss_code": 1.7917945981025696, "train/loss_prose": 3.581239938735962, "train/loss_math": 2.1742202043533325} +{"step": 3530, "train/loss": 2.5048784613609314, "train/lm_loss": 2.5048784613609314, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2034399000241654e-05, "perf/tokens_per_sec": 25902.63247533351, "train/loss_code": 1.32503346602122, "train/loss_prose": 3.5709117650985718, "train/loss_math": 1.7802799940109253} +{"step": 3531, "train/loss": 2.548486888408661, "train/lm_loss": 2.548486888408661, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2019479554087964e-05, "perf/tokens_per_sec": 25338.217910333293, "train/loss_code": 1.2228520115216572, "train/loss_prose": 3.3438680171966553} +{"step": 3532, "train/loss": 1.9611680507659912, "train/lm_loss": 1.9611680507659912, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2004566434482261e-05, "perf/tokens_per_sec": 25567.33604188432, "train/loss_math": 2.1261072158813477, "train/loss_prose": 2.8336617946624756, "train/loss_code": 1.1125730276107788} +{"step": 3533, "train/loss": 2.7661995887756348, "train/lm_loss": 2.7661995887756348, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1989659648693016e-05, "perf/tokens_per_sec": 26188.266079687324, "train/loss_prose": 3.636531710624695, "train/loss_code": 1.6109859943389893, "train/loss_math": 2.180749535560608} +{"step": 3534, "train/loss": 2.31766340136528, "train/lm_loss": 2.31766340136528, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.19747592039856e-05, "perf/tokens_per_sec": 26045.92969971149, "train/loss_prose": 3.848088502883911, "train/loss_code": 1.399408221244812} +{"step": 3535, "train/loss": 2.2711069583892822, "train/lm_loss": 2.2711069583892822, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1959865107622307e-05, "perf/tokens_per_sec": 25797.110016952945, "train/loss_prose": 3.4378490447998047, "train/loss_code": 1.193321406841278, "train/loss_math": 2.226628839969635} +{"step": 3536, "train/loss": 1.903952181339264, "train/lm_loss": 1.903952181339264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1944977366862353e-05, "perf/tokens_per_sec": 26568.31446731516, "train/loss_math": 2.056883215904236, "train/loss_code": 1.2314259211222331, "train/loss_prose": 3.309805393218994} +{"step": 3537, "train/loss": 2.4030052423477173, "train/lm_loss": 2.4030052423477173, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1930095988961836e-05, "perf/tokens_per_sec": 25697.01697541713, "train/loss_math": 2.463552951812744, "train/loss_code": 1.5255535393953323, "train/loss_prose": 3.552757978439331} +{"step": 3538, "train/loss": 2.4769590497016907, "train/lm_loss": 2.4769590497016907, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1915220981173764e-05, "perf/tokens_per_sec": 25744.033997968036, "train/loss_prose": 3.4658520221710205, "train/loss_math": 2.068402111530304, "train/loss_code": 1.7604373693466187} +{"step": 3539, "train/loss": 1.9730870425701141, "train/lm_loss": 1.9730870425701141, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1900352350748026e-05, "perf/tokens_per_sec": 25465.76797218014, "train/loss_math": 2.1721014976501465, "train/loss_code": 1.292475402355194, "train/loss_prose": 3.1352962255477905} +{"step": 3540, "train/loss": 2.0687185525894165, "train/lm_loss": 2.0687185525894165, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1885490104931406e-05, "perf/tokens_per_sec": 26269.716480626317, "train/loss_math": 1.896656095981598, "train/loss_prose": 3.152715563774109, "train/loss_code": 1.6127511262893677} +{"step": 3541, "train/loss": 2.0755225121974945, "train/lm_loss": 2.0755225121974945, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1870634250967605e-05, "perf/tokens_per_sec": 25281.987820993923, "train/loss_code": 1.5994003613789876, "train/loss_math": 2.1278074979782104, "train/loss_prose": 3.2947487831115723} +{"step": 3542, "train/loss": 2.313602477312088, "train/lm_loss": 2.313602477312088, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.185578479609718e-05, "perf/tokens_per_sec": 25656.37693357666, "train/loss_code": 1.8780522346496582, "train/loss_prose": 3.1274927854537964, "train/loss_math": 2.124432325363159} +{"step": 3543, "train/loss": 1.6052935719490051, "train/lm_loss": 1.6052935719490051, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1840941747557558e-05, "perf/tokens_per_sec": 26171.669765245344, "train/loss_code": 1.3319173455238342, "train/loss_math": 1.9802438020706177, "train/loss_prose": 2.8706014156341553} +{"step": 3544, "train/loss": 2.2256025075912476, "train/lm_loss": 2.2256025075912476, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1826105112583061e-05, "perf/tokens_per_sec": 25775.55172403446, "train/loss_code": 1.3543856541315715, "train/loss_prose": 3.610653877258301, "train/loss_math": 2.1734513441721597} +{"step": 3545, "train/loss": 2.6815937757492065, "train/lm_loss": 2.6815937757492065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1811274898404903e-05, "perf/tokens_per_sec": 26582.29151619242, "train/loss_math": 2.1123308340708413, "train/loss_prose": 3.4573245644569397, "train/loss_code": 1.2864595651626587} +{"step": 3546, "train/loss": 2.0268428921699524, "train/lm_loss": 2.0268428921699524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1796451112251141e-05, "perf/tokens_per_sec": 25899.039985527783, "train/loss_code": 1.2185605615377426, "train/loss_prose": 3.3299899101257324, "train/loss_math": 2.3402607440948486} +{"step": 3547, "train/loss": 1.957526683807373, "train/lm_loss": 1.957526683807373, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1781633761346707e-05, "perf/tokens_per_sec": 25733.737942273903, "train/loss_math": 2.0405726432800293, "train/loss_code": 1.3490563035011292, "train/loss_prose": 4.142270565032959} +{"step": 3548, "train/loss": 2.3862432837486267, "train/lm_loss": 2.3862432837486267, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1766822852913403e-05, "perf/tokens_per_sec": 25592.96915561925, "train/loss_math": 2.0870173931121827, "train/loss_code": 1.1563035249710083, "train/loss_prose": 3.7492778301239014} +{"step": 3549, "train/loss": 2.1351528763771057, "train/lm_loss": 2.1351528763771057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.175201839416988e-05, "perf/tokens_per_sec": 25620.602199093577, "train/loss_code": 1.9049472510814667, "train/loss_math": 2.0831859906514487, "train/loss_prose": 3.2118756771087646} +{"step": 3550, "train/loss": 2.085839033126831, "train/lm_loss": 2.085839033126831, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1737220392331644e-05, "perf/tokens_per_sec": 26501.101676770482, "train/loss_prose": 3.4608575105667114, "train/loss_math": 2.1752984523773193, "train/loss_code": 1.079700728257497} +{"step": 3551, "train/loss": 2.678303837776184, "train/lm_loss": 2.678303837776184, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.172242885461109e-05, "perf/tokens_per_sec": 25716.21119187613, "train/loss_code": 1.7844022512435913, "train/loss_prose": 3.1787272930145263, "train/loss_math": 1.9639900922775269} +{"step": 3552, "train/loss": 2.0426471829414368, "train/lm_loss": 2.0426471829414368, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1707643788217404e-05, "perf/tokens_per_sec": 25883.5490322221, "train/loss_math": 2.269119699796041, "train/loss_prose": 3.1054751873016357, "train/loss_code": 1.1076227227846782} +{"step": 3553, "train/loss": 2.261903792619705, "train/lm_loss": 2.261903792619705, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1692865200356659e-05, "perf/tokens_per_sec": 26143.154160567057, "train/loss_prose": 3.498657703399658, "train/loss_math": 2.0620195468266806, "train/loss_code": 1.6372852722803752} +{"step": 3554, "train/loss": 2.70027893781662, "train/lm_loss": 2.70027893781662, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.167809309823175e-05, "perf/tokens_per_sec": 26052.328249703914, "train/loss_prose": 3.2164329051971436, "train/loss_code": 1.8400225639343262} +{"step": 3555, "train/loss": 2.0815410912036896, "train/lm_loss": 2.0815410912036896, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1663327489042435e-05, "perf/tokens_per_sec": 26217.000257899115, "train/loss_prose": 2.9097663164138794, "train/loss_math": 1.9551671743392944, "train/loss_code": 1.5060638189315796} +{"step": 3556, "train/loss": 2.448296546936035, "train/lm_loss": 2.448296546936035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1648568379985289e-05, "perf/tokens_per_sec": 26073.442045338994, "train/loss_code": 1.515242099761963, "train/loss_math": 2.0823370933532717, "train/loss_prose": 3.829722285270691} +{"step": 3557, "train/loss": 2.4533492028713226, "train/lm_loss": 2.4533492028713226, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.163381577825372e-05, "perf/tokens_per_sec": 25984.592416918247, "train/loss_code": 1.0647404392560322, "train/loss_prose": 3.5247210264205933, "train/loss_math": 2.333688259124756} +{"step": 3558, "train/loss": 2.075596809387207, "train/lm_loss": 2.075596809387207, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1619069691037965e-05, "perf/tokens_per_sec": 26202.365527307556, "train/loss_math": 2.1037411987781525, "train/loss_prose": 3.6940228939056396, "train/loss_code": 1.4985960324605305} +{"step": 3559, "train/loss": 2.1854312121868134, "train/lm_loss": 2.1854312121868134, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1604330125525079e-05, "perf/tokens_per_sec": 26142.557431131794, "train/loss_code": 1.123903512954712, "train/loss_math": 2.1358864307403564, "train/loss_prose": 3.617316246032715} +{"step": 3560, "train/loss": 1.8881441354751587, "train/lm_loss": 1.8881441354751587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1589597088898976e-05, "perf/tokens_per_sec": 25999.338939830293, "train/loss_code": 1.4979815244674684, "train/loss_math": 1.978186309337616, "train/loss_prose": 3.65887188911438} +{"step": 3561, "train/loss": 2.8327418863773346, "train/lm_loss": 2.8327418863773346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1574870588340333e-05, "perf/tokens_per_sec": 26431.421913631057, "train/loss_prose": 3.5772993564605713, "train/loss_code": 1.5918127298355103} +{"step": 3562, "train/loss": 3.018102705478668, "train/lm_loss": 3.018102705478668, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1560150631026676e-05, "perf/tokens_per_sec": 26340.483521740265, "train/loss_prose": 3.590696668624878, "train/loss_code": 1.5564074516296387, "train/loss_math": 2.317465305328369} +{"step": 3563, "train/loss": 1.7971819043159485, "train/lm_loss": 1.7971819043159485, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1545437224132318e-05, "perf/tokens_per_sec": 25994.146255255986, "train/loss_math": 2.2323474884033203, "train/loss_code": 1.0719058513641357} +{"step": 3564, "train/loss": 2.5065139532089233, "train/lm_loss": 2.5065139532089233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1530730374828422e-05, "perf/tokens_per_sec": 26045.376886143844, "train/loss_code": 1.6296396851539612, "train/loss_math": 2.025892972946167, "train/loss_prose": 3.1852614283561707} +{"step": 3565, "train/loss": 2.808716833591461, "train/lm_loss": 2.808716833591461, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1516030090282914e-05, "perf/tokens_per_sec": 26205.962716471597, "train/loss_prose": 3.8242833614349365, "train/loss_math": 2.0673855543136597, "train/loss_code": 0.9704445004463196} +{"step": 3566, "train/loss": 2.0981126725673676, "train/lm_loss": 2.0981126725673676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1501336377660537e-05, "perf/tokens_per_sec": 26831.429532601684, "train/loss_math": 1.9554652452468873, "train/loss_code": 1.4689382314682007, "train/loss_prose": 4.069697856903076} +{"step": 3567, "train/loss": 2.5635741651058197, "train/lm_loss": 2.5635741651058197, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1486649244122824e-05, "perf/tokens_per_sec": 26793.099217878836, "train/loss_prose": 3.586369434992472, "train/loss_code": 1.8057247996330261, "train/loss_math": 2.046011726061503} +{"step": 3568, "train/loss": 2.575839340686798, "train/lm_loss": 2.575839340686798, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1471968696828106e-05, "perf/tokens_per_sec": 26041.428900142942, "train/loss_prose": 3.2810401916503906, "train/loss_math": 2.153386026620865, "train/loss_code": 2.150049924850464} +{"step": 3569, "train/loss": 2.5972899794578552, "train/lm_loss": 2.5972899794578552, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1457294742931507e-05, "perf/tokens_per_sec": 26510.58916915366, "train/loss_prose": 3.963095784187317, "train/loss_math": 2.236179971694946, "train/loss_code": 1.6712275743484497} +{"step": 3570, "train/loss": 1.9994326531887054, "train/lm_loss": 1.9994326531887054, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1442627389584932e-05, "perf/tokens_per_sec": 26229.328496596114, "train/loss_code": 1.5202588240305583, "train/loss_prose": 3.4369544982910156} +{"step": 3571, "train/loss": 2.2799811959266663, "train/lm_loss": 2.2799811959266663, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1427966643937069e-05, "perf/tokens_per_sec": 25969.23451243827, "train/loss_math": 2.1643922924995422, "train/loss_code": 1.3841025829315186, "train/loss_prose": 3.4070374965667725} +{"step": 3572, "train/loss": 3.270001709461212, "train/lm_loss": 3.270001709461212, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.141331251313339e-05, "perf/tokens_per_sec": 26543.151774526723, "train/loss_prose": 3.422884157725743, "train/loss_math": 2.1998236179351807} +{"step": 3573, "train/loss": 2.028448671102524, "train/lm_loss": 2.028448671102524, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1398665004316128e-05, "perf/tokens_per_sec": 26315.750332395884, "train/loss_math": 2.22044438123703, "train/loss_code": 1.436948835849762, "train/loss_prose": 3.019452452659607} +{"step": 3574, "train/loss": 2.546998143196106, "train/lm_loss": 2.546998143196106, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1384024124624324e-05, "perf/tokens_per_sec": 26887.32152035032, "train/loss_code": 1.8681361079216003, "train/loss_math": 1.9439456462860107, "train/loss_prose": 3.602625528971354} +{"step": 3575, "train/loss": 2.265244871377945, "train/lm_loss": 2.265244871377945, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1369389881193749e-05, "perf/tokens_per_sec": 26152.58610248908, "train/loss_code": 1.3990322649478912, "train/loss_prose": 3.4800304571787515, "train/loss_math": 2.085737705230713} +{"step": 3576, "train/loss": 2.1372437477111816, "train/lm_loss": 2.1372437477111816, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.135476228115696e-05, "perf/tokens_per_sec": 26856.59622722917, "train/loss_code": 1.775432825088501, "train/loss_prose": 2.806185007095337, "train/loss_math": 2.1919240951538086} +{"step": 3577, "train/loss": 2.510378748178482, "train/lm_loss": 2.510378748178482, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1340141331643276e-05, "perf/tokens_per_sec": 25956.522017130228, "train/loss_math": 2.2226306200027466, "train/loss_prose": 3.418298602104187, "train/loss_code": 0.9822866320610046} +{"step": 3578, "train/loss": 2.072016566991806, "train/lm_loss": 2.072016566991806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.132552703977876e-05, "perf/tokens_per_sec": 26660.04947820082, "train/loss_code": 1.356278936068217, "train/loss_prose": 3.170307993888855, "train/loss_math": 2.055559992790222} +{"step": 3579, "train/loss": 2.841780126094818, "train/lm_loss": 2.841780126094818, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1310919412686247e-05, "perf/tokens_per_sec": 26193.1771956011, "train/loss_prose": 3.3769763708114624, "train/loss_math": 2.3065839409828186} +{"step": 3580, "train/loss": 1.820546805858612, "train/lm_loss": 1.820546805858612, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1296318457485317e-05, "perf/tokens_per_sec": 26358.346107051293, "train/loss_prose": 3.4756059646606445, "train/loss_math": 1.796200156211853, "train/loss_code": 1.0051904916763306} +{"step": 3581, "train/loss": 2.5562247931957245, "train/lm_loss": 2.5562247931957245, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1281724181292294e-05, "perf/tokens_per_sec": 26126.41629522163, "train/loss_code": 1.570883313814799, "train/loss_math": 2.2652785778045654, "train/loss_prose": 3.367967367172241} +{"step": 3582, "train/loss": 1.9659894704818726, "train/lm_loss": 1.9659894704818726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1267136591220257e-05, "perf/tokens_per_sec": 26169.79602331841, "train/loss_prose": 3.4251534938812256, "train/loss_math": 2.1538437008857727, "train/loss_code": 1.2291287581125896} +{"step": 3583, "train/loss": 2.58394318819046, "train/lm_loss": 2.58394318819046, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1252555694379006e-05, "perf/tokens_per_sec": 26633.845680648767, "train/loss_code": 1.4587934613227844, "train/loss_math": 1.9003943800926208, "train/loss_prose": 3.4882922768592834} +{"step": 3584, "train/loss": 2.3501569032669067, "train/lm_loss": 2.3501569032669067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.123798149787511e-05, "perf/tokens_per_sec": 26336.162960464797, "train/loss_code": 1.275665283203125, "train/loss_math": 2.2654653787612915, "train/loss_prose": 3.5940314531326294} +{"step": 3585, "train/loss": 2.9491156339645386, "train/lm_loss": 2.9491156339645386, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.122341400881185e-05, "perf/tokens_per_sec": 26513.6167811792, "train/loss_prose": 3.6037456512451174, "train/loss_math": 2.0609492659568787, "train/loss_code": 1.4522974491119385} +{"step": 3586, "train/loss": 2.7074015140533447, "train/lm_loss": 2.7074015140533447, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1208853234289246e-05, "perf/tokens_per_sec": 26292.87421564606, "train/loss_prose": 3.0306572914123535, "train/loss_math": 2.1686420838038125} +{"step": 3587, "train/loss": 2.301448106765747, "train/lm_loss": 2.301448106765747, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1194299181404036e-05, "perf/tokens_per_sec": 27360.400668885668, "train/loss_code": 1.4913982550303142, "train/loss_math": 2.1999069849650064, "train/loss_prose": 3.6688343286514282} +{"step": 3588, "train/loss": 2.6210675835609436, "train/lm_loss": 2.6210675835609436, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.117975185724969e-05, "perf/tokens_per_sec": 26057.030422345513, "train/loss_prose": 3.1855420589447023, "train/loss_code": 1.3885670900344849, "train/loss_math": 2.263695240020752} +{"step": 3589, "train/loss": 2.7203410267829895, "train/lm_loss": 2.7203410267829895, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.11652112689164e-05, "perf/tokens_per_sec": 26280.486183518125, "train/loss_prose": 3.474085569381714, "train/loss_math": 2.175453305244446, "train/loss_code": 1.3400253057479858} +{"step": 3590, "train/loss": 2.194804161787033, "train/lm_loss": 2.194804161787033, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1150677423491073e-05, "perf/tokens_per_sec": 26502.123709399995, "train/loss_prose": 3.2883188724517822, "train/loss_code": 1.2137610117594402, "train/loss_math": 2.0260967016220093} +{"step": 3591, "train/loss": 2.215206652879715, "train/lm_loss": 2.215206652879715, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1136150328057324e-05, "perf/tokens_per_sec": 26104.85964960265, "train/loss_prose": 3.8052121798197427, "train/loss_code": 1.2612034201622009} +{"step": 3592, "train/loss": 2.6142790615558624, "train/lm_loss": 2.6142790615558624, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1121629989695476e-05, "perf/tokens_per_sec": 26025.806622845623, "train/loss_code": 1.7928953965504963, "train/loss_math": 2.2149195671081543, "train/loss_prose": 4.445393800735474} +{"step": 3593, "train/loss": 2.3636262118816376, "train/lm_loss": 2.3636262118816376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1107116415482586e-05, "perf/tokens_per_sec": 25884.367988718066, "train/loss_prose": 2.9397316575050354, "train/loss_code": 1.4015378952026367, "train/loss_math": 2.1735036373138428} +{"step": 3594, "train/loss": 1.9036574065685272, "train/lm_loss": 1.9036574065685272, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.109260961249238e-05, "perf/tokens_per_sec": 26386.928055907538, "train/loss_code": 1.4693758934736252, "train/loss_math": 1.9878534475962322, "train/loss_prose": 3.388195037841797} +{"step": 3595, "train/loss": 2.52827051281929, "train/lm_loss": 2.52827051281929, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.107810958779531e-05, "perf/tokens_per_sec": 26911.11773468694, "train/loss_prose": 3.3444244861602783, "train/loss_math": 2.2630630334218345, "train/loss_code": 1.7018510103225708} +{"step": 3596, "train/loss": 2.1869358718395233, "train/lm_loss": 2.1869358718395233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1063616348458483e-05, "perf/tokens_per_sec": 26005.124144382517, "train/loss_math": 2.074822688102722, "train/loss_code": 1.8308247923851013, "train/loss_prose": 3.459723711013794} +{"step": 3597, "train/loss": 1.8926503956317902, "train/lm_loss": 1.8926503956317902, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1049129901545755e-05, "perf/tokens_per_sec": 25903.921328679236, "train/loss_math": 2.329841136932373, "train/loss_code": 1.7469200094540913} +{"step": 3598, "train/loss": 2.4596335887908936, "train/lm_loss": 2.4596335887908936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1034650254117637e-05, "perf/tokens_per_sec": 26023.953707911463, "train/loss_math": 2.2448278268178306, "train/loss_prose": 3.458163340886434, "train/loss_code": 1.2840476632118225} +{"step": 3599, "train/loss": 2.5292750895023346, "train/lm_loss": 2.5292750895023346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1020177413231334e-05, "perf/tokens_per_sec": 25970.41222523382, "train/loss_code": 1.6064231594403584, "train/loss_prose": 4.308449983596802, "train/loss_math": 2.2660106817881265} +{"step": 3600, "train/loss": 2.2985856235027313, "train/lm_loss": 2.2985856235027313, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1005711385940729e-05, "perf/tokens_per_sec": 25880.897528641693, "train/loss_prose": 3.689757982889811, "train/loss_code": 1.3103739321231842, "train/loss_math": 2.077915668487549} +{"step": 3600, "eval/loss": 2.163033887712841, "eval/lm_loss": 2.163033887712841, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.697484863483263, "eval/loss_code": 1.558188212720771, "eval/ppl_code": 4.750207079216293, "eval/loss_prose": 3.484566771147544, "eval/ppl_prose": 32.60829719416288, "eval/loss_math": 2.037863456096846, "eval/ppl_math": 7.674195415312026} +{"step": 3601, "train/loss": 2.8415316939353943, "train/lm_loss": 2.8415316939353943, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0991252179296388e-05, "perf/tokens_per_sec": 26306.482321848023, "train/loss_prose": 3.339218521118164, "train/loss_math": 2.012053648630778} +{"step": 3602, "train/loss": 2.34572172164917, "train/lm_loss": 2.34572172164917, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.097679980034555e-05, "perf/tokens_per_sec": 26802.169750447745, "train/loss_math": 2.286307382583618, "train/loss_code": 1.8655040860176086, "train/loss_prose": 3.6032280921936035} +{"step": 3603, "train/loss": 2.5805068016052246, "train/lm_loss": 2.5805068016052246, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0962354256132141e-05, "perf/tokens_per_sec": 26591.3844867963, "train/loss_math": 2.159097522497177, "train/loss_prose": 3.625812212626139, "train/loss_code": 1.13022780418396} +{"step": 3604, "train/loss": 2.356387138366699, "train/lm_loss": 2.356387138366699, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0947915553696742e-05, "perf/tokens_per_sec": 26982.08172586122, "train/loss_math": 2.0052272379398346, "train/loss_code": 1.7291770577430725, "train/loss_prose": 3.6859169006347656} +{"step": 3605, "train/loss": 2.4083013236522675, "train/lm_loss": 2.4083013236522675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0933483700076591e-05, "perf/tokens_per_sec": 27058.02270812235, "train/loss_math": 2.356616973876953, "train/loss_code": 1.3542958498001099, "train/loss_prose": 3.5656750202178955} +{"step": 3606, "train/loss": 1.6286888122558594, "train/lm_loss": 1.6286888122558594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.091905870230559e-05, "perf/tokens_per_sec": 26730.068107736377, "train/loss_prose": 3.3071706295013428, "train/loss_code": 1.0992841362953185, "train/loss_math": 2.112959921360016} +{"step": 3607, "train/loss": 1.8525247275829315, "train/lm_loss": 1.8525247275829315, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0904640567414332e-05, "perf/tokens_per_sec": 26510.50735139806, "train/loss_code": 1.334700334072113, "train/loss_math": 2.1416606307029724, "train/loss_prose": 3.863375186920166} +{"step": 3608, "train/loss": 2.1439772844314575, "train/lm_loss": 2.1439772844314575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0890229302430028e-05, "perf/tokens_per_sec": 27027.88421657083, "train/loss_math": 2.1046961545944214, "train/loss_code": 1.3160425424575806, "train/loss_prose": 3.444800853729248} +{"step": 3609, "train/loss": 2.0433057248592377, "train/lm_loss": 2.0433057248592377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0875824914376553e-05, "perf/tokens_per_sec": 27155.451417768778, "train/loss_prose": 3.2775444984436035, "train/loss_code": 1.5337193846702575, "train/loss_math": 2.1227598190307617} +{"step": 3610, "train/loss": 2.4452208280563354, "train/lm_loss": 2.4452208280563354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0861427410274436e-05, "perf/tokens_per_sec": 27076.27464188281, "train/loss_code": 1.6262336174647014, "train/loss_prose": 3.4938790003458657, "train/loss_math": 2.100714385509491} +{"step": 3611, "train/loss": 2.3672374188899994, "train/lm_loss": 2.3672374188899994, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0847036797140831e-05, "perf/tokens_per_sec": 27148.413803129835, "train/loss_prose": 3.554584344228109, "train/loss_code": 1.6548293113708497} +{"step": 3612, "train/loss": 2.1734327375888824, "train/lm_loss": 2.1734327375888824, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0832653081989566e-05, "perf/tokens_per_sec": 27093.141162963802, "train/loss_code": 1.5915019313494365, "train/loss_math": 2.100657820701599, "train/loss_prose": 3.1554914712905884} +{"step": 3613, "train/loss": 2.327167123556137, "train/lm_loss": 2.327167123556137, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0818276271831093e-05, "perf/tokens_per_sec": 26988.73656875975, "train/loss_prose": 3.5211522579193115, "train/loss_math": 2.0265100240707397, "train/loss_code": 1.4424818754196167} +{"step": 3614, "train/loss": 2.3122406601905823, "train/lm_loss": 2.3122406601905823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0803906373672476e-05, "perf/tokens_per_sec": 27192.07147865533, "train/loss_math": 2.3905107021331786, "train/loss_prose": 3.918830156326294, "train/loss_code": 1.313270926475525} +{"step": 3615, "train/loss": 2.238083630800247, "train/lm_loss": 2.238083630800247, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0789543394517435e-05, "perf/tokens_per_sec": 27009.527587502573, "train/loss_code": 1.32994149128596, "train/loss_math": 1.9917071461677551, "train/loss_prose": 3.310476859410604} +{"step": 3616, "train/loss": 2.415694832801819, "train/lm_loss": 2.415694832801819, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0775187341366303e-05, "perf/tokens_per_sec": 27196.634405112625, "train/loss_math": 1.948635458946228, "train/loss_prose": 3.406570792198181, "train/loss_code": 0.9010018408298492} +{"step": 3617, "train/loss": 2.1507810056209564, "train/lm_loss": 2.1507810056209564, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0760838221216065e-05, "perf/tokens_per_sec": 27251.682120507918, "train/loss_prose": 3.232558250427246, "train/loss_math": 2.222802480061849, "train/loss_code": 1.3575745423634846} +{"step": 3618, "train/loss": 2.371761053800583, "train/lm_loss": 2.371761053800583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.07464960410603e-05, "perf/tokens_per_sec": 27092.45755377531, "train/loss_code": 1.7527338862419128, "train/loss_prose": 3.2967097759246826, "train/loss_math": 1.8594967524210613} +{"step": 3619, "train/loss": 2.395241141319275, "train/lm_loss": 2.395241141319275, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0732160807889211e-05, "perf/tokens_per_sec": 27132.5496483639, "train/loss_prose": 3.192180315653483, "train/loss_code": 1.7930851380030315, "train/loss_math": 2.103066325187683} +{"step": 3620, "train/loss": 2.496879279613495, "train/lm_loss": 2.496879279613495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0717832528689617e-05, "perf/tokens_per_sec": 27229.610420239456, "train/loss_prose": 3.436405301094055, "train/loss_code": 0.9510538578033447, "train/loss_math": 2.4302340030670164} +{"step": 3621, "train/loss": 2.178182542324066, "train/lm_loss": 2.178182542324066, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0703511210444936e-05, "perf/tokens_per_sec": 27122.569028884653, "train/loss_prose": 3.471498489379883, "train/loss_code": 1.5963859558105469, "train/loss_math": 2.048459827899933} +{"step": 3622, "train/loss": 3.039798378944397, "train/lm_loss": 3.039798378944397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0689196860135234e-05, "perf/tokens_per_sec": 27191.899322729783, "train/loss_prose": 4.013782411813736, "train/loss_code": 1.7795661687850952, "train/loss_math": 2.1612300475438437} +{"step": 3623, "train/loss": 2.5999208092689514, "train/lm_loss": 2.5999208092689514, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0674889484737125e-05, "perf/tokens_per_sec": 27157.855341428058, "train/loss_code": 1.6370101769765217, "train/loss_prose": 3.445867419242859, "train/loss_math": 2.104865312576294} +{"step": 3624, "train/loss": 2.466887414455414, "train/lm_loss": 2.466887414455414, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0660589091223855e-05, "perf/tokens_per_sec": 27117.517266634466, "train/loss_math": 2.1107192635536194, "train/loss_prose": 3.449892044067383, "train/loss_code": 0.9425466060638428} +{"step": 3625, "train/loss": 1.9891015887260437, "train/lm_loss": 1.9891015887260437, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0646295686565259e-05, "perf/tokens_per_sec": 27243.34163854566, "train/loss_math": 1.9982979595661163, "train/loss_prose": 3.3732571601867676, "train/loss_code": 1.5154544512430828} +{"step": 3626, "train/loss": 2.3645937740802765, "train/lm_loss": 2.3645937740802765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.063200927772778e-05, "perf/tokens_per_sec": 27235.265314404838, "train/loss_code": 1.5003581494092941, "train/loss_prose": 3.5407764116923013, "train/loss_math": 2.2929887771606445} +{"step": 3627, "train/loss": 2.8736888766288757, "train/lm_loss": 2.8736888766288757, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0617729871674436e-05, "perf/tokens_per_sec": 27062.199716774125, "train/loss_prose": 3.438940095901489, "train/loss_code": 1.6658129692077637, "train/loss_math": 2.064499020576477} +{"step": 3628, "train/loss": 2.121668815612793, "train/lm_loss": 2.121668815612793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0603457475364836e-05, "perf/tokens_per_sec": 26652.521977638396, "train/loss_math": 2.1944639682769775, "train/loss_code": 1.61365607380867, "train/loss_prose": 3.064899206161499} +{"step": 3629, "train/loss": 2.9327664971351624, "train/lm_loss": 2.9327664971351624, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.058919209575517e-05, "perf/tokens_per_sec": 27178.13390790687, "train/loss_prose": 3.411324222882589, "train/loss_code": 1.1647595167160034, "train/loss_math": 1.8294264078140259} +{"step": 3630, "train/loss": 1.85856431722641, "train/lm_loss": 1.85856431722641, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0574933739798215e-05, "perf/tokens_per_sec": 27089.766177901332, "train/loss_math": 1.9593538045883179, "train/loss_code": 1.2166494131088257, "train/loss_prose": 3.0416042804718018} +{"step": 3631, "train/loss": 2.5850865244865417, "train/lm_loss": 2.5850865244865417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0560682414443315e-05, "perf/tokens_per_sec": 27064.92825533859, "train/loss_prose": 3.6599297523498535, "train/loss_code": 1.0735254883766174, "train/loss_math": 1.9469610452651978} +{"step": 3632, "train/loss": 2.7513028979301453, "train/lm_loss": 2.7513028979301453, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0546438126636396e-05, "perf/tokens_per_sec": 27144.767694004404, "train/loss_math": 1.99260812997818, "train/loss_prose": 3.5242708325386047, "train/loss_code": 1.9640617370605469} +{"step": 3633, "train/loss": 2.1460036039352417, "train/lm_loss": 2.1460036039352417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0532200883319948e-05, "perf/tokens_per_sec": 27105.751391980866, "train/loss_prose": 3.2026641368865967, "train/loss_code": 1.762386679649353, "train/loss_math": 1.8565769791603088} +{"step": 3634, "train/loss": 2.3014939427375793, "train/lm_loss": 2.3014939427375793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0517970691433035e-05, "perf/tokens_per_sec": 27179.03683594368, "train/loss_code": 1.9125229120254517, "train/loss_math": 2.057462239265442, "train/loss_prose": 3.1060590744018555} +{"step": 3635, "train/loss": 2.787743031978607, "train/lm_loss": 2.787743031978607, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.050374755791127e-05, "perf/tokens_per_sec": 27209.858317600698, "train/loss_prose": 3.4509535789489747, "train/loss_code": 1.455816626548767, "train/loss_math": 2.13554310798645} +{"step": 3636, "train/loss": 2.4239881336688995, "train/lm_loss": 2.4239881336688995, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0489531489686848e-05, "perf/tokens_per_sec": 27206.0664352497, "train/loss_prose": 2.9840484261512756, "train/loss_code": 1.8088876605033875, "train/loss_math": 1.9189680814743042} +{"step": 3637, "train/loss": 2.0777022540569305, "train/lm_loss": 2.0777022540569305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0475322493688504e-05, "perf/tokens_per_sec": 27239.022152882157, "train/loss_math": 1.9515055815378826, "train/loss_code": 1.3582379420598347, "train/loss_prose": 3.34619402885437} +{"step": 3638, "train/loss": 2.3077501952648163, "train/lm_loss": 2.3077501952648163, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0461120576841533e-05, "perf/tokens_per_sec": 27192.544918690823, "train/loss_prose": 3.5240813493728638, "train/loss_code": 1.421144425868988, "train/loss_math": 2.1428872644901276} +{"step": 3639, "train/loss": 2.04431214928627, "train/lm_loss": 2.04431214928627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0446925746067768e-05, "perf/tokens_per_sec": 27051.802208565, "train/loss_math": 2.167150378227234, "train/loss_prose": 3.1674472093582153, "train/loss_code": 0.6755008101463318} +{"step": 3640, "train/loss": 2.503914564847946, "train/lm_loss": 2.503914564847946, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0432738008285603e-05, "perf/tokens_per_sec": 27164.511278575923, "train/loss_prose": 3.254454791545868, "train/loss_math": 2.233380615711212, "train/loss_code": 1.2733675241470337} +{"step": 3641, "train/loss": 2.574130594730377, "train/lm_loss": 2.574130594730377, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0418557370409965e-05, "perf/tokens_per_sec": 27206.0664352497, "train/loss_code": 2.020374119281769, "train/loss_prose": 3.227537751197815, "train/loss_math": 1.8210733532905579} +{"step": 3642, "train/loss": 2.7012212574481964, "train/lm_loss": 2.7012212574481964, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.040438383935233e-05, "perf/tokens_per_sec": 27221.28344099924, "train/loss_prose": 3.5607693195343018, "train/loss_code": 1.4766919612884521, "train/loss_math": 2.20665442943573} +{"step": 3643, "train/loss": 1.894919991493225, "train/lm_loss": 1.894919991493225, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.03902174220207e-05, "perf/tokens_per_sec": 26660.008106690504, "train/loss_code": 1.4924100041389465, "train/loss_math": 2.029090106487274} +{"step": 3644, "train/loss": 2.587723970413208, "train/lm_loss": 2.587723970413208, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0376058125319613e-05, "perf/tokens_per_sec": 27050.4817856603, "train/loss_math": 2.200726419687271, "train/loss_code": 1.957502841949463, "train/loss_prose": 3.3137946923573813} +{"step": 3645, "train/loss": 2.9571920335292816, "train/lm_loss": 2.9571920335292816, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0361905956150147e-05, "perf/tokens_per_sec": 27038.220789685736, "train/loss_prose": 3.6348355293273924, "train/loss_code": 1.8277866045633953} +{"step": 3646, "train/loss": 2.12644824385643, "train/lm_loss": 2.12644824385643, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0347760921409894e-05, "perf/tokens_per_sec": 27037.156991393054, "train/loss_code": 1.5558128356933594, "train/loss_math": 2.1240495920181273, "train/loss_prose": 3.279712677001953} +{"step": 3647, "train/loss": 2.3108496367931366, "train/lm_loss": 2.3108496367931366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.033362302799297e-05, "perf/tokens_per_sec": 27237.64020674129, "train/loss_math": 2.247541666030884, "train/loss_prose": 3.4307610988616943, "train/loss_code": 1.3175537586212158} +{"step": 3648, "train/loss": 2.506747990846634, "train/lm_loss": 2.506747990846634, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0319492282790011e-05, "perf/tokens_per_sec": 27238.417534190976, "train/loss_prose": 3.4718550046284995, "train/loss_code": 1.7400974035263062, "train/loss_math": 2.2090632915496826} +{"step": 3649, "train/loss": 2.6621328592300415, "train/lm_loss": 2.6621328592300415, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0305368692688174e-05, "perf/tokens_per_sec": 27211.409846867336, "train/loss_prose": 3.3579279581705728, "train/loss_math": 2.244655799865723} +{"step": 3650, "train/loss": 2.476939618587494, "train/lm_loss": 2.476939618587494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0291252264571125e-05, "perf/tokens_per_sec": 25787.042413853662, "train/loss_prose": 3.053724447886149, "train/loss_math": 2.2733128865559897, "train/loss_code": 1.9172025322914124} +{"step": 3651, "train/loss": 2.4465144276618958, "train/lm_loss": 2.4465144276618958, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0277143005319038e-05, "perf/tokens_per_sec": 26685.848911588084, "train/loss_prose": 3.2021945317586265, "train/loss_math": 2.2123892307281494, "train/loss_code": 1.6641815900802612} +{"step": 3652, "train/loss": 1.8536493480205536, "train/lm_loss": 1.8536493480205536, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0263040921808592e-05, "perf/tokens_per_sec": 26702.89643766971, "train/loss_code": 1.479113946358363, "train/loss_math": 2.650672197341919, "train/loss_prose": 3.3038387298583984} +{"step": 3653, "train/loss": 2.6121985018253326, "train/lm_loss": 2.6121985018253326, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0248946020912978e-05, "perf/tokens_per_sec": 26611.856127589177, "train/loss_math": 2.300501585006714, "train/loss_code": 1.3444753885269165, "train/loss_prose": 3.7690439224243164} +{"step": 3654, "train/loss": 2.143255800008774, "train/lm_loss": 2.143255800008774, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0234858309501862e-05, "perf/tokens_per_sec": 27012.118099514784, "train/loss_code": 1.154401385784149, "train/loss_math": 2.352492332458496, "train/loss_prose": 4.510773181915283} +{"step": 3655, "train/loss": 2.580486446619034, "train/lm_loss": 2.580486446619034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.022077779444145e-05, "perf/tokens_per_sec": 27096.98710754021, "train/loss_prose": 3.482301115989685, "train/loss_math": 1.9642486969629924, "train/loss_code": 0.821940004825592} +{"step": 3656, "train/loss": 2.3409922420978546, "train/lm_loss": 2.3409922420978546, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.02067044825944e-05, "perf/tokens_per_sec": 27008.041452471458, "train/loss_code": 1.544660210609436, "train/loss_math": 2.234438498814901, "train/loss_prose": 3.6953208446502686} +{"step": 3657, "train/loss": 2.1895729303359985, "train/lm_loss": 2.1895729303359985, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0192638380819883e-05, "perf/tokens_per_sec": 27090.663243776828, "train/loss_prose": 3.0670990347862244, "train/loss_math": 1.9920055866241455, "train/loss_code": 1.0853939255078633} +{"step": 3658, "train/loss": 2.4000317752361298, "train/lm_loss": 2.4000317752361298, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.017857949597352e-05, "perf/tokens_per_sec": 27141.122564113095, "train/loss_prose": 3.435694932937622, "train/loss_math": 2.09813392162323, "train/loss_code": 1.5656340916951497} +{"step": 3659, "train/loss": 2.3912256956100464, "train/lm_loss": 2.3912256956100464, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0164527834907467e-05, "perf/tokens_per_sec": 27282.14876983251, "train/loss_math": 1.806281566619873, "train/loss_prose": 3.339129626750946, "train/loss_code": 1.3223350842793782} +{"step": 3660, "train/loss": 1.9300608932971954, "train/lm_loss": 1.9300608932971954, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0150483404470324e-05, "perf/tokens_per_sec": 27139.064570297505, "train/loss_math": 2.0178881543023244, "train/loss_code": 1.3152698278427124} +{"step": 3661, "train/loss": 2.1399965584278107, "train/lm_loss": 2.1399965584278107, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0136446211507175e-05, "perf/tokens_per_sec": 27168.20565760842, "train/loss_code": 1.4045599848031998, "train/loss_prose": 4.0831298828125, "train/loss_math": 2.4728676478068032} +{"step": 3662, "train/loss": 2.6827248334884644, "train/lm_loss": 2.6827248334884644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0122416262859577e-05, "perf/tokens_per_sec": 27264.613474344405, "train/loss_prose": 3.504443327585856, "train/loss_math": 2.1896936893463135} +{"step": 3663, "train/loss": 2.6253244876861572, "train/lm_loss": 2.6253244876861572, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0108393565365551e-05, "perf/tokens_per_sec": 27184.240590272777, "train/loss_code": 1.2913399934768677, "train/loss_prose": 3.642412006855011, "train/loss_math": 2.5589282512664795} +{"step": 3664, "train/loss": 2.0917776823043823, "train/lm_loss": 2.0917776823043823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0094378125859602e-05, "perf/tokens_per_sec": 27056.999963776732, "train/loss_prose": 3.5248913764953613, "train/loss_code": 1.5770821173985798, "train/loss_math": 2.1195211708545685} +{"step": 3665, "train/loss": 2.363302767276764, "train/lm_loss": 2.363302767276764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.008036995117268e-05, "perf/tokens_per_sec": 27217.79021546261, "train/loss_math": 2.371812403202057, "train/loss_prose": 3.1443907022476196, "train/loss_code": 1.5651960968971252} +{"step": 3666, "train/loss": 2.353989362716675, "train/lm_loss": 2.353989362716675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0066369048132208e-05, "perf/tokens_per_sec": 27116.704023183538, "train/loss_prose": 3.4739487171173096, "train/loss_math": 2.0854455629984536, "train/loss_code": 1.076866328716278} +{"step": 3667, "train/loss": 2.2660963237285614, "train/lm_loss": 2.2660963237285614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0052375423562038e-05, "perf/tokens_per_sec": 27118.159335394834, "train/loss_code": 1.2720792889595032, "train/loss_math": 2.057209014892578, "train/loss_prose": 4.463017702102661} +{"step": 3668, "train/loss": 2.684298515319824, "train/lm_loss": 2.684298515319824, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.003838908428249e-05, "perf/tokens_per_sec": 27314.594314768914, "train/loss_math": 2.2912527084350587, "train/loss_prose": 3.3393747806549072} +{"step": 3669, "train/loss": 2.0484520494937897, "train/lm_loss": 2.0484520494937897, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0024410037110357e-05, "perf/tokens_per_sec": 27142.53761592543, "train/loss_math": 2.093770217895508, "train/loss_code": 1.2448590993881226, "train/loss_prose": 3.429047107696533} +{"step": 3670, "train/loss": 2.4310935139656067, "train/lm_loss": 2.4310935139656067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0010438288858846e-05, "perf/tokens_per_sec": 27022.740145590447, "train/loss_code": 1.375656247138977, "train/loss_math": 1.9733389218648274, "train/loss_prose": 3.592473109563192} +{"step": 3671, "train/loss": 2.3933112621307373, "train/lm_loss": 2.3933112621307373, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.996473846337614e-06, "perf/tokens_per_sec": 27016.365915717102, "train/loss_code": 1.494726824760437, "train/loss_prose": 3.8909520308176675} +{"step": 3672, "train/loss": 2.5540218353271484, "train/lm_loss": 2.5540218353271484, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.982516716352769e-06, "perf/tokens_per_sec": 27099.38069083441, "train/loss_prose": 3.3775450388590493, "train/loss_math": 2.0532810389995575, "train/loss_code": 2.0864157676696777} +{"step": 3673, "train/loss": 2.0682361721992493, "train/lm_loss": 2.0682361721992493, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.968566905706833e-06, "perf/tokens_per_sec": 27148.027698152087, "train/loss_math": 2.1201846599578857, "train/loss_prose": 3.0661399364471436, "train/loss_code": 1.3510186274846394} +{"step": 3674, "train/loss": 2.211340218782425, "train/lm_loss": 2.211340218782425, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.954624421198792e-06, "perf/tokens_per_sec": 27135.935302888625, "train/loss_code": 1.5221884727478028, "train/loss_prose": 4.009419679641724, "train/loss_math": 2.060938596725464} +{"step": 3675, "train/loss": 2.1449073553085327, "train/lm_loss": 2.1449073553085327, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.94068926962404e-06, "perf/tokens_per_sec": 27165.671033552546, "train/loss_prose": 3.2580376863479614, "train/loss_code": 1.7222045958042145, "train/loss_math": 1.8771830201148987} +{"step": 3676, "train/loss": 2.0106365382671356, "train/lm_loss": 2.0106365382671356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.926761457774389e-06, "perf/tokens_per_sec": 27188.112443799466, "train/loss_code": 1.4313054382801056, "train/loss_prose": 4.015044212341309, "train/loss_math": 2.1149420738220215} +{"step": 3677, "train/loss": 2.4262446761131287, "train/lm_loss": 2.4262446761131287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.912840992438086e-06, "perf/tokens_per_sec": 27252.330558375634, "train/loss_prose": 3.217499613761902, "train/loss_math": 2.0388938188552856, "train/loss_code": 1.2310855388641357} +{"step": 3678, "train/loss": 2.432331085205078, "train/lm_loss": 2.432331085205078, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.898927880399806e-06, "perf/tokens_per_sec": 27103.142890498053, "train/loss_code": 0.631331741809845, "train/loss_prose": 3.151335299015045, "train/loss_math": 2.073991815249125} +{"step": 3679, "train/loss": 2.2509889602661133, "train/lm_loss": 2.2509889602661133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.88502212844063e-06, "perf/tokens_per_sec": 27158.75692253214, "train/loss_math": 2.1474478244781494, "train/loss_code": 1.6032200654347737, "train/loss_prose": 3.377954125404358} +{"step": 3680, "train/loss": 2.5664077401161194, "train/lm_loss": 2.5664077401161194, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.871123743338048e-06, "perf/tokens_per_sec": 27293.678056065262, "train/loss_code": 1.641415774822235, "train/loss_prose": 3.2884857654571533, "train/loss_math": 2.0472434759140015} +{"step": 3681, "train/loss": 2.4405876994132996, "train/lm_loss": 2.4405876994132996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.857232731865967e-06, "perf/tokens_per_sec": 27098.14110383967, "train/loss_math": 2.3031511306762695, "train/loss_code": 1.5223357677459717, "train/loss_prose": 3.4504640897115073} +{"step": 3682, "train/loss": 2.5959948003292084, "train/lm_loss": 2.5959948003292084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.843349100794697e-06, "perf/tokens_per_sec": 27285.22518264405, "train/loss_prose": 3.2666423320770264, "train/loss_code": 1.3137491345405579, "train/loss_math": 1.807248830795288} +{"step": 3683, "train/loss": 1.6987042129039764, "train/lm_loss": 1.6987042129039764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.829472856890942e-06, "perf/tokens_per_sec": 27149.529279204537, "train/loss_math": 2.2100907961527505, "train/loss_code": 1.391872274875641} +{"step": 3684, "train/loss": 2.2591125071048737, "train/lm_loss": 2.2591125071048737, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.815604006917839e-06, "perf/tokens_per_sec": 26963.06782408897, "train/loss_prose": 3.1556190252304077, "train/loss_math": 1.9242790937423706, "train/loss_code": 2.032272517681122} +{"step": 3685, "train/loss": 1.973013997077942, "train/lm_loss": 1.973013997077942, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.801742557634872e-06, "perf/tokens_per_sec": 27223.957873852916, "train/loss_code": 1.3464780747890472, "train/loss_math": 2.268572966257731, "train/loss_prose": 3.592480421066284} +{"step": 3686, "train/loss": 1.7156957685947418, "train/lm_loss": 1.7156957685947418, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.787888515797952e-06, "perf/tokens_per_sec": 26990.602209224322, "train/loss_code": 1.4024404883384705, "train/loss_math": 2.028951019048691} +{"step": 3687, "train/loss": 2.517612785100937, "train/lm_loss": 2.517612785100937, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.774041888159364e-06, "perf/tokens_per_sec": 26837.423312806764, "train/loss_prose": 3.6918665568033853, "train/loss_code": 1.4531426827112834, "train/loss_math": 2.3529369831085205} +{"step": 3688, "train/loss": 2.3630164563655853, "train/lm_loss": 2.3630164563655853, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.760202681467797e-06, "perf/tokens_per_sec": 27110.5420792587, "train/loss_prose": 3.541407744089762, "train/loss_math": 2.0027339458465576, "train/loss_code": 1.1358534097671509} +{"step": 3689, "train/loss": 2.14588925242424, "train/lm_loss": 2.14588925242424, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.746370902468311e-06, "perf/tokens_per_sec": 27243.903272306314, "train/loss_code": 1.3471005360285442, "train/loss_prose": 3.135709842046102, "train/loss_math": 1.85934180021286} +{"step": 3690, "train/loss": 1.9893568754196167, "train/lm_loss": 1.9893568754196167, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.732546557902345e-06, "perf/tokens_per_sec": 27136.706836038073, "train/loss_prose": 2.010688215494156, "train/loss_code": 1.8530896306037903, "train/loss_math": 2.2405604124069214} +{"step": 3691, "train/loss": 2.1629219353199005, "train/lm_loss": 2.1629219353199005, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.718729654507713e-06, "perf/tokens_per_sec": 27086.349328275493, "train/loss_code": 1.0602866113185883, "train/loss_prose": 3.265557110309601} +{"step": 3692, "train/loss": 1.6920426189899445, "train/lm_loss": 1.6920426189899445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.7049201990186e-06, "perf/tokens_per_sec": 27180.49884663943, "train/loss_math": 2.008789837360382, "train/loss_code": 1.2973505973815918, "train/loss_prose": 3.032008171081543} +{"step": 3693, "train/loss": 2.202272415161133, "train/lm_loss": 2.202272415161133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.691118198165595e-06, "perf/tokens_per_sec": 27217.876457149738, "train/loss_math": 2.136361002922058, "train/loss_code": 1.2777652144432068, "train/loss_prose": 3.258602261543274} +{"step": 3694, "train/loss": 2.2504895329475403, "train/lm_loss": 2.2504895329475403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.677323658675594e-06, "perf/tokens_per_sec": 27456.29283525939, "train/loss_math": 2.0667421221733093, "train/loss_prose": 3.2844154834747314, "train/loss_code": 1.5840579271316528} +{"step": 3695, "train/loss": 2.2177511751651764, "train/lm_loss": 2.2177511751651764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.663536587271902e-06, "perf/tokens_per_sec": 27481.5908475048, "train/loss_prose": 3.603455384572347, "train/loss_code": 1.166982263326645, "train/loss_math": 2.2637135982513428} +{"step": 3696, "train/loss": 2.3458568155765533, "train/lm_loss": 2.3458568155765533, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.649756990674158e-06, "perf/tokens_per_sec": 27441.118839477815, "train/loss_math": 2.070780783891678, "train/loss_code": 1.580584466457367, "train/loss_prose": 3.6612813472747803} +{"step": 3697, "train/loss": 2.612171769142151, "train/lm_loss": 2.612171769142151, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.635984875598388e-06, "perf/tokens_per_sec": 27459.496429279272, "train/loss_prose": 3.4252604643503823, "train/loss_code": 1.6731380224227905, "train/loss_math": 2.2371134757995605} +{"step": 3698, "train/loss": 2.0036981105804443, "train/lm_loss": 2.0036981105804443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.622220248756946e-06, "perf/tokens_per_sec": 27469.506990547092, "train/loss_code": 1.3236069679260254, "train/loss_prose": 3.1674203872680664, "train/loss_math": 2.2001575231552124} +{"step": 3699, "train/loss": 2.0013391375541687, "train/lm_loss": 2.0013391375541687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.608463116858542e-06, "perf/tokens_per_sec": 27487.83467493548, "train/loss_code": 1.5774224698543549, "train/loss_prose": 3.158513069152832, "train/loss_math": 2.18083647886912} +{"step": 3700, "train/loss": 2.3593795597553253, "train/lm_loss": 2.3593795597553253, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.59471348660824e-06, "perf/tokens_per_sec": 27492.673393194633, "train/loss_math": 2.104975461959839, "train/loss_prose": 3.562571922938029, "train/loss_code": 1.3257903655370076} +{"step": 3701, "train/loss": 2.480528384447098, "train/lm_loss": 2.480528384447098, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.58097136470744e-06, "perf/tokens_per_sec": 27315.680057493744, "train/loss_prose": 3.223522106806437, "train/loss_code": 1.8531550963719685, "train/loss_math": 2.3070982694625854} +{"step": 3702, "train/loss": 1.8457917273044586, "train/lm_loss": 1.8457917273044586, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.567236757853879e-06, "perf/tokens_per_sec": 27533.20947675285, "train/loss_prose": 2.4495747089385986, "train/loss_math": 2.3364402055740356, "train/loss_code": 1.32685986161232} +{"step": 3703, "train/loss": 2.906414747238159, "train/lm_loss": 2.906414747238159, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.553509672741645e-06, "perf/tokens_per_sec": 27353.169336194984, "train/loss_math": 2.2790859639644623, "train/loss_prose": 3.53374320268631} +{"step": 3704, "train/loss": 2.5064380168914795, "train/lm_loss": 2.5064380168914795, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.539790116061151e-06, "perf/tokens_per_sec": 27430.997557057868, "train/loss_code": 1.2709468305110931, "train/loss_prose": 3.3801783323287964, "train/loss_math": 1.9944482445716858} +{"step": 3705, "train/loss": 2.660151720046997, "train/lm_loss": 2.660151720046997, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.526078094499141e-06, "perf/tokens_per_sec": 27442.653018714816, "train/loss_prose": 3.2819894313812257, "train/loss_math": 2.017216444015503, "train/loss_code": 1.4270249605178833} +{"step": 3706, "train/loss": 2.3573772609233856, "train/lm_loss": 2.3573772609233856, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.512373614738681e-06, "perf/tokens_per_sec": 26748.08835966644, "train/loss_prose": 3.031375765800476, "train/loss_math": 1.6378130316734314, "train/loss_code": 1.7289440035820007} +{"step": 3707, "train/loss": 2.492740958929062, "train/lm_loss": 2.492740958929062, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.498676683459185e-06, "perf/tokens_per_sec": 26852.73427000802, "train/loss_prose": 3.6281309723854065, "train/loss_code": 1.357350766658783} +{"step": 3708, "train/loss": 1.7819855511188507, "train/lm_loss": 1.7819855511188507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.48498730733636e-06, "perf/tokens_per_sec": 26387.29281516774, "train/loss_prose": 3.2728590965270996, "train/loss_code": 1.2850276231765747} +{"step": 3709, "train/loss": 2.265231281518936, "train/lm_loss": 2.265231281518936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.471305493042243e-06, "perf/tokens_per_sec": 27009.65497811541, "train/loss_code": 1.2761772871017456, "train/loss_prose": 3.6294801235198975, "train/loss_math": 2.128699779510498} +{"step": 3710, "train/loss": 2.620640814304352, "train/lm_loss": 2.620640814304352, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.457631247245182e-06, "perf/tokens_per_sec": 26941.122979792435, "train/loss_math": 1.938636024792989, "train/loss_prose": 3.223680555820465, "train/loss_code": 2.2544963359832764} +{"step": 3711, "train/loss": 2.4762848019599915, "train/lm_loss": 2.4762848019599915, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.443964576609843e-06, "perf/tokens_per_sec": 26967.257895596045, "train/loss_code": 1.4938313961029053, "train/loss_prose": 3.2613335847854614, "train/loss_math": 2.283449649810791} +{"step": 3712, "train/loss": 2.937962621450424, "train/lm_loss": 2.937962621450424, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.430305487797191e-06, "perf/tokens_per_sec": 26892.329967331363, "train/loss_prose": 3.568774175643921, "train/loss_math": 2.141758680343628, "train/loss_code": 1.7590355277061462} +{"step": 3713, "train/loss": 2.135828822851181, "train/lm_loss": 2.135828822851181, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.416653987464502e-06, "perf/tokens_per_sec": 27098.52579111578, "train/loss_code": 1.600117027759552, "train/loss_math": 1.9449794292449951, "train/loss_prose": 3.398102283477783} +{"step": 3714, "train/loss": 2.1854382753372192, "train/lm_loss": 2.1854382753372192, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.403010082265351e-06, "perf/tokens_per_sec": 27175.29652460427, "train/loss_prose": 3.538751244544983, "train/loss_code": 1.4680359661579132, "train/loss_math": 2.2669297456741333} +{"step": 3715, "train/loss": 2.537176251411438, "train/lm_loss": 2.537176251411438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.389373778849612e-06, "perf/tokens_per_sec": 27182.090037007794, "train/loss_math": 2.029648542404175, "train/loss_prose": 3.079297363758087, "train/loss_code": 1.8912749290466309} +{"step": 3716, "train/loss": 2.3987068235874176, "train/lm_loss": 2.3987068235874176, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.375745083863443e-06, "perf/tokens_per_sec": 27192.84620729717, "train/loss_code": 1.722324550151825, "train/loss_math": 1.9595123132069905, "train/loss_prose": 3.2888227303822837} +{"step": 3717, "train/loss": 2.451329469680786, "train/lm_loss": 2.451329469680786, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.362124003949324e-06, "perf/tokens_per_sec": 27271.624886896505, "train/loss_prose": 3.5264626145362854, "train/loss_code": 1.376196265220642} +{"step": 3718, "train/loss": 2.2915769815444946, "train/lm_loss": 2.2915769815444946, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.348510545745995e-06, "perf/tokens_per_sec": 27190.350017488687, "train/loss_math": 2.2467521727085114, "train/loss_code": 1.1830556988716125, "train/loss_prose": 3.489747643470764} +{"step": 3719, "train/loss": 1.8393606841564178, "train/lm_loss": 1.8393606841564178, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.334904715888495e-06, "perf/tokens_per_sec": 27286.568622748404, "train/loss_code": 1.2377235492070515, "train/loss_math": 1.9582799077033997, "train/loss_prose": 3.1685950756073} +{"step": 3720, "train/loss": 2.150665044784546, "train/lm_loss": 2.150665044784546, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.321306521008122e-06, "perf/tokens_per_sec": 27365.151033528142, "train/loss_math": 1.8693657517433167, "train/loss_code": 1.1174842019875844, "train/loss_prose": 3.3713788191477456} +{"step": 3721, "train/loss": 2.2315085530281067, "train/lm_loss": 2.2315085530281067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.307715967732491e-06, "perf/tokens_per_sec": 27295.976408978822, "train/loss_prose": 4.060955286026001, "train/loss_code": 1.4433175921440125, "train/loss_math": 1.9784433245658875} +{"step": 3722, "train/loss": 2.8294151723384857, "train/lm_loss": 2.8294151723384857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.294133062685464e-06, "perf/tokens_per_sec": 27410.5149705949, "train/loss_math": 1.8918087482452393, "train/loss_prose": 4.118096113204956, "train/loss_code": 1.1896602511405945} +{"step": 3723, "train/loss": 2.3925241827964783, "train/lm_loss": 2.3925241827964783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.280557812487188e-06, "perf/tokens_per_sec": 27320.76248089679, "train/loss_code": 0.9139886498451233, "train/loss_math": 2.0951772928237915, "train/loss_prose": 3.2804654240608215} +{"step": 3724, "train/loss": 2.479121059179306, "train/lm_loss": 2.479121059179306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.266990223754069e-06, "perf/tokens_per_sec": 27367.287056034867, "train/loss_code": 1.3983182311058044, "train/loss_math": 2.066627343495687, "train/loss_prose": 3.6121498743693032} +{"step": 3725, "train/loss": 2.3624950647354126, "train/lm_loss": 2.3624950647354126, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.25343030309878e-06, "perf/tokens_per_sec": 27331.45477309788, "train/loss_code": 1.5258141905069351, "train/loss_prose": 3.1991758942604065} +{"step": 3726, "train/loss": 1.762817144393921, "train/lm_loss": 1.762817144393921, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.23987805713028e-06, "perf/tokens_per_sec": 27288.388978456624, "train/loss_code": 1.4485886335372924, "train/loss_math": 1.9783291220664978, "train/loss_prose": 2.902935266494751} +{"step": 3727, "train/loss": 2.656233549118042, "train/lm_loss": 2.656233549118042, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.22633349245376e-06, "perf/tokens_per_sec": 26680.751328223883, "train/loss_math": 2.3019604682922363, "train/loss_prose": 3.643505334854126, "train/loss_code": 1.9051444133122761} +{"step": 3728, "train/loss": 2.108551412820816, "train/lm_loss": 2.108551412820816, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.21279661567068e-06, "perf/tokens_per_sec": 27020.19009273123, "train/loss_code": 1.8625438809394836, "train/loss_math": 2.052004019419352, "train/loss_prose": 3.262223720550537} +{"step": 3729, "train/loss": 2.276970863342285, "train/lm_loss": 2.276970863342285, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.199267433378727e-06, "perf/tokens_per_sec": 27031.1162047898, "train/loss_prose": 3.3972790241241455, "train/loss_code": 1.693864345550537, "train/loss_math": 2.1874373952547708} +{"step": 3730, "train/loss": 2.323103606700897, "train/lm_loss": 2.323103606700897, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.18574595217189e-06, "perf/tokens_per_sec": 26935.63179118612, "train/loss_math": 1.9615492820739746, "train/loss_code": 1.4005155563354492, "train/loss_prose": 3.4867278734842935} +{"step": 3731, "train/loss": 2.3530604243278503, "train/lm_loss": 2.3530604243278503, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.17223217864036e-06, "perf/tokens_per_sec": 27116.276019906465, "train/loss_code": 1.4405581156412761, "train/loss_prose": 3.278153896331787, "train/loss_math": 2.3341734409332275} +{"step": 3732, "train/loss": 2.796408712863922, "train/lm_loss": 2.796408712863922, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.158726119370588e-06, "perf/tokens_per_sec": 27414.36406169715, "train/loss_prose": 3.7190107107162476, "train/loss_code": 1.5320498943328857, "train/loss_math": 2.2155635356903076} +{"step": 3733, "train/loss": 2.352348506450653, "train/lm_loss": 2.352348506450653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.145227780945264e-06, "perf/tokens_per_sec": 27434.458107838364, "train/loss_prose": 3.1059977412223816, "train/loss_code": 1.2755779027938843, "train/loss_math": 2.568063497543335} +{"step": 3734, "train/loss": 2.36066472530365, "train/lm_loss": 2.36066472530365, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.131737169943314e-06, "perf/tokens_per_sec": 27199.562371857897, "train/loss_code": 1.0602356394131978, "train/loss_prose": 3.4493502378463745, "train/loss_math": 1.9072093963623047} +{"step": 3735, "train/loss": 2.0713801085948944, "train/lm_loss": 2.0713801085948944, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.11825429293989e-06, "perf/tokens_per_sec": 27394.081022039805, "train/loss_code": 1.1473129987716675, "train/loss_math": 2.3432981967926025, "train/loss_prose": 3.049603581428528} +{"step": 3736, "train/loss": 2.866736054420471, "train/lm_loss": 2.866736054420471, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.104779156506397e-06, "perf/tokens_per_sec": 27361.53363360111, "train/loss_code": 2.300664941469828, "train/loss_math": 1.7000340223312378, "train/loss_prose": 3.5829650163650513} +{"step": 3737, "train/loss": 2.159648507833481, "train/lm_loss": 2.159648507833481, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.091311767210453e-06, "perf/tokens_per_sec": 27332.367917474076, "train/loss_code": 1.4309900204340618, "train/loss_math": 2.2607929706573486, "train/loss_prose": 3.1009193658828735} +{"step": 3738, "train/loss": 1.7202990055084229, "train/lm_loss": 1.7202990055084229, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.077852131615888e-06, "perf/tokens_per_sec": 27424.998418022096, "train/loss_code": 1.5799888372421265, "train/loss_math": 2.141229748725891} +{"step": 3739, "train/loss": 2.4363848865032196, "train/lm_loss": 2.4363848865032196, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.064400256282757e-06, "perf/tokens_per_sec": 27391.329388824317, "train/loss_prose": 3.457443118095398, "train/loss_code": 1.1669094562530518, "train/loss_math": 2.1605782508850098} +{"step": 3740, "train/loss": 2.80909563601017, "train/lm_loss": 2.80909563601017, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.050956147767364e-06, "perf/tokens_per_sec": 27202.103324445347, "train/loss_prose": 3.4199355840682983, "train/loss_code": 0.9765759408473969} +{"step": 3741, "train/loss": 2.3392949402332306, "train/lm_loss": 2.3392949402332306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.037519812622195e-06, "perf/tokens_per_sec": 27427.800608908197, "train/loss_math": 2.043771187464396, "train/loss_prose": 3.7188215255737305, "train/loss_code": 1.715134620666504} +{"step": 3742, "train/loss": 2.423659771680832, "train/lm_loss": 2.423659771680832, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.02409125739595e-06, "perf/tokens_per_sec": 27390.717989843994, "train/loss_prose": 3.3252743085225425, "train/loss_math": 2.2758864164352417, "train/loss_code": 1.620560924212138} +{"step": 3743, "train/loss": 2.1691451966762543, "train/lm_loss": 2.1691451966762543, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.010670488633552e-06, "perf/tokens_per_sec": 27421.846637605886, "train/loss_math": 1.9262747287750244, "train/loss_prose": 4.101689338684082, "train/loss_code": 1.8100489377975464} +{"step": 3744, "train/loss": 2.5221768021583557, "train/lm_loss": 2.5221768021583557, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.997257512876108e-06, "perf/tokens_per_sec": 27476.53630512942, "train/loss_prose": 3.8619731267293296, "train/loss_code": 1.0032986104488373, "train/loss_math": 2.194966117540995} +{"step": 3745, "train/loss": 2.223784327507019, "train/lm_loss": 2.223784327507019, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.983852336660959e-06, "perf/tokens_per_sec": 27357.35084556833, "train/loss_code": 1.4654254913330078, "train/loss_prose": 3.5810306072235107, "train/loss_math": 2.383255362510681} +{"step": 3746, "train/loss": 2.6018884479999542, "train/lm_loss": 2.6018884479999542, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.970454966521621e-06, "perf/tokens_per_sec": 27388.971286000113, "train/loss_code": 1.6340354979038239, "train/loss_prose": 4.030164798100789, "train/loss_math": 2.188471794128418} +{"step": 3747, "train/loss": 2.2554602324962616, "train/lm_loss": 2.2554602324962616, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.957065408987797e-06, "perf/tokens_per_sec": 27406.31719464376, "train/loss_prose": 3.7210055589675903, "train/loss_code": 1.6542441993951797, "train/loss_math": 1.992347538471222} +{"step": 3748, "train/loss": 1.7421374320983887, "train/lm_loss": 1.7421374320983887, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.94368367058539e-06, "perf/tokens_per_sec": 27405.09308550518, "train/loss_code": 1.4611666679382325, "train/loss_math": 2.2104220390319824} +{"step": 3749, "train/loss": 2.5822561383247375, "train/lm_loss": 2.5822561383247375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.930309757836517e-06, "perf/tokens_per_sec": 27416.68278592642, "train/loss_prose": 3.552109718322754, "train/loss_math": 2.0398659110069275, "train/loss_code": 1.184938907623291} +{"step": 3750, "train/loss": 2.758667230606079, "train/lm_loss": 2.758667230606079, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.91694367725945e-06, "perf/tokens_per_sec": 27441.732490587798, "train/loss_math": 2.363316218058268, "train/loss_prose": 3.3510102033615112, "train/loss_code": 1.5753483772277832} +{"step": 3751, "train/loss": 2.4744409322738647, "train/lm_loss": 2.4744409322738647, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.903585435368658e-06, "perf/tokens_per_sec": 27378.408476853278, "train/loss_prose": 3.246894419193268, "train/loss_code": 1.3895935416221619, "train/loss_math": 2.0143808126449585} +{"step": 3752, "train/loss": 2.5657615065574646, "train/lm_loss": 2.5657615065574646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.890235038674783e-06, "perf/tokens_per_sec": 27404.96193744028, "train/loss_code": 2.1981263160705566, "train/loss_math": 2.116830140352249, "train/loss_prose": 3.286881367365519} +{"step": 3753, "train/loss": 1.9626590609550476, "train/lm_loss": 1.9626590609550476, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.876892493684643e-06, "perf/tokens_per_sec": 27381.812128736936, "train/loss_code": 1.4835925499598186, "train/loss_math": 2.1200641989707947, "train/loss_prose": 2.7702388763427734} +{"step": 3754, "train/loss": 1.7990321516990662, "train/lm_loss": 1.7990321516990662, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.863557806901233e-06, "perf/tokens_per_sec": 27395.915751336317, "train/loss_math": 2.0731953978538513, "train/loss_code": 1.0305698066949844, "train/loss_prose": 3.061793327331543} +{"step": 3755, "train/loss": 2.430111140012741, "train/lm_loss": 2.430111140012741, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.850230984823735e-06, "perf/tokens_per_sec": 27404.393643663603, "train/loss_math": 1.9920652707417805, "train/loss_prose": 3.3431931336720786, "train/loss_code": 1.717556893825531} +{"step": 3756, "train/loss": 2.360527068376541, "train/lm_loss": 2.360527068376541, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.836912033947455e-06, "perf/tokens_per_sec": 27318.6771553376, "train/loss_math": 1.968745470046997, "train/loss_prose": 3.399717330932617, "train/loss_code": 2.0368226170539856} +{"step": 3757, "train/loss": 2.6126656234264374, "train/lm_loss": 2.6126656234264374, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.8236009607639e-06, "perf/tokens_per_sec": 27413.70789019753, "train/loss_code": 1.3294923901557922, "train/loss_math": 2.321553111076355, "train/loss_prose": 3.3998085260391235} +{"step": 3758, "train/loss": 2.2569559514522552, "train/lm_loss": 2.2569559514522552, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.810297771760715e-06, "perf/tokens_per_sec": 27444.538102350696, "train/loss_code": 1.3878523508707683, "train/loss_math": 2.074832797050476, "train/loss_prose": 3.2474752267201743} +{"step": 3759, "train/loss": 2.2077773809432983, "train/lm_loss": 2.2077773809432983, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.797002473421728e-06, "perf/tokens_per_sec": 27400.154041712984, "train/loss_code": 1.7013583183288574, "train/loss_prose": 3.1087803840637207, "train/loss_math": 2.113527695337931} +{"step": 3760, "train/loss": 2.402473509311676, "train/lm_loss": 2.402473509311676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.7837150722269e-06, "perf/tokens_per_sec": 27425.917823789532, "train/loss_math": 1.8650354146957397, "train/loss_prose": 3.367596447467804, "train/loss_code": 1.2947886784871419} +{"step": 3761, "train/loss": 2.7832672894001007, "train/lm_loss": 2.7832672894001007, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.77043557465235e-06, "perf/tokens_per_sec": 27222.577455794664, "train/loss_math": 2.489211678504944, "train/loss_code": 2.0568044185638428, "train/loss_prose": 3.2935262322425842} +{"step": 3762, "train/loss": 2.4650922417640686, "train/lm_loss": 2.4650922417640686, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.757163987170339e-06, "perf/tokens_per_sec": 27405.79256305135, "train/loss_prose": 3.2834349473317466, "train/loss_code": 1.5509300231933594, "train/loss_math": 2.25619105497996} +{"step": 3763, "train/loss": 2.209038645029068, "train/lm_loss": 2.209038645029068, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.743900316249273e-06, "perf/tokens_per_sec": 27341.98137611744, "train/loss_code": 1.8220927715301514, "train/loss_math": 2.1504054069519043, "train/loss_prose": 3.2760963439941406} +{"step": 3764, "train/loss": 2.929721385240555, "train/lm_loss": 2.929721385240555, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.73064456835373e-06, "perf/tokens_per_sec": 27339.414225515284, "train/loss_math": 2.042564868927002, "train/loss_prose": 3.68405442237854, "train/loss_code": 1.4874668717384338} +{"step": 3765, "train/loss": 1.9910994172096252, "train/lm_loss": 1.9910994172096252, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.717396749944374e-06, "perf/tokens_per_sec": 27384.12534947702, "train/loss_math": 2.412338209152222, "train/loss_code": 1.2890347242355347} +{"step": 3766, "train/loss": 2.584785521030426, "train/lm_loss": 2.584785521030426, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.704156867478036e-06, "perf/tokens_per_sec": 27330.802564469686, "train/loss_prose": 3.3631482124328613, "train/loss_code": 1.5803537964820862, "train/loss_math": 2.0324917435646057} +{"step": 3767, "train/loss": 2.3193733394145966, "train/lm_loss": 2.3193733394145966, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.690924927407679e-06, "perf/tokens_per_sec": 27302.83040013731, "train/loss_code": 1.8060156106948853, "train/loss_math": 2.2212769985198975, "train/loss_prose": 3.2365540266036987} +{"step": 3768, "train/loss": 2.601435124874115, "train/lm_loss": 2.601435124874115, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.677700936182379e-06, "perf/tokens_per_sec": 27455.766288600975, "train/loss_math": 2.2328751981258392, "train/loss_prose": 3.6049981911977134, "train/loss_code": 1.064985990524292} +{"step": 3769, "train/loss": 1.9316790997982025, "train/lm_loss": 1.9316790997982025, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.664484900247363e-06, "perf/tokens_per_sec": 27444.669629478165, "train/loss_code": 1.2510355909665425, "train/loss_prose": 3.517822027206421, "train/loss_math": 2.0456258058547974} +{"step": 3770, "train/loss": 2.7394192814826965, "train/lm_loss": 2.7394192814826965, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.651276826043963e-06, "perf/tokens_per_sec": 27316.85275659036, "train/loss_math": 2.276357054710388, "train/loss_code": 1.8163690567016602, "train/loss_prose": 3.4324755668640137} +{"step": 3771, "train/loss": 2.201462894678116, "train/lm_loss": 2.201462894678116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.63807672000963e-06, "perf/tokens_per_sec": 27385.434892944133, "train/loss_code": 2.0217714309692383, "train/loss_prose": 3.334576368331909, "train/loss_math": 2.0841349959373474} +{"step": 3772, "train/loss": 1.7855502367019653, "train/lm_loss": 1.7855502367019653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.624884588577939e-06, "perf/tokens_per_sec": 27422.021717512718, "train/loss_code": 1.544044816493988, "train/loss_math": 2.188059449195862} +{"step": 3773, "train/loss": 2.3669573068618774, "train/lm_loss": 2.3669573068618774, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.61170043817857e-06, "perf/tokens_per_sec": 27399.017876480204, "train/loss_math": 2.181225001811981, "train/loss_prose": 3.449072003364563, "train/loss_code": 1.65630704164505} +{"step": 3774, "train/loss": 1.9973418712615967, "train/lm_loss": 1.9973418712615967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.598524275237322e-06, "perf/tokens_per_sec": 27361.57721104091, "train/loss_code": 1.0547326058149338, "train/loss_prose": 3.324976364771525, "train/loss_math": 1.784875512123108} +{"step": 3775, "train/loss": 2.066412538290024, "train/lm_loss": 2.066412538290024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.585356106176094e-06, "perf/tokens_per_sec": 27489.15415644616, "train/loss_prose": 2.6398940980434418, "train/loss_code": 1.323308487733205, "train/loss_math": 2.001798152923584} +{"step": 3776, "train/loss": 2.1088927090168, "train/lm_loss": 2.1088927090168, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.572195937412891e-06, "perf/tokens_per_sec": 27502.13580843768, "train/loss_code": 1.208882912993431, "train/loss_math": 2.154482046763102, "train/loss_prose": 5.572163105010986} +{"step": 3777, "train/loss": 2.181687206029892, "train/lm_loss": 2.181687206029892, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.559043775361817e-06, "perf/tokens_per_sec": 27417.732903285523, "train/loss_code": 1.3809224367141724, "train/loss_math": 2.038125514984131, "train/loss_prose": 3.0781598885854087} +{"step": 3778, "train/loss": 2.8203364610671997, "train/lm_loss": 2.8203364610671997, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.545899626433085e-06, "perf/tokens_per_sec": 27382.335842602966, "train/loss_code": 1.8144467671712239, "train/loss_prose": 3.4238702297210692} +{"step": 3779, "train/loss": 1.9716178476810455, "train/lm_loss": 1.9716178476810455, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.532763497032987e-06, "perf/tokens_per_sec": 27498.790210484192, "train/loss_code": 1.247610588868459, "train/loss_math": 2.123136818408966, "train/loss_prose": 3.5375633239746094} +{"step": 3780, "train/loss": 2.518122911453247, "train/lm_loss": 2.518122911453247, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.519635393563916e-06, "perf/tokens_per_sec": 27408.241015676103, "train/loss_math": 1.9364139437675476, "train/loss_code": 1.5694690942764282, "train/loss_prose": 3.8545826276143393} +{"step": 3781, "train/loss": 2.336052715778351, "train/lm_loss": 2.336052715778351, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.50651532242435e-06, "perf/tokens_per_sec": 27404.612215403806, "train/loss_math": 2.2894786993662515, "train/loss_code": 1.2508138418197632, "train/loss_prose": 3.700735092163086} +{"step": 3782, "train/loss": 2.6427488923072815, "train/lm_loss": 2.6427488923072815, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.493403290008847e-06, "perf/tokens_per_sec": 27323.499953877326, "train/loss_prose": 3.3943055470784507, "train/loss_math": 2.220071941614151, "train/loss_code": 2.078787088394165} +{"step": 3783, "train/loss": 1.5311566293239594, "train/lm_loss": 1.5311566293239594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.480299302708059e-06, "perf/tokens_per_sec": 27300.183831112594, "train/loss_code": 1.3499058087666829, "train/loss_math": 2.074909210205078} +{"step": 3784, "train/loss": 2.5770424008369446, "train/lm_loss": 2.5770424008369446, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.467203366908707e-06, "perf/tokens_per_sec": 27449.931748532425, "train/loss_math": 2.063292145729065, "train/loss_code": 0.7610079646110535, "train/loss_prose": 3.416363537311554} +{"step": 3785, "train/loss": 2.197273761034012, "train/lm_loss": 2.197273761034012, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.454115488993591e-06, "perf/tokens_per_sec": 27512.838382463393, "train/loss_prose": 2.8451565504074097, "train/loss_code": 1.7129329442977905, "train/loss_math": 2.1155026853084564} +{"step": 3786, "train/loss": 2.4796421229839325, "train/lm_loss": 2.4796421229839325, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.441035675341583e-06, "perf/tokens_per_sec": 27389.10228106088, "train/loss_code": 1.4113860130310059, "train/loss_math": 2.0240637063980103, "train/loss_prose": 3.2415592670440674} +{"step": 3787, "train/loss": 1.9176126420497894, "train/lm_loss": 1.9176126420497894, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.42796393232762e-06, "perf/tokens_per_sec": 27479.480899376027, "train/loss_code": 1.801593017578125, "train/loss_math": 2.110978643099467} +{"step": 3788, "train/loss": 2.1875610947608948, "train/lm_loss": 2.1875610947608948, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.414900266322723e-06, "perf/tokens_per_sec": 27308.168485370625, "train/loss_code": 1.4852426846822102, "train/loss_prose": 3.518512010574341, "train/loss_math": 2.0025788942972818} +{"step": 3789, "train/loss": 2.6451371908187866, "train/lm_loss": 2.6451371908187866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.40184468369396e-06, "perf/tokens_per_sec": 27358.047888095494, "train/loss_math": 2.2178193092346192, "train/loss_prose": 3.3573338190714517} +{"step": 3790, "train/loss": 2.2984499335289, "train/lm_loss": 2.2984499335289, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.388797190804468e-06, "perf/tokens_per_sec": 26666.17387781584, "train/loss_prose": 3.320228338241577, "train/loss_code": 1.2790589928627014, "train/loss_math": 2.2948683500289917} +{"step": 3791, "train/loss": 2.0030487775802612, "train/lm_loss": 2.0030487775802612, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.375757794013414e-06, "perf/tokens_per_sec": 27394.51784005817, "train/loss_code": 1.3027949452400207, "train/loss_prose": 3.493316411972046, "train/loss_math": 2.52378249168396} +{"step": 3792, "train/loss": 2.463309943675995, "train/lm_loss": 2.463309943675995, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.362726499676069e-06, "perf/tokens_per_sec": 27456.336715059228, "train/loss_code": 1.8891725301742555, "train/loss_prose": 3.4202053546905518} +{"step": 3793, "train/loss": 2.803836226463318, "train/lm_loss": 2.803836226463318, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.34970331414371e-06, "perf/tokens_per_sec": 27378.932060527342, "train/loss_prose": 3.3832640171051027, "train/loss_code": 1.6953277587890625, "train/loss_math": 1.9095209836959839} +{"step": 3794, "train/loss": 2.1801526844501495, "train/lm_loss": 2.1801526844501495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.33668824376369e-06, "perf/tokens_per_sec": 27393.99366010779, "train/loss_math": 2.0755770802497864, "train/loss_code": 1.2747081915537517, "train/loss_prose": 3.1553141276041665} +{"step": 3795, "train/loss": 2.302187591791153, "train/lm_loss": 2.302187591791153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.323681294879394e-06, "perf/tokens_per_sec": 27356.871649434466, "train/loss_math": 2.0877506732940674, "train/loss_code": 1.8152308861414592, "train/loss_prose": 3.3542776107788086} +{"step": 3796, "train/loss": 2.6976967453956604, "train/lm_loss": 2.6976967453956604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.310682473830236e-06, "perf/tokens_per_sec": 27405.61769031735, "train/loss_code": 1.5912141799926758, "train/loss_prose": 3.4491097927093506, "train/loss_math": 2.301352620124817} +{"step": 3797, "train/loss": 2.912430316209793, "train/lm_loss": 2.912430316209793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.297691786951705e-06, "perf/tokens_per_sec": 27413.139233638954, "train/loss_prose": 3.5033451557159423, "train/loss_math": 2.217543601989746, "train/loss_code": 1.347629189491272} +{"step": 3798, "train/loss": 3.092535376548767, "train/lm_loss": 3.092535376548767, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.284709240575297e-06, "perf/tokens_per_sec": 27495.093391204704, "train/loss_prose": 3.2303566251482283, "train/loss_math": 2.1277856826782227} +{"step": 3799, "train/loss": 2.1974822282791138, "train/lm_loss": 2.1974822282791138, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.271734841028553e-06, "perf/tokens_per_sec": 27440.461386600393, "train/loss_code": 1.673350304365158, "train/loss_math": 2.160182476043701, "train/loss_prose": 3.283045768737793} +{"step": 3800, "train/loss": 2.4519515335559845, "train/lm_loss": 2.4519515335559845, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.258768594635022e-06, "perf/tokens_per_sec": 27289.42929077124, "train/loss_prose": 3.326344668865204, "train/loss_math": 1.9949612617492676, "train/loss_code": 1.1601556539535522} +{"step": 3800, "eval/loss": 2.16149135173487, "eval/lm_loss": 2.16149135173487, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.684079022317812, "eval/loss_code": 1.5567813175453988, "eval/ppl_code": 4.743528734761389, "eval/loss_prose": 3.484817880287505, "eval/ppl_prose": 32.61648646378409, "eval/loss_math": 2.0347223239870824, "eval/ppl_math": 7.650127573600796} +{"step": 3801, "train/loss": 2.5947064459323883, "train/lm_loss": 2.5947064459323883, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.245810507714294e-06, "perf/tokens_per_sec": 26658.187887345797, "train/loss_math": 2.1855681339899697, "train/loss_prose": 4.736331939697266, "train/loss_code": 1.5760945479075115} +{"step": 3802, "train/loss": 2.5771727561950684, "train/lm_loss": 2.5771727561950684, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.232860586582001e-06, "perf/tokens_per_sec": 27441.995492322414, "train/loss_math": 2.1991072495778403, "train/loss_prose": 3.5254801909128823, "train/loss_code": 1.7218097448349} +{"step": 3803, "train/loss": 2.6802039742469788, "train/lm_loss": 2.6802039742469788, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.21991883754977e-06, "perf/tokens_per_sec": 27194.309704073636, "train/loss_code": 2.6050033569335938, "train/loss_prose": 3.236888329188029, "train/loss_math": 2.281490921974182} +{"step": 3804, "train/loss": 2.378097653388977, "train/lm_loss": 2.378097653388977, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.206985266925249e-06, "perf/tokens_per_sec": 27420.927504772364, "train/loss_math": 2.3236408829689026, "train/loss_prose": 3.4540895223617554, "train/loss_code": 1.4110187888145447} +{"step": 3805, "train/loss": 2.9476965069770813, "train/lm_loss": 2.9476965069770813, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.194059881012105e-06, "perf/tokens_per_sec": 27237.03564939739, "train/loss_prose": 3.3762917041778566, "train/loss_math": 2.286596179008484, "train/loss_code": 2.1269209384918213} +{"step": 3806, "train/loss": 2.302386611700058, "train/lm_loss": 2.302386611700058, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.181142686110008e-06, "perf/tokens_per_sec": 27329.45476527192, "train/loss_prose": 3.3872267405192056, "train/loss_code": 1.5234723687171936, "train/loss_math": 2.163522481918335} +{"step": 3807, "train/loss": 2.178452491760254, "train/lm_loss": 2.178452491760254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.168233688514654e-06, "perf/tokens_per_sec": 27445.50266389547, "train/loss_math": 1.7589528560638428, "train/loss_code": 1.4316237767537434, "train/loss_prose": 3.2049476305643716} +{"step": 3808, "train/loss": 2.5327788293361664, "train/lm_loss": 2.5327788293361664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.155332894517733e-06, "perf/tokens_per_sec": 27346.899468339117, "train/loss_math": 1.7639538645744324, "train/loss_prose": 3.483659327030182, "train/loss_code": 1.3998427987098694} +{"step": 3809, "train/loss": 2.3116888999938965, "train/lm_loss": 2.3116888999938965, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.142440310406924e-06, "perf/tokens_per_sec": 27408.109837480475, "train/loss_prose": 3.424556255340576, "train/loss_code": 1.01814866065979, "train/loss_math": 1.7408400774002075} +{"step": 3810, "train/loss": 2.3663275241851807, "train/lm_loss": 2.3663275241851807, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.129555942465908e-06, "perf/tokens_per_sec": 27422.10925830451, "train/loss_code": 1.4304876327514648, "train/loss_prose": 3.3679651021957397, "train/loss_math": 2.333428680896759} +{"step": 3811, "train/loss": 2.2934639155864716, "train/lm_loss": 2.2934639155864716, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.116679796974388e-06, "perf/tokens_per_sec": 27330.672126479294, "train/loss_prose": 3.1430304050445557, "train/loss_code": 1.4971681833267212, "train/loss_math": 2.2135576009750366} +{"step": 3812, "train/loss": 2.2280957400798798, "train/lm_loss": 2.2280957400798798, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.103811880208037e-06, "perf/tokens_per_sec": 27384.954712463757, "train/loss_math": 2.3549749851226807, "train/loss_code": 1.3541325330734253, "train/loss_prose": 3.3487216234207153} +{"step": 3813, "train/loss": 2.3122871816158295, "train/lm_loss": 2.3122871816158295, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.09095219843852e-06, "perf/tokens_per_sec": 27425.961606554185, "train/loss_code": 1.4148176908493042, "train/loss_prose": 3.223552385965983, "train/loss_math": 2.291593551635742} +{"step": 3814, "train/loss": 2.1568510234355927, "train/lm_loss": 2.1568510234355927, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.078100757933485e-06, "perf/tokens_per_sec": 27507.508076189013, "train/loss_prose": 3.301388740539551, "train/loss_math": 2.125523845354716, "train/loss_code": 1.4251529773076375} +{"step": 3815, "train/loss": 2.114953875541687, "train/lm_loss": 2.114953875541687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.06525756495657e-06, "perf/tokens_per_sec": 27508.74135181354, "train/loss_code": 2.0081950426101685, "train/loss_math": 1.9296501636505128, "train/loss_prose": 3.2549893856048584} +{"step": 3816, "train/loss": 2.455674409866333, "train/lm_loss": 2.455674409866333, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.052422625767387e-06, "perf/tokens_per_sec": 27448.528319491797, "train/loss_math": 2.13374400138855, "train/loss_prose": 3.135773181915283, "train/loss_code": 1.4174067378044128} +{"step": 3817, "train/loss": 1.813841700553894, "train/lm_loss": 1.813841700553894, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.03959594662155e-06, "perf/tokens_per_sec": 27383.950753144076, "train/loss_code": 1.364258239666621, "train/loss_prose": 3.1625925302505493} +{"step": 3818, "train/loss": 2.746095836162567, "train/lm_loss": 2.746095836162567, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.026777533770605e-06, "perf/tokens_per_sec": 27443.442091979363, "train/loss_math": 2.1240153312683105, "train/loss_prose": 3.3475308418273926, "train/loss_code": 0.9830830097198486} +{"step": 3819, "train/loss": 2.2881148904561996, "train/lm_loss": 2.2881148904561996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.013967393462094e-06, "perf/tokens_per_sec": 27202.57711387643, "train/loss_math": 2.254148244857788, "train/loss_prose": 3.687903960545858, "train/loss_code": 0.9109703898429871} +{"step": 3820, "train/loss": 2.5677353143692017, "train/lm_loss": 2.5677353143692017, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.00116553193952e-06, "perf/tokens_per_sec": 27332.58534179515, "train/loss_prose": 3.3555509448051453, "train/loss_code": 1.685552954673767, "train/loss_math": 1.8742862939834595} +{"step": 3821, "train/loss": 2.0255167484283447, "train/lm_loss": 2.0255167484283447, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.98837195544237e-06, "perf/tokens_per_sec": 27405.311668402774, "train/loss_prose": 2.6981701254844666, "train/loss_math": 1.92605562210083, "train/loss_code": 1.1775157451629639} +{"step": 3822, "train/loss": 2.626530349254608, "train/lm_loss": 2.626530349254608, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.975586670206067e-06, "perf/tokens_per_sec": 27491.353573823166, "train/loss_prose": 3.5047029654184976, "train/loss_math": 2.123659908771515, "train/loss_code": 2.0034945011138916} +{"step": 3823, "train/loss": 3.1355164647102356, "train/lm_loss": 3.1355164647102356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.962809682462009e-06, "perf/tokens_per_sec": 27370.03386071765, "train/loss_math": 2.1724648475646973, "train/loss_prose": 3.456533511479696} +{"step": 3824, "train/loss": 2.7817142605781555, "train/lm_loss": 2.7817142605781555, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.950040998437542e-06, "perf/tokens_per_sec": 27445.15190639822, "train/loss_prose": 3.508798837661743, "train/loss_math": 2.3317458629608154, "train/loss_code": 1.7775132656097412} +{"step": 3825, "train/loss": 2.1317502856254578, "train/lm_loss": 2.1317502856254578, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.937280624355956e-06, "perf/tokens_per_sec": 27471.966841499296, "train/loss_prose": 3.0118110179901123, "train/loss_math": 2.1810237169265747, "train/loss_code": 1.772698998451233} +{"step": 3826, "train/loss": 2.1688574254512787, "train/lm_loss": 2.1688574254512787, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.924528566436531e-06, "perf/tokens_per_sec": 27396.177875584046, "train/loss_math": 2.095459532737732, "train/loss_code": 1.7871726751327515, "train/loss_prose": 3.2992160320281982} +{"step": 3827, "train/loss": 1.7816492319107056, "train/lm_loss": 1.7816492319107056, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.91178483089444e-06, "perf/tokens_per_sec": 27515.746693040128, "train/loss_math": 1.98952317237854, "train/loss_code": 1.1580272912979126} +{"step": 3828, "train/loss": 2.430599272251129, "train/lm_loss": 2.430599272251129, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.899049423940833e-06, "perf/tokens_per_sec": 27458.57477084382, "train/loss_code": 2.2119380235671997, "train/loss_math": 2.1377079486846924, "train/loss_prose": 3.235043168067932} +{"step": 3829, "train/loss": 2.5994082391262054, "train/lm_loss": 2.5994082391262054, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.886322351782783e-06, "perf/tokens_per_sec": 27298.79567220221, "train/loss_prose": 3.461636225382487, "train/loss_math": 2.0727712710698447, "train/loss_code": 2.09602153301239} +{"step": 3830, "train/loss": 2.0544708967208862, "train/lm_loss": 2.0544708967208862, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.873603620623326e-06, "perf/tokens_per_sec": 27307.21355261924, "train/loss_code": 1.4440443068742752, "train/loss_prose": 3.4109954833984375, "train/loss_math": 1.9187996983528137} +{"step": 3831, "train/loss": 2.226116895675659, "train/lm_loss": 2.226116895675659, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.860893236661412e-06, "perf/tokens_per_sec": 26734.934926859634, "train/loss_code": 1.6925427516301472, "train/loss_math": 2.1083810329437256, "train/loss_prose": 3.203081965446472} +{"step": 3832, "train/loss": 2.3799945414066315, "train/lm_loss": 2.3799945414066315, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.848191206091926e-06, "perf/tokens_per_sec": 27115.20607082205, "train/loss_code": 1.7818654378255208, "train/loss_math": 1.8575642108917236, "train/loss_prose": 3.3264107704162598} +{"step": 3833, "train/loss": 1.6241907477378845, "train/lm_loss": 1.6241907477378845, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.835497535105685e-06, "perf/tokens_per_sec": 27065.823676946926, "train/loss_code": 1.294948434829712, "train/loss_math": 2.1729281743367515} +{"step": 3834, "train/loss": 2.440830886363983, "train/lm_loss": 2.440830886363983, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.822812229889428e-06, "perf/tokens_per_sec": 26947.588555187293, "train/loss_prose": 3.161749243736267, "train/loss_math": 2.184958577156067, "train/loss_code": 1.2548665404319763} +{"step": 3835, "train/loss": 1.8132863640785217, "train/lm_loss": 1.8132863640785217, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.810135296625818e-06, "perf/tokens_per_sec": 27005.366821500713, "train/loss_math": 2.0031492710113525, "train/loss_code": 1.3595038175582885, "train/loss_prose": 3.7024729251861572} +{"step": 3836, "train/loss": 2.3776788115501404, "train/lm_loss": 2.3776788115501404, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.797466741493441e-06, "perf/tokens_per_sec": 27090.748681720703, "train/loss_math": 2.11018168926239, "train/loss_code": 1.8978280425071716, "train/loss_prose": 3.3925236463546753} +{"step": 3837, "train/loss": 2.3097143173217773, "train/lm_loss": 2.3097143173217773, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.784806570666795e-06, "perf/tokens_per_sec": 27013.264878242837, "train/loss_math": 1.9424627423286438, "train/loss_prose": 3.268978714942932, "train/loss_code": 2.0849525332450867} +{"step": 3838, "train/loss": 2.043629616498947, "train/lm_loss": 2.043629616498947, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.772154790316294e-06, "perf/tokens_per_sec": 26909.136620512498, "train/loss_math": 1.8506234486897786, "train/loss_code": 1.3625206748644512, "train/loss_prose": 3.354801654815674} +{"step": 3839, "train/loss": 2.1348599791526794, "train/lm_loss": 2.1348599791526794, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.759511406608255e-06, "perf/tokens_per_sec": 26934.618278052665, "train/loss_prose": 3.2317116260528564, "train/loss_math": 2.2886147499084473, "train/loss_code": 1.5642362435658772} +{"step": 3840, "train/loss": 2.6083051562309265, "train/lm_loss": 2.6083051562309265, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.746876425704922e-06, "perf/tokens_per_sec": 27022.740145590447, "train/loss_code": 0.8306542038917542, "train/loss_prose": 3.047517919540405, "train/loss_math": 2.3990989923477173} +{"step": 3841, "train/loss": 1.915947288274765, "train/lm_loss": 1.915947288274765, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.734249853764427e-06, "perf/tokens_per_sec": 26840.190576177978, "train/loss_code": 1.5050409317016602, "train/loss_prose": 3.7076923847198486, "train/loss_math": 2.0473406314849854} +{"step": 3842, "train/loss": 2.3468498587608337, "train/lm_loss": 2.3468498587608337, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.721631696940808e-06, "perf/tokens_per_sec": 26966.62295256946, "train/loss_code": 1.6274643739064534, "train/loss_prose": 3.2054924964904785, "train/loss_math": 2.137964129447937} +{"step": 3843, "train/loss": 2.139276623725891, "train/lm_loss": 2.139276623725891, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.709021961384e-06, "perf/tokens_per_sec": 26568.560993715048, "train/loss_prose": 3.4592775106430054, "train/loss_code": 1.5542211830615997, "train/loss_math": 1.9893866777420044} +{"step": 3844, "train/loss": 1.794309377670288, "train/lm_loss": 1.794309377670288, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.696420653239833e-06, "perf/tokens_per_sec": 26512.348373058434, "train/loss_math": 2.005916118621826, "train/loss_code": 1.3288359999656678, "train/loss_prose": 3.698462963104248} +{"step": 3845, "train/loss": 1.946611076593399, "train/lm_loss": 1.946611076593399, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.683827778650032e-06, "perf/tokens_per_sec": 26831.63906016705, "train/loss_math": 2.177245706319809, "train/loss_code": 1.2162928978602092, "train/loss_prose": 3.2150275707244873} +{"step": 3846, "train/loss": 2.1867733001708984, "train/lm_loss": 2.1867733001708984, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.671243343752216e-06, "perf/tokens_per_sec": 27134.94951881692, "train/loss_prose": 3.4066027800242105, "train/loss_code": 1.2256225794553757, "train/loss_math": 2.37188720703125} +{"step": 3847, "train/loss": 2.4812260568141937, "train/lm_loss": 2.4812260568141937, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.65866735467988e-06, "perf/tokens_per_sec": 26970.644763447748, "train/loss_code": 1.111815631389618, "train/loss_math": 2.175810972849528, "train/loss_prose": 3.6995811462402344} +{"step": 3848, "train/loss": 1.9070693850517273, "train/lm_loss": 1.9070693850517273, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.646099817562404e-06, "perf/tokens_per_sec": 27192.114517977323, "train/loss_prose": 3.5932942628860474, "train/loss_code": 1.2456496238708497, "train/loss_math": 1.8417185544967651} +{"step": 3849, "train/loss": 2.1832558810710907, "train/lm_loss": 2.1832558810710907, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.633540738525066e-06, "perf/tokens_per_sec": 27196.246927338925, "train/loss_math": 2.2851258913675943, "train/loss_code": 1.3756365577379863, "train/loss_prose": 3.24187970161438} +{"step": 3850, "train/loss": 1.866262286901474, "train/lm_loss": 1.866262286901474, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.620990123689004e-06, "perf/tokens_per_sec": 27439.321876237813, "train/loss_code": 1.0853037387132645, "train/loss_math": 1.9554778337478638, "train/loss_prose": 3.3389638662338257} +{"step": 3851, "train/loss": 2.2722666561603546, "train/lm_loss": 2.2722666561603546, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.608447979171229e-06, "perf/tokens_per_sec": 26903.068639364596, "train/loss_code": 1.707071453332901, "train/loss_math": 2.1218103766441345, "train/loss_prose": 3.553113579750061} +{"step": 3852, "train/loss": 2.4213241934776306, "train/lm_loss": 2.4213241934776306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.595914311084645e-06, "perf/tokens_per_sec": 27031.881788901108, "train/loss_code": 1.770676851272583, "train/loss_math": 2.0734370946884155, "train/loss_prose": 3.3038962682088218} +{"step": 3853, "train/loss": 1.9918819665908813, "train/lm_loss": 1.9918819665908813, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.5833891255379815e-06, "perf/tokens_per_sec": 27067.358823269915, "train/loss_prose": 3.3084813356399536, "train/loss_code": 1.2641167342662811, "train/loss_math": 2.1308130025863647} +{"step": 3854, "train/loss": 2.1726627349853516, "train/lm_loss": 2.1726627349853516, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.570872428635889e-06, "perf/tokens_per_sec": 27101.860205079665, "train/loss_code": 1.5432199537754059, "train/loss_prose": 3.44219970703125, "train/loss_math": 2.1620112657546997} +{"step": 3855, "train/loss": 2.6579940021038055, "train/lm_loss": 2.6579940021038055, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.558364226478842e-06, "perf/tokens_per_sec": 27118.97266614049, "train/loss_math": 2.295896291732788, "train/loss_code": 1.5682976245880127, "train/loss_prose": 3.5040228366851807} +{"step": 3856, "train/loss": 2.8526211380958557, "train/lm_loss": 2.8526211380958557, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.545864525163188e-06, "perf/tokens_per_sec": 26973.481919936192, "train/loss_prose": 3.474273681640625, "train/loss_math": 2.1119001507759094, "train/loss_code": 1.2258009910583496} +{"step": 3857, "train/loss": 1.7379897832870483, "train/lm_loss": 1.7379897832870483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.533373330781126e-06, "perf/tokens_per_sec": 27419.04566311344, "train/loss_math": 1.8700024684270222, "train/loss_code": 1.3131121397018433, "train/loss_prose": 3.041461944580078} +{"step": 3858, "train/loss": 3.109468936920166, "train/lm_loss": 3.109468936920166, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.520890649420712e-06, "perf/tokens_per_sec": 27408.328468504114, "train/loss_prose": 3.653866243362427, "train/loss_math": 2.2021396160125732} +{"step": 3859, "train/loss": 2.5852839946746826, "train/lm_loss": 2.5852839946746826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.508416487165862e-06, "perf/tokens_per_sec": 27405.967438017055, "train/loss_prose": 3.2680928707122803, "train/loss_math": 2.0906880696614585, "train/loss_code": 1.337835431098938} +{"step": 3860, "train/loss": 1.9669877588748932, "train/lm_loss": 1.9669877588748932, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.495950850096328e-06, "perf/tokens_per_sec": 27378.01580223934, "train/loss_code": 1.1784136176109314, "train/loss_math": 2.293026924133301, "train/loss_prose": 3.775403618812561} +{"step": 3861, "train/loss": 2.3457241654396057, "train/lm_loss": 2.3457241654396057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.483493744287715e-06, "perf/tokens_per_sec": 27279.11636584781, "train/loss_math": 2.1446319222450256, "train/loss_prose": 3.765749931335449, "train/loss_code": 1.327882707118988} +{"step": 3862, "train/loss": 2.1497986912727356, "train/lm_loss": 2.1497986912727356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.471045175811442e-06, "perf/tokens_per_sec": 27332.715798048193, "train/loss_prose": 3.0312968095143638, "train/loss_code": 1.2460904121398926, "train/loss_math": 2.1831135749816895} +{"step": 3863, "train/loss": 2.396859884262085, "train/lm_loss": 2.396859884262085, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.458605150734816e-06, "perf/tokens_per_sec": 27397.226422737, "train/loss_code": 1.3784383833408356, "train/loss_prose": 3.4152815341949463} +{"step": 3864, "train/loss": 2.2416398227214813, "train/lm_loss": 2.2416398227214813, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.4461736751209405e-06, "perf/tokens_per_sec": 27289.862777646285, "train/loss_math": 1.9881706635157268, "train/loss_prose": 3.164922555287679, "train/loss_code": 1.2369197010993958} +{"step": 3865, "train/loss": 2.428229331970215, "train/lm_loss": 2.428229331970215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.433750755028773e-06, "perf/tokens_per_sec": 27413.532916435957, "train/loss_math": 2.3102640509605408, "train/loss_prose": 3.2757810751597085, "train/loss_code": 1.659320871035258} +{"step": 3866, "train/loss": 2.7741119265556335, "train/lm_loss": 2.7741119265556335, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.42133639651309e-06, "perf/tokens_per_sec": 26860.459295462613, "train/loss_code": 2.192106246948242, "train/loss_prose": 3.346090257167816, "train/loss_math": 2.2054762840270996} +{"step": 3867, "train/loss": 2.687255948781967, "train/lm_loss": 2.687255948781967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.408930605624498e-06, "perf/tokens_per_sec": 27022.187593292532, "train/loss_code": 1.4853207170963287, "train/loss_prose": 3.5610563158988953, "train/loss_math": 2.1415905952453613} +{"step": 3868, "train/loss": 2.360345035791397, "train/lm_loss": 2.360345035791397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.396533388409421e-06, "perf/tokens_per_sec": 27118.330558866008, "train/loss_prose": 3.2447166442871094, "train/loss_math": 2.111425995826721, "train/loss_code": 1.641919493675232} +{"step": 3869, "train/loss": 2.226730167865753, "train/lm_loss": 2.226730167865753, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.384144750910133e-06, "perf/tokens_per_sec": 27187.208912661612, "train/loss_prose": 3.242111921310425, "train/loss_math": 2.111884504556656, "train/loss_code": 1.4410396814346313} +{"step": 3870, "train/loss": 3.04691618680954, "train/lm_loss": 3.04691618680954, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.371764699164696e-06, "perf/tokens_per_sec": 27105.195439729134, "train/loss_prose": 3.3782353003819785, "train/loss_math": 2.052958607673645} +{"step": 3871, "train/loss": 2.9615708589553833, "train/lm_loss": 2.9615708589553833, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.359393239206991e-06, "perf/tokens_per_sec": 27079.859688059078, "train/loss_code": 1.1916214227676392, "train/loss_math": 2.266572117805481, "train/loss_prose": 3.593560314178467} +{"step": 3872, "train/loss": 2.1780766248703003, "train/lm_loss": 2.1780766248703003, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.34703037706671e-06, "perf/tokens_per_sec": 27148.585408867446, "train/loss_code": 1.4299011826515198, "train/loss_prose": 3.453732967376709, "train/loss_math": 2.075814406077067} +{"step": 3873, "train/loss": 3.1266764402389526, "train/lm_loss": 3.1266764402389526, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.334676118769382e-06, "perf/tokens_per_sec": 27115.67683801653, "train/loss_code": 1.9525203704833984, "train/loss_prose": 3.508625070254008, "train/loss_math": 2.0091402530670166} +{"step": 3874, "train/loss": 2.3760887384414673, "train/lm_loss": 2.3760887384414673, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.3223304703363135e-06, "perf/tokens_per_sec": 26975.472520294566, "train/loss_math": 2.3885692358016968, "train/loss_prose": 3.156805455684662, "train/loss_code": 0.802174985408783} +{"step": 3875, "train/loss": 2.259966880083084, "train/lm_loss": 2.259966880083084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.309993437784624e-06, "perf/tokens_per_sec": 27439.05892574787, "train/loss_math": 2.111191725730896, "train/loss_code": 1.146573543548584, "train/loss_prose": 3.1886008977890015} +{"step": 3876, "train/loss": 2.354944407939911, "train/lm_loss": 2.354944407939911, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.297665027127229e-06, "perf/tokens_per_sec": 27376.183469631007, "train/loss_code": 1.5137838870286942, "train/loss_prose": 3.196104943752289} +{"step": 3877, "train/loss": 2.3290925920009613, "train/lm_loss": 2.3290925920009613, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.285345244372843e-06, "perf/tokens_per_sec": 27436.079172375907, "train/loss_math": 2.187852644920349, "train/loss_code": 1.1631202697753906, "train/loss_prose": 3.2651782035827637} +{"step": 3878, "train/loss": 2.3288999497890472, "train/lm_loss": 2.3288999497890472, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.273034095525993e-06, "perf/tokens_per_sec": 27409.20302747961, "train/loss_code": 1.6372350215911866, "train/loss_prose": 3.481674591700236} +{"step": 3879, "train/loss": 2.802454113960266, "train/lm_loss": 2.802454113960266, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.260731586586983e-06, "perf/tokens_per_sec": 27449.931748532425, "train/loss_prose": 3.2856392065684, "train/loss_code": 1.3528990149497986} +{"step": 3880, "train/loss": 1.7429361939430237, "train/lm_loss": 1.7429361939430237, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.24843772355189e-06, "perf/tokens_per_sec": 27466.213237280794, "train/loss_code": 1.3846285104751588, "train/loss_math": 2.0824588537216187, "train/loss_prose": 2.8554296493530273} +{"step": 3881, "train/loss": 2.198453664779663, "train/lm_loss": 2.198453664779663, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.236152512412595e-06, "perf/tokens_per_sec": 27296.27999396236, "train/loss_math": 2.0644360184669495, "train/loss_code": 1.2032467126846313, "train/loss_prose": 3.2830055554707847} +{"step": 3882, "train/loss": 2.8585769534111023, "train/lm_loss": 2.8585769534111023, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.223875959156776e-06, "perf/tokens_per_sec": 27491.837492878953, "train/loss_prose": 3.4134809017181396, "train/loss_math": 1.9337369600931804} +{"step": 3883, "train/loss": 2.3590894639492035, "train/lm_loss": 2.3590894639492035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.211608069767867e-06, "perf/tokens_per_sec": 26104.581987701276, "train/loss_prose": 3.36826753616333, "train/loss_math": 1.9665991961956024, "train/loss_code": 0.9015161395072937} +{"step": 3884, "train/loss": 2.266443580389023, "train/lm_loss": 2.266443580389023, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.19934885022509e-06, "perf/tokens_per_sec": 27017.25812686256, "train/loss_math": 2.2890920639038086, "train/loss_code": 1.742405891418457, "train/loss_prose": 3.291871190071106} +{"step": 3885, "train/loss": 2.716895580291748, "train/lm_loss": 2.716895580291748, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.187098306503437e-06, "perf/tokens_per_sec": 27009.442661094945, "train/loss_code": 1.8545175790786743, "train/loss_prose": 3.772411823272705, "train/loss_math": 2.2362977663675943} +{"step": 3886, "train/loss": 2.4499656558036804, "train/lm_loss": 2.4499656558036804, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.174856444573677e-06, "perf/tokens_per_sec": 27142.151678070775, "train/loss_prose": 3.307810664176941, "train/loss_math": 1.7155203223228455, "train/loss_code": 1.4687209129333496} +{"step": 3887, "train/loss": 2.757362186908722, "train/lm_loss": 2.757362186908722, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.162623270402335e-06, "perf/tokens_per_sec": 26962.85623885694, "train/loss_math": 2.1773358980814614, "train/loss_code": 1.7201216220855713, "train/loss_prose": 3.451692283153534} +{"step": 3888, "train/loss": 2.013982266187668, "train/lm_loss": 2.013982266187668, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.150398789951737e-06, "perf/tokens_per_sec": 26987.676660665868, "train/loss_prose": 2.9259650707244873, "train/loss_code": 1.2949755589167278, "train/loss_math": 2.1250004371007285} +{"step": 3889, "train/loss": 2.4664233028888702, "train/lm_loss": 2.4664233028888702, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.138183009179922e-06, "perf/tokens_per_sec": 26651.15755638981, "train/loss_math": 2.020703613758087, "train/loss_code": 0.5269023776054382, "train/loss_prose": 3.7072229385375977} +{"step": 3890, "train/loss": 2.274338036775589, "train/lm_loss": 2.274338036775589, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.125975934040719e-06, "perf/tokens_per_sec": 27121.798299428196, "train/loss_math": 2.0998102724552155, "train/loss_prose": 3.141124963760376, "train/loss_code": 1.7566068172454834} +{"step": 3891, "train/loss": 1.9733197391033173, "train/lm_loss": 1.9733197391033173, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.113777570483701e-06, "perf/tokens_per_sec": 27076.95743463576, "train/loss_prose": 3.2012572288513184, "train/loss_math": 2.0953599214553833, "train/loss_code": 1.2983308285474777} +{"step": 3892, "train/loss": 2.0547222793102264, "train/lm_loss": 2.0547222793102264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.10158792445422e-06, "perf/tokens_per_sec": 26776.896231723225, "train/loss_math": 2.2642269134521484, "train/loss_code": 1.0760093728701274, "train/loss_prose": 3.208534836769104} +{"step": 3893, "train/loss": 2.446879416704178, "train/lm_loss": 2.446879416704178, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.089407001893353e-06, "perf/tokens_per_sec": 27135.463832005244, "train/loss_code": 1.099994421005249, "train/loss_math": 2.1947230398654938, "train/loss_prose": 3.232049544652303} +{"step": 3894, "train/loss": 2.123564660549164, "train/lm_loss": 2.123564660549164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.0772348087379315e-06, "perf/tokens_per_sec": 27496.06150970212, "train/loss_code": 1.5959320068359375, "train/loss_prose": 3.1147286891937256, "train/loss_math": 1.990421175956726} +{"step": 3895, "train/loss": 2.3500216901302338, "train/lm_loss": 2.3500216901302338, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.065071350920538e-06, "perf/tokens_per_sec": 27409.596597227428, "train/loss_prose": 3.4719625314076743, "train/loss_code": 1.5277204811573029, "train/loss_math": 2.2734031677246094} +{"step": 3896, "train/loss": 1.939946323633194, "train/lm_loss": 1.939946323633194, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.052916634369483e-06, "perf/tokens_per_sec": 27490.649721971084, "train/loss_code": 1.2882405320803325, "train/loss_math": 2.330969762802124} +{"step": 3897, "train/loss": 2.8350375294685364, "train/lm_loss": 2.8350375294685364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.0407706650088525e-06, "perf/tokens_per_sec": 27407.36651808932, "train/loss_code": 2.367506742477417, "train/loss_prose": 3.220143508911133, "train/loss_math": 2.106037974357605} +{"step": 3898, "train/loss": 2.28059384226799, "train/lm_loss": 2.28059384226799, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.028633448758421e-06, "perf/tokens_per_sec": 27310.252206853685, "train/loss_math": 1.9943450093269348, "train/loss_prose": 3.4593504667282104, "train/loss_code": 1.6743348836898804} +{"step": 3899, "train/loss": 2.741575926542282, "train/lm_loss": 2.741575926542282, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.016504991533726e-06, "perf/tokens_per_sec": 27013.647159457207, "train/loss_math": 2.0471253395080566, "train/loss_prose": 3.3491400241851808, "train/loss_code": 1.5698912143707275} +{"step": 3900, "train/loss": 2.360383242368698, "train/lm_loss": 2.360383242368698, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.0043852992460355e-06, "perf/tokens_per_sec": 27072.690544814468, "train/loss_code": 1.4447120875120163, "train/loss_prose": 3.5969014167785645, "train/loss_math": 2.3135128021240234} +{"step": 3901, "train/loss": 2.210962414741516, "train/lm_loss": 2.210962414741516, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.992274377802327e-06, "perf/tokens_per_sec": 27387.224804916346, "train/loss_prose": 2.9420498609542847, "train/loss_code": 1.6394705176353455, "train/loss_math": 2.1311645209789276} +{"step": 3902, "train/loss": 2.967316061258316, "train/lm_loss": 2.967316061258316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.980172233105334e-06, "perf/tokens_per_sec": 27382.03034041319, "train/loss_prose": 3.6339572429656983, "train/loss_code": 1.4050925970077515, "train/loss_math": 2.0818243622779846} +{"step": 3903, "train/loss": 2.9979318380355835, "train/lm_loss": 2.9979318380355835, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.968078871053488e-06, "perf/tokens_per_sec": 27274.612324471927, "train/loss_math": 2.1751600205898285, "train/loss_prose": 3.820703864097595} +{"step": 3904, "train/loss": 2.2200519144535065, "train/lm_loss": 2.2200519144535065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.9559942975409465e-06, "perf/tokens_per_sec": 27236.949286253322, "train/loss_prose": 3.137346347173055, "train/loss_code": 1.617851972579956, "train/loss_math": 1.8769690990447998} +{"step": 3905, "train/loss": 2.300849676132202, "train/lm_loss": 2.300849676132202, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.943918518457584e-06, "perf/tokens_per_sec": 27266.86365829503, "train/loss_code": 1.8072842359542847, "train/loss_prose": 3.113796353340149, "train/loss_math": 2.475034236907959} +{"step": 3906, "train/loss": 2.665407657623291, "train/lm_loss": 2.665407657623291, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.931851539688985e-06, "perf/tokens_per_sec": 27307.60419885428, "train/loss_math": 1.9953622817993164, "train/loss_prose": 3.425436019897461, "train/loss_code": 1.1003599762916565} +{"step": 3907, "train/loss": 2.608986049890518, "train/lm_loss": 2.608986049890518, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.919793367116453e-06, "perf/tokens_per_sec": 27194.00838303638, "train/loss_math": 2.0709451834360757, "train/loss_prose": 3.6895009676615396, "train/loss_code": 1.7952748537063599} +{"step": 3908, "train/loss": 1.9266103506088257, "train/lm_loss": 1.9266103506088257, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.907744006616992e-06, "perf/tokens_per_sec": 27265.998158975628, "train/loss_math": 2.1618236899375916, "train/loss_prose": 3.2768795490264893, "train/loss_code": 1.1629028518994649} +{"step": 3909, "train/loss": 2.341188967227936, "train/lm_loss": 2.341188967227936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.895703464063319e-06, "perf/tokens_per_sec": 27347.595978376452, "train/loss_prose": 4.065357208251953, "train/loss_math": 2.076319992542267, "train/loss_code": 2.2062342166900635} +{"step": 3910, "train/loss": 2.0745125710964203, "train/lm_loss": 2.0745125710964203, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.883671745323833e-06, "perf/tokens_per_sec": 27443.924325757707, "train/loss_math": 2.2273531436920164, "train/loss_code": 1.177039623260498, "train/loss_prose": 3.105255603790283} +{"step": 3911, "train/loss": 2.1754122972488403, "train/lm_loss": 2.1754122972488403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.871648856262666e-06, "perf/tokens_per_sec": 27378.975693404136, "train/loss_math": 1.91881263256073, "train/loss_prose": 3.53677761554718, "train/loss_code": 1.5244348049163818} +{"step": 3912, "train/loss": 2.0839481949806213, "train/lm_loss": 2.0839481949806213, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.85963480273962e-06, "perf/tokens_per_sec": 27314.160041845993, "train/loss_code": 1.423108200232188, "train/loss_prose": 3.1597484350204468, "train/loss_math": 2.027587890625} +{"step": 3913, "train/loss": 2.298252046108246, "train/lm_loss": 2.298252046108246, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.847629590610202e-06, "perf/tokens_per_sec": 27501.95970403952, "train/loss_code": 1.3009082674980164, "train/loss_prose": 3.5974793434143066, "train/loss_math": 2.389944553375244} +{"step": 3914, "train/loss": 2.2713186144828796, "train/lm_loss": 2.2713186144828796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.835633225725605e-06, "perf/tokens_per_sec": 27463.973936202525, "train/loss_code": 1.4569199681282043, "train/loss_prose": 3.7559365034103394, "train/loss_math": 2.0959717830022178} +{"step": 3915, "train/loss": 2.2716715037822723, "train/lm_loss": 2.2716715037822723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.823645713932708e-06, "perf/tokens_per_sec": 27267.426262318448, "train/loss_code": 1.5850034356117249, "train/loss_math": 2.1307458579540253, "train/loss_prose": 3.2401905059814453} +{"step": 3916, "train/loss": 2.0624756813049316, "train/lm_loss": 2.0624756813049316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.811667061074081e-06, "perf/tokens_per_sec": 27468.672498341155, "train/loss_code": 1.7202801704406738, "train/loss_math": 2.3862568140029907, "train/loss_prose": 3.125891923904419} +{"step": 3917, "train/loss": 2.271633744239807, "train/lm_loss": 2.271633744239807, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.7996972729879756e-06, "perf/tokens_per_sec": 27550.42935057146, "train/loss_prose": 3.7739243507385254, "train/loss_code": 1.6633582413196564, "train/loss_math": 1.985893726348877} +{"step": 3918, "train/loss": 2.7121875882148743, "train/lm_loss": 2.7121875882148743, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.787736355508317e-06, "perf/tokens_per_sec": 27500.991170161677, "train/loss_math": 2.125595211982727, "train/loss_prose": 3.298780083656311} +{"step": 3919, "train/loss": 2.0108025670051575, "train/lm_loss": 2.0108025670051575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.775784314464717e-06, "perf/tokens_per_sec": 27529.900430097878, "train/loss_code": 1.509885311126709, "train/loss_prose": 2.8746273517608643, "train/loss_math": 2.170534372329712} +{"step": 3920, "train/loss": 2.654846429824829, "train/lm_loss": 2.654846429824829, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.763841155682443e-06, "perf/tokens_per_sec": 27348.16191890683, "train/loss_math": 2.005582094192505, "train/loss_code": 0.6207295656204224, "train/loss_prose": 3.3213754653930665} +{"step": 3921, "train/loss": 2.217654377222061, "train/lm_loss": 2.217654377222061, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.751906884982462e-06, "perf/tokens_per_sec": 27443.442091979363, "train/loss_prose": 3.419593930244446, "train/loss_code": 1.5711871981620789, "train/loss_math": 2.308648943901062} +{"step": 3922, "train/loss": 2.7410717606544495, "train/lm_loss": 2.7410717606544495, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.739981508181384e-06, "perf/tokens_per_sec": 27511.164179486892, "train/loss_prose": 3.615561544895172, "train/loss_math": 1.963908592859904, "train/loss_code": 1.5746023654937744} +{"step": 3923, "train/loss": 2.353765517473221, "train/lm_loss": 2.353765517473221, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.7280650310915015e-06, "perf/tokens_per_sec": 27464.23736443626, "train/loss_math": 2.0821624199549356, "train/loss_prose": 3.5320681730906167, "train/loss_code": 0.9937163889408112} +{"step": 3924, "train/loss": 2.712628424167633, "train/lm_loss": 2.712628424167633, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.716157459520739e-06, "perf/tokens_per_sec": 27375.703613524784, "train/loss_prose": 3.5638310313224792, "train/loss_code": 1.8544455766677856, "train/loss_math": 1.882366418838501} +{"step": 3925, "train/loss": 1.881157010793686, "train/lm_loss": 1.881157010793686, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.704258799272722e-06, "perf/tokens_per_sec": 27388.70929963763, "train/loss_math": 1.9387641251087189, "train/loss_code": 1.2854154109954834, "train/loss_prose": 3.437953472137451} +{"step": 3926, "train/loss": 2.5143452882766724, "train/lm_loss": 2.5143452882766724, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.692369056146708e-06, "perf/tokens_per_sec": 27490.781691448028, "train/loss_prose": 3.324033260345459, "train/loss_code": 1.3025553524494171, "train/loss_math": 2.106758952140808} +{"step": 3927, "train/loss": 1.7309992909431458, "train/lm_loss": 1.7309992909431458, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.6804882359376126e-06, "perf/tokens_per_sec": 27400.154041712984, "train/loss_code": 1.187173455953598, "train/loss_math": 1.8987127939860027, "train/loss_prose": 3.40316104888916} +{"step": 3928, "train/loss": 2.562864899635315, "train/lm_loss": 2.562864899635315, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.668616344436004e-06, "perf/tokens_per_sec": 27334.107408904176, "train/loss_prose": 3.6791654427846274, "train/loss_math": 2.196056842803955, "train/loss_code": 1.6911030213038127} +{"step": 3929, "train/loss": 2.662588119506836, "train/lm_loss": 2.662588119506836, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.656753387428089e-06, "perf/tokens_per_sec": 27205.76485362142, "train/loss_prose": 3.1596750259399413, "train/loss_code": 1.591441124677658, "train/loss_math": 2.3194477558135986} +{"step": 3930, "train/loss": 2.4079076051712036, "train/lm_loss": 2.4079076051712036, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.6448993706957424e-06, "perf/tokens_per_sec": 27389.669940804062, "train/loss_math": 2.257114839553833, "train/loss_prose": 3.363545536994934, "train/loss_code": 1.2505948543548584} +{"step": 3931, "train/loss": 1.9035634398460388, "train/lm_loss": 1.9035634398460388, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.6330543000164645e-06, "perf/tokens_per_sec": 27259.81143937026, "train/loss_prose": 3.54632031917572, "train/loss_math": 1.981082022190094, "train/loss_code": 1.043425589799881} +{"step": 3932, "train/loss": 2.1312217712402344, "train/lm_loss": 2.1312217712402344, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.621218181163402e-06, "perf/tokens_per_sec": 27398.23135574434, "train/loss_code": 1.6493866046269734, "train/loss_math": 2.165775239467621, "train/loss_prose": 3.4385128021240234} +{"step": 3933, "train/loss": 2.5730921626091003, "train/lm_loss": 2.5730921626091003, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.609391019905317e-06, "perf/tokens_per_sec": 27451.247593603395, "train/loss_prose": 3.710411012172699, "train/loss_math": 2.074845552444458, "train/loss_code": 1.2227493127187092} +{"step": 3934, "train/loss": 2.3210207521915436, "train/lm_loss": 2.3210207521915436, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.5975728220066425e-06, "perf/tokens_per_sec": 27368.98739392047, "train/loss_prose": 3.6366573572158813, "train/loss_code": 1.7128545641899109, "train/loss_math": 2.221716821193695} +{"step": 3935, "train/loss": 2.7331628799438477, "train/lm_loss": 2.7331628799438477, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.58576359322742e-06, "perf/tokens_per_sec": 27376.48884136654, "train/loss_prose": 3.2009761810302733, "train/loss_math": 2.290959119796753, "train/loss_code": 1.2785037755966187} +{"step": 3936, "train/loss": 2.327533036470413, "train/lm_loss": 2.327533036470413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.5739633393233245e-06, "perf/tokens_per_sec": 27392.246538473803, "train/loss_code": 1.6789037883281708, "train/loss_math": 2.292300820350647, "train/loss_prose": 3.6600236892700195} +{"step": 3937, "train/loss": 1.8174428343772888, "train/lm_loss": 1.8174428343772888, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.562172066045655e-06, "perf/tokens_per_sec": 27478.777655329053, "train/loss_math": 2.019718130429586, "train/loss_code": 1.3131115287542343, "train/loss_prose": 3.2279417514801025} +{"step": 3938, "train/loss": 2.2354134619235992, "train/lm_loss": 2.2354134619235992, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.550389779141336e-06, "perf/tokens_per_sec": 27399.93554108108, "train/loss_math": 1.7716574271519978, "train/loss_prose": 3.680178642272949, "train/loss_code": 1.7359928290049236} +{"step": 3939, "train/loss": 2.7633768916130066, "train/lm_loss": 2.7633768916130066, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.538616484352902e-06, "perf/tokens_per_sec": 27416.639032782125, "train/loss_code": 1.3264409303665161, "train/loss_math": 2.401031732559204, "train/loss_prose": 3.7254823048909507} +{"step": 3940, "train/loss": 2.175507992506027, "train/lm_loss": 2.175507992506027, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.5268521874185304e-06, "perf/tokens_per_sec": 27369.728632968825, "train/loss_code": 1.493092119693756, "train/loss_math": 2.327108860015869, "train/loss_prose": 3.3887388706207275} +{"step": 3941, "train/loss": 1.7637686431407928, "train/lm_loss": 1.7637686431407928, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.515096894071995e-06, "perf/tokens_per_sec": 27258.427383028247, "train/loss_math": 2.225808064142863, "train/loss_code": 1.4865448951721192} +{"step": 3942, "train/loss": 1.8891205191612244, "train/lm_loss": 1.8891205191612244, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.503350610042669e-06, "perf/tokens_per_sec": 27414.1890795592, "train/loss_code": 1.2679579108953476, "train/loss_prose": 3.274024248123169, "train/loss_math": 2.255702575047811} +{"step": 3943, "train/loss": 2.499568521976471, "train/lm_loss": 2.499568521976471, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.4916133410555466e-06, "perf/tokens_per_sec": 27361.40290211455, "train/loss_code": 1.2456100384394329, "train/loss_math": 2.6309099197387695, "train/loss_prose": 3.4072017669677734} +{"step": 3944, "train/loss": 2.071543276309967, "train/lm_loss": 2.071543276309967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.47988509283125e-06, "perf/tokens_per_sec": 27347.421847540943, "train/loss_math": 2.1748096545537314, "train/loss_prose": 3.102966070175171, "train/loss_code": 1.2806615630785625} +{"step": 3945, "train/loss": 2.1159130334854126, "train/lm_loss": 2.1159130334854126, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.468165871085971e-06, "perf/tokens_per_sec": 27346.638286221838, "train/loss_math": 1.9088681936264038, "train/loss_code": 1.5018510460853576, "train/loss_prose": 3.7545899152755737} +{"step": 3946, "train/loss": 2.3289718627929688, "train/lm_loss": 2.3289718627929688, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.456455681531523e-06, "perf/tokens_per_sec": 27466.74018472243, "train/loss_prose": 3.4005024433135986, "train/loss_math": 2.0887245893478394, "train/loss_code": 1.3871468305587769} +{"step": 3947, "train/loss": 2.8312538862228394, "train/lm_loss": 2.8312538862228394, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.444754529875302e-06, "perf/tokens_per_sec": 27447.914364411383, "train/loss_prose": 3.3810292720794677, "train/loss_math": 1.9149616956710815} +{"step": 3948, "train/loss": 2.0615630447864532, "train/lm_loss": 2.0615630447864532, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.433062421820308e-06, "perf/tokens_per_sec": 27334.411842668418, "train/loss_math": 2.305367887020111, "train/loss_code": 1.4870310227076213, "train/loss_prose": 2.8099400997161865} +{"step": 3949, "train/loss": 2.456126093864441, "train/lm_loss": 2.456126093864441, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.421379363065142e-06, "perf/tokens_per_sec": 27370.51351807774, "train/loss_code": 1.456652045249939, "train/loss_prose": 3.5379607677459717, "train/loss_math": 2.3325849771499634} +{"step": 3950, "train/loss": 2.4992476105690002, "train/lm_loss": 2.4992476105690002, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.409705359303986e-06, "perf/tokens_per_sec": 27370.339095274358, "train/loss_math": 2.124985138575236, "train/loss_code": 1.1212602853775024, "train/loss_prose": 3.1244412660598755} +{"step": 3951, "train/loss": 2.4714956879615784, "train/lm_loss": 2.4714956879615784, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.398040416226592e-06, "perf/tokens_per_sec": 27306.69270835817, "train/loss_math": 2.3909332752227783, "train/loss_prose": 2.9376481771469116, "train/loss_code": 2.166467010974884} +{"step": 3952, "train/loss": 2.5732975006103516, "train/lm_loss": 2.5732975006103516, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.38638453951832e-06, "perf/tokens_per_sec": 27357.481538336597, "train/loss_math": 2.0777453978856406, "train/loss_prose": 4.058442989985148, "train/loss_code": 1.0889070630073547} +{"step": 3953, "train/loss": 2.192343980073929, "train/lm_loss": 2.192343980073929, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.374737734860098e-06, "perf/tokens_per_sec": 27217.91957819825, "train/loss_prose": 3.314876675605774, "train/loss_code": 1.7227883040904999, "train/loss_math": 2.008922576904297} +{"step": 3954, "train/loss": 2.4517085552215576, "train/lm_loss": 2.4517085552215576, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.363100007928446e-06, "perf/tokens_per_sec": 27436.648781072083, "train/loss_math": 2.025008201599121, "train/loss_prose": 3.2895134687423706, "train/loss_code": 1.2027989625930786} +{"step": 3955, "train/loss": 2.3923031389713287, "train/lm_loss": 2.3923031389713287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.3514713643954475e-06, "perf/tokens_per_sec": 27349.685721381313, "train/loss_prose": 3.609504143397013, "train/loss_code": 1.4513259728749592, "train/loss_math": 1.9779674410820007} +{"step": 3956, "train/loss": 2.5818578004837036, "train/lm_loss": 2.5818578004837036, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.339851809928762e-06, "perf/tokens_per_sec": 27483.17352124843, "train/loss_prose": 3.2508140087127684, "train/loss_math": 2.3018572330474854, "train/loss_code": 1.0494676530361176} +{"step": 3957, "train/loss": 1.947274997830391, "train/lm_loss": 1.947274997830391, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.3282413501916184e-06, "perf/tokens_per_sec": 27420.00843355226, "train/loss_math": 2.03928484916687, "train/loss_code": 0.9921322762966156, "train/loss_prose": 3.3975112438201904} +{"step": 3958, "train/loss": 2.5374293625354767, "train/lm_loss": 2.5374293625354767, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.316639990842804e-06, "perf/tokens_per_sec": 27305.0434992681, "train/loss_prose": 3.444793939590454, "train/loss_math": 2.0377464294433594, "train/loss_code": 1.22238290309906} +{"step": 3959, "train/loss": 2.5592969059944153, "train/lm_loss": 2.5592969059944153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.305047737536707e-06, "perf/tokens_per_sec": 27352.95158427018, "train/loss_code": 1.6597795486450195, "train/loss_math": 1.920624852180481, "train/loss_prose": 3.328391671180725} +{"step": 3960, "train/loss": 2.6054627001285553, "train/lm_loss": 2.6054627001285553, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.2934645959232265e-06, "perf/tokens_per_sec": 27398.974179699086, "train/loss_math": 1.993508219718933, "train/loss_prose": 3.504695951938629, "train/loss_code": 1.418950617313385} +{"step": 3961, "train/loss": 2.3491814732551575, "train/lm_loss": 2.3491814732551575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.281890571647853e-06, "perf/tokens_per_sec": 27405.61769031735, "train/loss_code": 1.6221978425979615, "train/loss_prose": 3.5608206590016684} +{"step": 3962, "train/loss": 2.7388021647930145, "train/lm_loss": 2.7388021647930145, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.270325670351615e-06, "perf/tokens_per_sec": 27454.31838941962, "train/loss_code": 1.8823112845420837, "train/loss_prose": 3.4905133843421936, "train/loss_math": 2.091870427131653} +{"step": 3963, "train/loss": 2.557302802801132, "train/lm_loss": 2.557302802801132, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.258769897671124e-06, "perf/tokens_per_sec": 27362.05657204016, "train/loss_math": 2.536557674407959, "train/loss_prose": 3.4900574684143066, "train/loss_code": 1.1892884373664856} +{"step": 3964, "train/loss": 2.1452992260456085, "train/lm_loss": 2.1452992260456085, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.247223259238511e-06, "perf/tokens_per_sec": 27502.53205157965, "train/loss_prose": 3.6166679859161377, "train/loss_code": 1.3593709170818329, "train/loss_math": 2.2457874417304993} +{"step": 3965, "train/loss": 2.6322585940361023, "train/lm_loss": 2.6322585940361023, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.235685760681473e-06, "perf/tokens_per_sec": 27576.69796865093, "train/loss_prose": 3.4899707436561584, "train/loss_math": 2.0864857037862143, "train/loss_code": 0.8387293815612793} +{"step": 3966, "train/loss": 2.048537015914917, "train/lm_loss": 2.048537015914917, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.224157407623243e-06, "perf/tokens_per_sec": 26638.470995942178, "train/loss_code": 1.7963592290878296, "train/loss_math": 2.1569855213165283, "train/loss_prose": 3.092528820037842} +{"step": 3967, "train/loss": 2.1592634320259094, "train/lm_loss": 2.1592634320259094, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.2126382056826e-06, "perf/tokens_per_sec": 26670.97605027471, "train/loss_math": 1.94911527633667, "train/loss_prose": 3.8299899101257324, "train/loss_code": 1.4289740324020386} +{"step": 3968, "train/loss": 2.341487020254135, "train/lm_loss": 2.341487020254135, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.201128160473865e-06, "perf/tokens_per_sec": 27137.607013497825, "train/loss_math": 2.3131104707717896, "train/loss_prose": 3.3913222551345825, "train/loss_code": 1.3484049439430237} +{"step": 3969, "train/loss": 2.4068673849105835, "train/lm_loss": 2.4068673849105835, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.189627277606894e-06, "perf/tokens_per_sec": 26959.59824557274, "train/loss_math": 1.7839751640955608, "train/loss_prose": 3.742173115412394, "train/loss_code": 1.3382467031478882} +{"step": 3970, "train/loss": 2.3049875795841217, "train/lm_loss": 2.3049875795841217, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.178135562687076e-06, "perf/tokens_per_sec": 26269.19429412853, "train/loss_math": 1.8695353269577026, "train/loss_code": 1.5156515538692474, "train/loss_prose": 4.3191118240356445} +{"step": 3971, "train/loss": 2.4567264318466187, "train/lm_loss": 2.4567264318466187, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.1666530213153355e-06, "perf/tokens_per_sec": 26087.616159867586, "train/loss_math": 2.1750423192977903, "train/loss_code": 1.6262891292572021, "train/loss_prose": 3.5761555433273315} +{"step": 3972, "train/loss": 2.684873938560486, "train/lm_loss": 2.684873938560486, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.155179659088114e-06, "perf/tokens_per_sec": 26241.38775535142, "train/loss_prose": 3.2903923988342285, "train/loss_code": 1.5135915875434875, "train/loss_math": 1.9998453855514526} +{"step": 3973, "train/loss": 1.9238832592964172, "train/lm_loss": 1.9238832592964172, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.143715481597404e-06, "perf/tokens_per_sec": 27187.036818301225, "train/loss_code": 1.083904320001602, "train/loss_prose": 4.024197936058044, "train/loss_math": 1.9231488704681396} +{"step": 3974, "train/loss": 2.557346761226654, "train/lm_loss": 2.557346761226654, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.1322604944307e-06, "perf/tokens_per_sec": 26468.51976064141, "train/loss_math": 2.1142826080322266, "train/loss_prose": 3.2996016144752502, "train/loss_code": 0.9175196290016174} +{"step": 3975, "train/loss": 1.9584826231002808, "train/lm_loss": 1.9584826231002808, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.120814703171024e-06, "perf/tokens_per_sec": 26163.299805068225, "train/loss_math": 1.9499598344167073, "train/loss_code": 1.5537039935588837, "train/loss_prose": 3.6031653881073} +{"step": 3976, "train/loss": 2.048875004053116, "train/lm_loss": 2.048875004053116, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.109378113396913e-06, "perf/tokens_per_sec": 26827.03020952726, "train/loss_code": 1.9020394881566365, "train/loss_math": 2.068526566028595, "train/loss_prose": 2.4107749462127686} +{"step": 3977, "train/loss": 1.7841913104057312, "train/lm_loss": 1.7841913104057312, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.097950730682425e-06, "perf/tokens_per_sec": 26847.06927784272, "train/loss_math": 2.135251373052597, "train/loss_code": 1.4331312477588654} +{"step": 3978, "train/loss": 2.497064232826233, "train/lm_loss": 2.497064232826233, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.086532560597125e-06, "perf/tokens_per_sec": 26383.48360464402, "train/loss_code": 1.632621705532074, "train/loss_prose": 3.492193619410197, "train/loss_math": 2.078229824701945} +{"step": 3979, "train/loss": 2.277705669403076, "train/lm_loss": 2.277705669403076, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.075123608706093e-06, "perf/tokens_per_sec": 26688.211866868616, "train/loss_math": 2.058103561401367, "train/loss_prose": 3.268595337867737, "train/loss_code": 1.393937349319458} +{"step": 3980, "train/loss": 1.7840074598789215, "train/lm_loss": 1.7840074598789215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.06372388056991e-06, "perf/tokens_per_sec": 26692.48282998406, "train/loss_math": 2.032854676246643, "train/loss_code": 1.0374658107757568} +{"step": 3981, "train/loss": 2.5908449292182922, "train/lm_loss": 2.5908449292182922, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.052333381744663e-06, "perf/tokens_per_sec": 27237.553839763328, "train/loss_math": 2.250700306892395, "train/loss_prose": 3.169650435447693, "train/loss_code": 3.1339566707611084} +{"step": 3982, "train/loss": 2.20983749628067, "train/lm_loss": 2.20983749628067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.040952117781953e-06, "perf/tokens_per_sec": 26965.35315621032, "train/loss_prose": 3.276025652885437, "train/loss_code": 1.6419451832771301, "train/loss_math": 1.9606894254684448} +{"step": 3983, "train/loss": 2.2643211483955383, "train/lm_loss": 2.2643211483955383, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.029580094228862e-06, "perf/tokens_per_sec": 27174.09296520962, "train/loss_math": 2.0481163263320923, "train/loss_code": 1.0429043173789978, "train/loss_prose": 3.294804016749064} +{"step": 3984, "train/loss": 2.6978878378868103, "train/lm_loss": 2.6978878378868103, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.01821731662798e-06, "perf/tokens_per_sec": 26839.351951257617, "train/loss_prose": 3.5172855854034424, "train/loss_math": 2.039986550807953, "train/loss_code": 1.7169935703277588} +{"step": 3985, "train/loss": 2.357856035232544, "train/lm_loss": 2.357856035232544, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.006863790517392e-06, "perf/tokens_per_sec": 26757.878579360764, "train/loss_code": 1.2917071183522542, "train/loss_prose": 3.5929203828175864, "train/loss_math": 2.104482412338257} +{"step": 3986, "train/loss": 1.720125526189804, "train/lm_loss": 1.720125526189804, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.9955195214306455e-06, "perf/tokens_per_sec": 27009.65497811541, "train/loss_code": 1.1362379312515258, "train/loss_prose": 3.5800015926361084, "train/loss_math": 2.2499067783355713} +{"step": 3987, "train/loss": 2.655677616596222, "train/lm_loss": 2.655677616596222, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.9841845148968204e-06, "perf/tokens_per_sec": 26549.263606214863, "train/loss_prose": 3.6428702672322593, "train/loss_code": 1.4080098867416382, "train/loss_math": 2.227200150489807} +{"step": 3988, "train/loss": 2.5135985910892487, "train/lm_loss": 2.5135985910892487, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.97285877644046e-06, "perf/tokens_per_sec": 26873.063981216867, "train/loss_prose": 3.5929004351298013, "train/loss_math": 2.1571445862452188, "train/loss_code": 1.4293270707130432} +{"step": 3989, "train/loss": 2.071504771709442, "train/lm_loss": 2.071504771709442, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.961542311581586e-06, "perf/tokens_per_sec": 26493.295948407074, "train/loss_code": 1.4431867996851604, "train/loss_prose": 3.178248643875122, "train/loss_math": 1.9619932969411213} +{"step": 3990, "train/loss": 3.1017099022865295, "train/lm_loss": 3.1017099022865295, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.950235125835707e-06, "perf/tokens_per_sec": 26937.532333908777, "train/loss_prose": 3.4311511516571045, "train/loss_math": 2.1133859157562256} +{"step": 3991, "train/loss": 2.340949833393097, "train/lm_loss": 2.340949833393097, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.9389372247138e-06, "perf/tokens_per_sec": 26582.003605153666, "train/loss_math": 2.0326194365819297, "train/loss_prose": 3.470079263051351, "train/loss_code": 1.1097512245178223} +{"step": 3992, "train/loss": 1.7199154794216156, "train/lm_loss": 1.7199154794216156, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.9276486137223445e-06, "perf/tokens_per_sec": 26318.532009124236, "train/loss_code": 1.2112215956052144, "train/loss_prose": 3.245997190475464} +{"step": 3993, "train/loss": 2.5313419103622437, "train/lm_loss": 2.5313419103622437, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.9163692983632586e-06, "perf/tokens_per_sec": 26605.79785324576, "train/loss_code": 1.309699296951294, "train/loss_math": 2.106186270713806, "train/loss_prose": 3.505430301030477} +{"step": 3994, "train/loss": 2.6037499010562897, "train/lm_loss": 2.6037499010562897, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.905099284133952e-06, "perf/tokens_per_sec": 27017.64052110789, "train/loss_math": 2.1868511199951173, "train/loss_prose": 3.298581123352051} +{"step": 3995, "train/loss": 2.2052973210811615, "train/lm_loss": 2.2052973210811615, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.893838576527275e-06, "perf/tokens_per_sec": 26858.527622439393, "train/loss_math": 2.432456851005554, "train/loss_code": 0.8139938116073608, "train/loss_prose": 3.4451608657836914} +{"step": 3996, "train/loss": 2.402301609516144, "train/lm_loss": 2.402301609516144, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.882587181031577e-06, "perf/tokens_per_sec": 26317.282197555753, "train/loss_code": 1.0048466920852661, "train/loss_math": 2.1811519861221313, "train/loss_prose": 3.6539032459259033} +{"step": 3997, "train/loss": 1.9377292096614838, "train/lm_loss": 1.9377292096614838, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.871345103130646e-06, "perf/tokens_per_sec": 26968.527871353806, "train/loss_code": 1.2537679970264435, "train/loss_math": 1.8131222128868103, "train/loss_prose": 3.430258274078369} +{"step": 3998, "train/loss": 2.28031724691391, "train/lm_loss": 2.28031724691391, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.860112348303737e-06, "perf/tokens_per_sec": 26817.440497765456, "train/loss_code": 1.0542092621326447, "train/loss_prose": 3.6297115087509155, "train/loss_math": 2.2186740040779114} +{"step": 3999, "train/loss": 2.3451157808303833, "train/lm_loss": 2.3451157808303833, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.848888922025553e-06, "perf/tokens_per_sec": 26573.985732228706, "train/loss_code": 1.3251294791698456, "train/loss_prose": 3.3651022911071777} +{"step": 4000, "train/loss": 2.441628158092499, "train/lm_loss": 2.441628158092499, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.837674829766257e-06, "perf/tokens_per_sec": 25974.574332752254, "train/loss_code": 1.3947187860806782, "train/loss_prose": 3.3242765069007874, "train/loss_math": 2.051762580871582} +{"step": 4000, "eval/loss": 2.161019366444526, "eval/lm_loss": 2.161019366444526, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.679981231883568, "eval/loss_code": 1.557629320243178, "eval/ppl_code": 4.747552965963762, "eval/loss_prose": 3.4862906504095648, "eval/ppl_prose": 32.664558441331415, "eval/loss_math": 2.0307427300620327, "eval/ppl_math": 7.619743670281144} +{"step": 4001, "train/loss": 2.1612904369831085, "train/lm_loss": 2.1612904369831085, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.826470076991458e-06, "perf/tokens_per_sec": 26157.2050615643, "train/loss_code": 1.493477205435435, "train/loss_math": 2.2314252853393555, "train/loss_prose": 3.0578079223632812} +{"step": 4002, "train/loss": 1.9941993057727814, "train/lm_loss": 1.9941993057727814, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.815274669162227e-06, "perf/tokens_per_sec": 26721.12942581797, "train/loss_code": 1.3149236142635345, "train/loss_math": 2.129975199699402, "train/loss_prose": 3.21697461605072} +{"step": 4003, "train/loss": 2.2421009242534637, "train/lm_loss": 2.2421009242534637, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.80408861173507e-06, "perf/tokens_per_sec": 27117.174842394033, "train/loss_math": 2.141397178173065, "train/loss_prose": 2.9759292602539062, "train/loss_code": 0.44343093037605286} +{"step": 4004, "train/loss": 2.47617107629776, "train/lm_loss": 2.47617107629776, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.792911910161922e-06, "perf/tokens_per_sec": 27138.72160167067, "train/loss_prose": 3.8773279190063477, "train/loss_math": 1.8546223640441895, "train/loss_code": 1.3067588210105896} +{"step": 4005, "train/loss": 2.177937865257263, "train/lm_loss": 2.177937865257263, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.781744569890171e-06, "perf/tokens_per_sec": 27103.313924394115, "train/loss_code": 1.7164158423741658, "train/loss_math": 2.1508870124816895, "train/loss_prose": 3.6707065105438232} +{"step": 4006, "train/loss": 2.6408786177635193, "train/lm_loss": 2.6408786177635193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.770586596362659e-06, "perf/tokens_per_sec": 27267.426262318448, "train/loss_prose": 3.8026618162790933, "train/loss_code": 1.3347294330596924, "train/loss_math": 2.3498613834381104} +{"step": 4007, "train/loss": 2.43327397108078, "train/lm_loss": 2.43327397108078, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.759437995017639e-06, "perf/tokens_per_sec": 27209.685936381433, "train/loss_math": 2.4396570920944214, "train/loss_prose": 3.255665381749471, "train/loss_code": 1.6066274245580037} +{"step": 4008, "train/loss": 2.2083888947963715, "train/lm_loss": 2.2083888947963715, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.748298771288801e-06, "perf/tokens_per_sec": 26725.909837480962, "train/loss_code": 1.6832444965839386, "train/loss_prose": 2.9966281255086265, "train/loss_math": 1.9442479610443115} +{"step": 4009, "train/loss": 2.7129801511764526, "train/lm_loss": 2.7129801511764526, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.737168930605272e-06, "perf/tokens_per_sec": 27143.137985515124, "train/loss_code": 1.3175592422485352, "train/loss_prose": 3.397741460800171, "train/loss_math": 2.08001446723938} +{"step": 4010, "train/loss": 1.9967727959156036, "train/lm_loss": 1.9967727959156036, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.726048478391591e-06, "perf/tokens_per_sec": 27113.62270112448, "train/loss_math": 2.070818305015564, "train/loss_prose": 3.1967263221740723, "train/loss_code": 1.359773188829422} +{"step": 4011, "train/loss": 2.374723970890045, "train/lm_loss": 2.374723970890045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.714937420067746e-06, "perf/tokens_per_sec": 27072.60522073481, "train/loss_prose": 3.0893998940785727, "train/loss_code": 1.8682395815849304, "train/loss_math": 1.9977044264475505} +{"step": 4012, "train/loss": 2.3576079607009888, "train/lm_loss": 2.3576079607009888, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.703835761049131e-06, "perf/tokens_per_sec": 27016.450885668277, "train/loss_prose": 2.8854899406433105, "train/loss_code": 1.5721785426139832, "train/loss_math": 2.0872732400894165} +{"step": 4013, "train/loss": 2.198568820953369, "train/lm_loss": 2.198568820953369, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.6927435067465475e-06, "perf/tokens_per_sec": 27077.12813820473, "train/loss_prose": 3.5290286540985107, "train/loss_code": 1.5374229848384857, "train/loss_math": 2.190400719642639} +{"step": 4014, "train/loss": 2.5279638469219208, "train/lm_loss": 2.5279638469219208, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.681660662566224e-06, "perf/tokens_per_sec": 27142.92356475564, "train/loss_math": 2.1963553428649902, "train/loss_prose": 3.816872994105021, "train/loss_code": 1.4601271947224934} +{"step": 4015, "train/loss": 1.8491994142532349, "train/lm_loss": 1.8491994142532349, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.6705872339098186e-06, "perf/tokens_per_sec": 27168.893094810355, "train/loss_code": 1.2370936870574951, "train/loss_math": 2.106865088144938, "train/loss_prose": 3.524625539779663} +{"step": 4016, "train/loss": 2.107913911342621, "train/lm_loss": 2.107913911342621, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.6595232261743795e-06, "perf/tokens_per_sec": 27152.61876832381, "train/loss_code": 1.4317331711451213, "train/loss_prose": 3.2673497200012207, "train/loss_math": 2.325190782546997} +{"step": 4017, "train/loss": 2.3721316754817963, "train/lm_loss": 2.3721316754817963, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.64846864475237e-06, "perf/tokens_per_sec": 27196.333032556697, "train/loss_prose": 3.2714356780052185, "train/loss_code": 1.472827523946762} +{"step": 4018, "train/loss": 2.41468009352684, "train/lm_loss": 2.41468009352684, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.637423495031658e-06, "perf/tokens_per_sec": 27159.916186200193, "train/loss_prose": 3.6606208086013794, "train/loss_math": 2.3730900287628174, "train/loss_code": 1.6256430546442668} +{"step": 4019, "train/loss": 2.269517660140991, "train/lm_loss": 2.269517660140991, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.626387782395512e-06, "perf/tokens_per_sec": 27174.43682844385, "train/loss_math": 2.2593950827916465, "train/loss_prose": 3.0573132038116455, "train/loss_code": 1.5424578189849854} +{"step": 4020, "train/loss": 2.431258261203766, "train/lm_loss": 2.431258261203766, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.615361512222605e-06, "perf/tokens_per_sec": 27271.235269372963, "train/loss_code": 1.5785602728525798, "train/loss_prose": 3.491281429926554, "train/loss_math": 2.120270609855652} +{"step": 4021, "train/loss": 2.57788810133934, "train/lm_loss": 2.57788810133934, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.60434468988702e-06, "perf/tokens_per_sec": 27109.72925411383, "train/loss_prose": 3.214194933573405, "train/loss_code": 2.3985557556152344, "train/loss_math": 2.061136523882548} +{"step": 4022, "train/loss": 1.8197477161884308, "train/lm_loss": 1.8197477161884308, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.593337320758207e-06, "perf/tokens_per_sec": 27226.762279078022, "train/loss_code": 1.2905959725379943, "train/loss_math": 2.263037323951721, "train/loss_prose": 3.5789270401000977} +{"step": 4023, "train/loss": 1.9284991025924683, "train/lm_loss": 1.9284991025924683, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.582339410201029e-06, "perf/tokens_per_sec": 27239.92913124894, "train/loss_math": 2.082833727200826, "train/loss_code": 1.185774604479472, "train/loss_prose": 2.8110839128494263} +{"step": 4024, "train/loss": 2.5878443717956543, "train/lm_loss": 2.5878443717956543, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.571350963575728e-06, "perf/tokens_per_sec": 27154.59297673823, "train/loss_math": 2.1615973711013794, "train/loss_code": 1.5097021460533142, "train/loss_prose": 3.3400389552116394} +{"step": 4025, "train/loss": 1.7152206897735596, "train/lm_loss": 1.7152206897735596, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.56037198623795e-06, "perf/tokens_per_sec": 27237.942495477484, "train/loss_code": 1.1825601905584335, "train/loss_math": 2.0069421927134194, "train/loss_prose": 2.9706976413726807} +{"step": 4026, "train/loss": 2.241606965661049, "train/lm_loss": 2.241606965661049, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.5494024835387125e-06, "perf/tokens_per_sec": 27179.552821758996, "train/loss_prose": 3.368936777114868, "train/loss_code": 1.114277184009552} +{"step": 4027, "train/loss": 2.3443389236927032, "train/lm_loss": 2.3443389236927032, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.538442460824417e-06, "perf/tokens_per_sec": 27120.000101029873, "train/loss_code": 1.5808297793070476, "train/loss_math": 2.341482083002726, "train/loss_prose": 3.493887782096863} +{"step": 4028, "train/loss": 2.0766340792179108, "train/lm_loss": 2.0766340792179108, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.5274919234368425e-06, "perf/tokens_per_sec": 27095.66227002815, "train/loss_math": 2.1096343199412027, "train/loss_code": 1.2977137168248494, "train/loss_prose": 3.1955140829086304} +{"step": 4029, "train/loss": 2.38739076256752, "train/lm_loss": 2.38739076256752, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.5165508767131415e-06, "perf/tokens_per_sec": 27108.78814904582, "train/loss_math": 1.8153653740882874, "train/loss_code": 1.7585277557373047, "train/loss_prose": 3.397603909174601} +{"step": 4030, "train/loss": 2.412806451320648, "train/lm_loss": 2.412806451320648, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.505619325985873e-06, "perf/tokens_per_sec": 27168.29158535621, "train/loss_math": 2.3561528523763022, "train/loss_code": 0.9557443559169769, "train/loss_prose": 3.440834363301595} +{"step": 4031, "train/loss": 2.3409304320812225, "train/lm_loss": 2.3409304320812225, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.494697276582916e-06, "perf/tokens_per_sec": 27028.18186738155, "train/loss_math": 2.4193073511123657, "train/loss_prose": 3.302675485610962, "train/loss_code": 1.3269342184066772} +{"step": 4032, "train/loss": 2.0369352996349335, "train/lm_loss": 2.0369352996349335, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.4837847338275515e-06, "perf/tokens_per_sec": 27132.163994491402, "train/loss_prose": 3.8173446655273438, "train/loss_math": 1.9443558255831401, "train/loss_code": 0.8120017051696777} +{"step": 4033, "train/loss": 2.393339514732361, "train/lm_loss": 2.393339514732361, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.472881703038418e-06, "perf/tokens_per_sec": 27157.81241048761, "train/loss_math": 2.046888679265976, "train/loss_prose": 3.538559079170227, "train/loss_code": 1.9410216808319092} +{"step": 4034, "train/loss": 2.3903207778930664, "train/lm_loss": 2.3903207778930664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.461988189529529e-06, "perf/tokens_per_sec": 27126.12351480185, "train/loss_prose": 3.303847849369049, "train/loss_math": 2.111881971359253, "train/loss_code": 1.2650974988937378} +{"step": 4035, "train/loss": 2.5641315579414368, "train/lm_loss": 2.5641315579414368, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.451104198610249e-06, "perf/tokens_per_sec": 27158.585188182922, "train/loss_prose": 3.5186254382133484, "train/loss_math": 1.7157517671585083, "train/loss_code": 1.57426651318868} +{"step": 4036, "train/loss": 2.3423063158988953, "train/lm_loss": 2.3423063158988953, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.440229735585298e-06, "perf/tokens_per_sec": 27132.678202091032, "train/loss_math": 2.0568806886672975, "train/loss_code": 1.0571401119232178, "train/loss_prose": 3.6984533071517944} +{"step": 4037, "train/loss": 2.4225286543369293, "train/lm_loss": 2.4225286543369293, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.429364805754758e-06, "perf/tokens_per_sec": 27216.108611793632, "train/loss_math": 2.112123465538025, "train/loss_prose": 3.636975646018982, "train/loss_code": 1.5456595420837402} +{"step": 4038, "train/loss": 2.4070390462875366, "train/lm_loss": 2.4070390462875366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.4185094144140665e-06, "perf/tokens_per_sec": 27008.1263700676, "train/loss_math": 2.0861929257710776, "train/loss_code": 1.7802890141805012, "train/loss_prose": 3.828433036804199} +{"step": 4039, "train/loss": 2.2387689352035522, "train/lm_loss": 2.2387689352035522, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.4076635668540075e-06, "perf/tokens_per_sec": 27200.940456719618, "train/loss_prose": 3.3464555144309998, "train/loss_code": 1.1310822814702988} +{"step": 4040, "train/loss": 2.4881692230701447, "train/lm_loss": 2.4881692230701447, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.396827268360716e-06, "perf/tokens_per_sec": 27150.601938801636, "train/loss_code": 1.4980568091074626, "train/loss_math": 2.0703699588775635, "train/loss_prose": 3.3352033495903015} +{"step": 4041, "train/loss": 2.0515795946121216, "train/lm_loss": 2.0515795946121216, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.38600052421567e-06, "perf/tokens_per_sec": 27079.43284706624, "train/loss_prose": 3.3462913036346436, "train/loss_math": 1.9203635454177856, "train/loss_code": 1.4698317050933838} +{"step": 4042, "train/loss": 2.403189033269882, "train/lm_loss": 2.403189033269882, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.3751833396956966e-06, "perf/tokens_per_sec": 27161.247314683944, "train/loss_code": 1.356947660446167, "train/loss_math": 2.1477698485056558, "train/loss_prose": 3.3561023076375327} +{"step": 4043, "train/loss": 2.215321332216263, "train/lm_loss": 2.215321332216263, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.364375720072953e-06, "perf/tokens_per_sec": 27083.18950967703, "train/loss_math": 2.183844268321991, "train/loss_prose": 3.358931303024292, "train/loss_code": 1.2605738639831543} +{"step": 4044, "train/loss": 2.1066499650478363, "train/lm_loss": 2.1066499650478363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.3535776706149505e-06, "perf/tokens_per_sec": 26815.55686252474, "train/loss_math": 1.8921170632044475, "train/loss_prose": 2.986342986424764, "train/loss_code": 1.1089093089103699} +{"step": 4045, "train/loss": 2.054071694612503, "train/lm_loss": 2.054071694612503, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.342789196584527e-06, "perf/tokens_per_sec": 27115.291663641558, "train/loss_math": 2.0081913471221924, "train/loss_prose": 3.055619239807129, "train/loss_code": 1.7813961903254192} +{"step": 4046, "train/loss": 2.0442980229854584, "train/lm_loss": 2.0442980229854584, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.332010303239851e-06, "perf/tokens_per_sec": 27091.26132070116, "train/loss_prose": 2.939347187678019, "train/loss_code": 1.2501827279726665, "train/loss_math": 1.8928974270820618} +{"step": 4047, "train/loss": 2.447696715593338, "train/lm_loss": 2.447696715593338, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.32124099583442e-06, "perf/tokens_per_sec": 27290.079526248235, "train/loss_math": 2.140841007232666, "train/loss_prose": 3.3472543557484946, "train/loss_code": 1.558643877506256} +{"step": 4048, "train/loss": 2.6761929988861084, "train/lm_loss": 2.6761929988861084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.310481279617072e-06, "perf/tokens_per_sec": 26939.55987511741, "train/loss_prose": 3.334615647792816, "train/loss_math": 2.0731000900268555, "train/loss_code": 1.8517807722091675} +{"step": 4049, "train/loss": 2.2668152153491974, "train/lm_loss": 2.2668152153491974, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.299731159831953e-06, "perf/tokens_per_sec": 26988.270198941824, "train/loss_math": 1.9664841294288635, "train/loss_code": 1.592664122581482, "train/loss_prose": 3.1411869525909424} +{"step": 4050, "train/loss": 2.844164729118347, "train/lm_loss": 2.844164729118347, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.288990641718547e-06, "perf/tokens_per_sec": 27022.61263151603, "train/loss_prose": 3.494153547286987, "train/loss_math": 1.7608498732248943} +{"step": 4051, "train/loss": 1.717603862285614, "train/lm_loss": 1.717603862285614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.2782597305116504e-06, "perf/tokens_per_sec": 27184.154561608655, "train/loss_code": 1.4135523637135823, "train/loss_prose": 3.230712413787842, "train/loss_math": 2.028804302215576} +{"step": 4052, "train/loss": 2.4126750826835632, "train/lm_loss": 2.4126750826835632, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.26753843144138e-06, "perf/tokens_per_sec": 27033.32801054906, "train/loss_math": 2.250030606985092, "train/loss_prose": 3.366665005683899, "train/loss_code": 1.7839741110801697} +{"step": 4053, "train/loss": 3.111817479133606, "train/lm_loss": 3.111817479133606, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.256826749733157e-06, "perf/tokens_per_sec": 27175.597431111393, "train/loss_prose": 3.601169776916504, "train/loss_math": 2.2962305545806885} +{"step": 4054, "train/loss": 2.346418082714081, "train/lm_loss": 2.346418082714081, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.24612469060774e-06, "perf/tokens_per_sec": 27169.064959546566, "train/loss_code": 1.4059618314107258, "train/loss_prose": 4.069722533226013, "train/loss_math": 2.1380046208699546} +{"step": 4055, "train/loss": 2.2447069883346558, "train/lm_loss": 2.2447069883346558, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.235432259281175e-06, "perf/tokens_per_sec": 27168.20565760842, "train/loss_prose": 3.1025797526041665, "train/loss_code": 1.7010798156261444, "train/loss_math": 1.8455967903137207} +{"step": 4056, "train/loss": 2.7905171513557434, "train/lm_loss": 2.7905171513557434, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.224749460964834e-06, "perf/tokens_per_sec": 27150.601938801636, "train/loss_prose": 3.6321390867233276, "train/loss_code": 1.9929454326629639, "train/loss_math": 1.9048448204994202} +{"step": 4057, "train/loss": 2.5271718502044678, "train/lm_loss": 2.5271718502044678, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.214076300865359e-06, "perf/tokens_per_sec": 27081.866020984664, "train/loss_prose": 3.3638299703598022, "train/loss_code": 1.4367745518684387, "train/loss_math": 1.944252610206604} +{"step": 4058, "train/loss": 2.10230416059494, "train/lm_loss": 2.10230416059494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.2034127841847385e-06, "perf/tokens_per_sec": 27144.93925365069, "train/loss_code": 1.4093232154846191, "train/loss_math": 2.0367890199025473, "train/loss_prose": 3.2400485277175903} +{"step": 4059, "train/loss": 2.519162178039551, "train/lm_loss": 2.519162178039551, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.192758916120236e-06, "perf/tokens_per_sec": 27206.238770610704, "train/loss_prose": 3.2362552881240845, "train/loss_code": 1.2173292636871338, "train/loss_math": 1.9969822963078816} +{"step": 4060, "train/loss": 2.5241776406764984, "train/lm_loss": 2.5241776406764984, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.1821147018644155e-06, "perf/tokens_per_sec": 27142.580498586773, "train/loss_code": 1.4433524012565613, "train/loss_math": 2.2712974548339844, "train/loss_prose": 3.1910303235054016} +{"step": 4061, "train/loss": 2.1643359065055847, "train/lm_loss": 2.1643359065055847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.17148014660514e-06, "perf/tokens_per_sec": 27259.076141938676, "train/loss_code": 1.6700334787368774, "train/loss_prose": 3.380306124687195, "train/loss_math": 2.2039079666137695} +{"step": 4062, "train/loss": 2.2676329016685486, "train/lm_loss": 2.2676329016685486, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.160855255525554e-06, "perf/tokens_per_sec": 27245.804358410343, "train/loss_prose": 2.9630936781565347, "train/loss_math": 1.9559251070022583, "train/loss_code": 1.6920031905174255} +{"step": 4063, "train/loss": 2.75946044921875, "train/lm_loss": 2.75946044921875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.150240033804116e-06, "perf/tokens_per_sec": 27307.734416749718, "train/loss_prose": 3.7520328164100647, "train/loss_math": 2.215650796890259, "train/loss_code": 1.6173009872436523} +{"step": 4064, "train/loss": 1.8555767238140106, "train/lm_loss": 1.8555767238140106, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.139634486614544e-06, "perf/tokens_per_sec": 27226.762279078022, "train/loss_code": 1.7894290486971538, "train/loss_math": 2.0540196895599365} +{"step": 4065, "train/loss": 2.3877977430820465, "train/lm_loss": 2.3877977430820465, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.129038619125867e-06, "perf/tokens_per_sec": 27163.351622619433, "train/loss_code": 1.3180776437123616, "train/loss_prose": 3.3338741461435952, "train/loss_math": 2.5732635259628296} +{"step": 4066, "train/loss": 2.2996697425842285, "train/lm_loss": 2.2996697425842285, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.118452436502361e-06, "perf/tokens_per_sec": 27261.887787438172, "train/loss_prose": 3.1228750944137573, "train/loss_math": 2.0407193899154663, "train/loss_code": 2.0175420939922333} +{"step": 4067, "train/loss": 2.5346685349941254, "train/lm_loss": 2.5346685349941254, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.107875943903614e-06, "perf/tokens_per_sec": 27019.76513071186, "train/loss_code": 1.733276143670082, "train/loss_math": 2.4763479232788086, "train/loss_prose": 3.622631867726644} +{"step": 4068, "train/loss": 2.1768493354320526, "train/lm_loss": 2.1768493354320526, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.097309146484486e-06, "perf/tokens_per_sec": 27090.36421521708, "train/loss_code": 1.5763759851455688, "train/loss_math": 2.227294445037842, "train/loss_prose": 3.6528098583221436} +{"step": 4069, "train/loss": 2.2748546600341797, "train/lm_loss": 2.2748546600341797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.086752049395094e-06, "perf/tokens_per_sec": 27047.16000138543, "train/loss_code": 1.6509869337081908, "train/loss_prose": 3.314633766810099} +{"step": 4070, "train/loss": 2.4085792005062103, "train/lm_loss": 2.4085792005062103, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.0762046577808475e-06, "perf/tokens_per_sec": 27226.244499242475, "train/loss_prose": 3.188065767288208, "train/loss_code": 1.8292156457901, "train/loss_math": 2.1083948612213135} +{"step": 4071, "train/loss": 2.0979122519493103, "train/lm_loss": 2.0979122519493103, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.065666976782413e-06, "perf/tokens_per_sec": 27042.263986765247, "train/loss_code": 1.482546865940094, "train/loss_math": 1.9941032528877258, "train/loss_prose": 3.432452082633972} +{"step": 4072, "train/loss": 2.3665035367012024, "train/lm_loss": 2.3665035367012024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.055139011535723e-06, "perf/tokens_per_sec": 27146.91234546052, "train/loss_prose": 3.404075562953949, "train/loss_code": 1.1226804455121357, "train/loss_math": 1.947684645652771} +{"step": 4073, "train/loss": 2.136874705553055, "train/lm_loss": 2.136874705553055, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.044620767171992e-06, "perf/tokens_per_sec": 27354.04037856497, "train/loss_prose": 3.351977586746216, "train/loss_math": 2.2960294485092163, "train/loss_code": 1.1676512757937114} +{"step": 4074, "train/loss": 1.8701956570148468, "train/lm_loss": 1.8701956570148468, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.034112248817685e-06, "perf/tokens_per_sec": 27242.218440490047, "train/loss_code": 1.4165315985679627, "train/loss_math": 2.2563458681106567, "train/loss_prose": 3.3662161827087402} +{"step": 4075, "train/loss": 2.6568761467933655, "train/lm_loss": 2.6568761467933655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.023613461594512e-06, "perf/tokens_per_sec": 27292.377273124428, "train/loss_math": 1.6851516962051392, "train/loss_prose": 3.4185823440551757, "train/loss_code": 0.7917942404747009} +{"step": 4076, "train/loss": 2.6867355406284332, "train/lm_loss": 2.6867355406284332, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.013124410619458e-06, "perf/tokens_per_sec": 27257.00022211927, "train/loss_math": 2.244715849558512, "train/loss_prose": 4.095402558644612, "train/loss_code": 1.2367644309997559} +{"step": 4077, "train/loss": 2.2321099638938904, "train/lm_loss": 2.2321099638938904, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.002645101004766e-06, "perf/tokens_per_sec": 27217.14342022924, "train/loss_code": 1.4964468677838643, "train/loss_prose": 4.122229099273682, "train/loss_math": 2.311327576637268} +{"step": 4078, "train/loss": 2.6730722188949585, "train/lm_loss": 2.6730722188949585, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.992175537857924e-06, "perf/tokens_per_sec": 27130.878560757523, "train/loss_prose": 3.422980546951294, "train/loss_code": 1.1123961210250854, "train/loss_math": 2.0448832511901855} +{"step": 4079, "train/loss": 2.3883333802223206, "train/lm_loss": 2.3883333802223206, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.981715726281666e-06, "perf/tokens_per_sec": 27239.324472293527, "train/loss_code": 1.415018916130066, "train/loss_math": 2.007604201634725, "train/loss_prose": 3.417938788731893} +{"step": 4080, "train/loss": 2.0787925124168396, "train/lm_loss": 2.0787925124168396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.971265671373976e-06, "perf/tokens_per_sec": 27287.78216623437, "train/loss_math": 2.192555570602417, "train/loss_code": 1.1211720407009125, "train/loss_prose": 3.4252171516418457} +{"step": 4081, "train/loss": 2.932183861732483, "train/lm_loss": 2.932183861732483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.960825378228082e-06, "perf/tokens_per_sec": 27258.297634951814, "train/loss_math": 2.0694291591644287, "train/loss_prose": 3.4291882117589316, "train/loss_code": 0.8129128813743591} +{"step": 4082, "train/loss": 2.327654629945755, "train/lm_loss": 2.327654629945755, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9503948519324665e-06, "perf/tokens_per_sec": 26857.47791666927, "train/loss_math": 2.0356184542179108, "train/loss_code": 1.3034439086914062, "train/loss_prose": 3.0584397315979004} +{"step": 4083, "train/loss": 1.9783295392990112, "train/lm_loss": 1.9783295392990112, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.939974097570841e-06, "perf/tokens_per_sec": 27283.838544512062, "train/loss_math": 2.0839233001073203, "train/loss_code": 1.4436293244361877, "train/loss_prose": 3.800349235534668} +{"step": 4084, "train/loss": 2.256405383348465, "train/lm_loss": 2.256405383348465, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.929563120222141e-06, "perf/tokens_per_sec": 27144.639025692803, "train/loss_code": 1.0717956026395161, "train/loss_prose": 3.2452738285064697, "train/loss_math": 1.8547611236572266} +{"step": 4085, "train/loss": 2.479877471923828, "train/lm_loss": 2.479877471923828, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9191619249605606e-06, "perf/tokens_per_sec": 27209.685936381433, "train/loss_math": 2.102957089742025, "train/loss_code": 1.1141170859336853, "train/loss_prose": 3.7673046588897705} +{"step": 4086, "train/loss": 2.682704597711563, "train/lm_loss": 2.682704597711563, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.908770516855505e-06, "perf/tokens_per_sec": 27229.351473692885, "train/loss_code": 0.970498522122701, "train/loss_math": 2.2382802963256836, "train/loss_prose": 4.077965259552002} +{"step": 4087, "train/loss": 2.3744683861732483, "train/lm_loss": 2.3744683861732483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.898388900971634e-06, "perf/tokens_per_sec": 26769.386035249223, "train/loss_math": 1.9241969287395477, "train/loss_code": 1.7458942532539368, "train/loss_prose": 3.9035855531692505} +{"step": 4088, "train/loss": 2.112943261861801, "train/lm_loss": 2.112943261861801, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.888017082368815e-06, "perf/tokens_per_sec": 27284.271853788225, "train/loss_math": 2.0024641156196594, "train/loss_prose": 3.2997844219207764, "train/loss_code": 1.1470602750778198} +{"step": 4089, "train/loss": 2.3126906752586365, "train/lm_loss": 2.3126906752586365, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.877655066102149e-06, "perf/tokens_per_sec": 27244.292109244416, "train/loss_math": 2.286173661549886, "train/loss_prose": 3.0754690170288086, "train/loss_code": 1.7090139389038086} +{"step": 4090, "train/loss": 2.1142112612724304, "train/lm_loss": 2.1142112612724304, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.867302857221953e-06, "perf/tokens_per_sec": 27133.7495305257, "train/loss_math": 1.946090817451477, "train/loss_code": 1.3197144269943237, "train/loss_prose": 3.2449487447738647} +{"step": 4091, "train/loss": 2.146272897720337, "train/lm_loss": 2.146272897720337, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.856960460773766e-06, "perf/tokens_per_sec": 26924.951508075195, "train/loss_math": 2.108444666862488, "train/loss_code": 1.4459648132324219, "train/loss_prose": 3.736029863357544} +{"step": 4092, "train/loss": 1.735526978969574, "train/lm_loss": 1.735526978969574, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.84662788179836e-06, "perf/tokens_per_sec": 27104.810563068073, "train/loss_code": 1.3905344208081563, "train/loss_prose": 2.7705050110816956} +{"step": 4093, "train/loss": 2.5359549820423126, "train/lm_loss": 2.5359549820423126, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.836305125331694e-06, "perf/tokens_per_sec": 27197.62467625398, "train/loss_math": 2.1868446469306946, "train/loss_prose": 3.531322956085205, "train/loss_code": 0.9462916851043701} +{"step": 4094, "train/loss": 2.1284037828445435, "train/lm_loss": 2.1284037828445435, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.825992196404957e-06, "perf/tokens_per_sec": 27111.611660291128, "train/loss_math": 2.312097946802775, "train/loss_code": 1.17737877368927, "train/loss_prose": 3.279399871826172} +{"step": 4095, "train/loss": 2.3319767117500305, "train/lm_loss": 2.3319767117500305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.8156891000445406e-06, "perf/tokens_per_sec": 26962.136873906322, "train/loss_math": 2.1984599431355796, "train/loss_code": 1.7196002006530762, "train/loss_prose": 3.7454540729522705} +{"step": 4096, "train/loss": 2.327598810195923, "train/lm_loss": 2.327598810195923, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.805395841272062e-06, "perf/tokens_per_sec": 26935.547328845085, "train/loss_prose": 3.2305848598480225, "train/loss_math": 2.2381550073623657, "train/loss_code": 1.6035001873970032} +{"step": 4097, "train/loss": 2.3838859498500824, "train/lm_loss": 2.3838859498500824, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.795112425104323e-06, "perf/tokens_per_sec": 27142.794913926034, "train/loss_math": 2.2707854509353638, "train/loss_code": 1.1641348004341125, "train/loss_prose": 3.829837918281555} +{"step": 4098, "train/loss": 2.0802908539772034, "train/lm_loss": 2.0802908539772034, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.784838856553337e-06, "perf/tokens_per_sec": 27123.853675325947, "train/loss_math": 2.1180909474690757, "train/loss_code": 1.5075005888938904, "train/loss_prose": 4.25805139541626} +{"step": 4099, "train/loss": 2.467337489128113, "train/lm_loss": 2.467337489128113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7745751406263165e-06, "perf/tokens_per_sec": 27226.244499242475, "train/loss_math": 2.2661807537078857, "train/loss_code": 1.7805418968200684, "train/loss_prose": 3.7992663383483887} +{"step": 4100, "train/loss": 2.2691726088523865, "train/lm_loss": 2.2691726088523865, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.764321282325671e-06, "perf/tokens_per_sec": 27237.942495477484, "train/loss_math": 2.163872996966044, "train/loss_prose": 3.6441761255264282, "train/loss_code": 1.4578033288319905} +{"step": 4101, "train/loss": 2.5753466188907623, "train/lm_loss": 2.5753466188907623, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.754077286649006e-06, "perf/tokens_per_sec": 27195.127609121675, "train/loss_code": 1.7401820023854573, "train/loss_prose": 3.5987297693888345, "train/loss_math": 2.293018937110901} +{"step": 4102, "train/loss": 2.820955455303192, "train/lm_loss": 2.820955455303192, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.74384315858912e-06, "perf/tokens_per_sec": 27243.168832816376, "train/loss_prose": 3.409678816795349, "train/loss_math": 2.138446013132731, "train/loss_code": 2.5135905742645264} +{"step": 4103, "train/loss": 2.1384810507297516, "train/lm_loss": 2.1384810507297516, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.733618903134004e-06, "perf/tokens_per_sec": 27210.332377216004, "train/loss_code": 1.267865613102913, "train/loss_prose": 3.7438570261001587, "train/loss_math": 2.2743358612060547} +{"step": 4104, "train/loss": 2.0403415858745575, "train/lm_loss": 2.0403415858745575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.723404525266839e-06, "perf/tokens_per_sec": 26417.074946335102, "train/loss_code": 1.3169582039117813, "train/loss_prose": 3.074026107788086, "train/loss_math": 1.8328213691711426} +{"step": 4105, "train/loss": 2.2598003447055817, "train/lm_loss": 2.2598003447055817, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.713200029965978e-06, "perf/tokens_per_sec": 27043.115340237408, "train/loss_code": 1.7351974546909332, "train/loss_math": 1.9971439838409424, "train/loss_prose": 3.571662187576294} +{"step": 4106, "train/loss": 2.578423500061035, "train/lm_loss": 2.578423500061035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.703005422204979e-06, "perf/tokens_per_sec": 27124.6673476831, "train/loss_prose": 3.290919542312622, "train/loss_math": 2.0683139165242515, "train/loss_code": 1.2587683200836182} +{"step": 4107, "train/loss": 2.5845248699188232, "train/lm_loss": 2.5845248699188232, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.69282070695257e-06, "perf/tokens_per_sec": 27182.133044738523, "train/loss_prose": 3.217137177785238, "train/loss_math": 2.1346768140792847, "train/loss_code": 2.4860804080963135} +{"step": 4108, "train/loss": 1.3794524371623993, "train/lm_loss": 1.3794524371623993, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.682645889172651e-06, "perf/tokens_per_sec": 27109.857591685824, "train/loss_code": 1.1641969879468281, "train/loss_math": 2.0252187252044678} +{"step": 4109, "train/loss": 2.992221772670746, "train/lm_loss": 2.992221772670746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.672480973824311e-06, "perf/tokens_per_sec": 27121.241688728496, "train/loss_prose": 3.2944557666778564, "train/loss_code": 1.536568522453308, "train/loss_math": 2.6344711780548096} +{"step": 4110, "train/loss": 2.1884601414203644, "train/lm_loss": 2.1884601414203644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6623259658617996e-06, "perf/tokens_per_sec": 27168.549371860885, "train/loss_prose": 3.0707934697469077, "train/loss_code": 1.2946164806683857, "train/loss_math": 2.2057251930236816} +{"step": 4111, "train/loss": 2.592131346464157, "train/lm_loss": 2.592131346464157, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6521808702345514e-06, "perf/tokens_per_sec": 27133.32098899971, "train/loss_code": 1.519952932993571, "train/loss_prose": 3.4851266145706177, "train/loss_math": 2.236685037612915} +{"step": 4112, "train/loss": 2.7961777448654175, "train/lm_loss": 2.7961777448654175, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6420456918871565e-06, "perf/tokens_per_sec": 26698.165744345246, "train/loss_code": 1.172207236289978, "train/loss_prose": 3.552920866012573, "train/loss_math": 1.7163047790527344} +{"step": 4113, "train/loss": 2.1900548934936523, "train/lm_loss": 2.1900548934936523, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.63192043575938e-06, "perf/tokens_per_sec": 27168.936060790562, "train/loss_code": 1.2995255589485168, "train/loss_prose": 2.9787886142730713, "train/loss_math": 2.342748522758484} +{"step": 4114, "train/loss": 2.6823271214962006, "train/lm_loss": 2.6823271214962006, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.621805106786142e-06, "perf/tokens_per_sec": 27026.18348055217, "train/loss_code": 1.6781416535377502, "train/loss_math": 2.111522138118744, "train/loss_prose": 3.4698222875595093} +{"step": 4115, "train/loss": 2.361237049102783, "train/lm_loss": 2.361237049102783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6116997098975465e-06, "perf/tokens_per_sec": 26974.710127997387, "train/loss_math": 2.324333095550537, "train/loss_prose": 3.3586082458496094, "train/loss_code": 1.954811453819275} +{"step": 4116, "train/loss": 2.449508160352707, "train/lm_loss": 2.449508160352707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.601604250018831e-06, "perf/tokens_per_sec": 27229.696736849448, "train/loss_math": 2.126874715089798, "train/loss_prose": 3.148723602294922, "train/loss_code": 1.6423954963684082} +{"step": 4117, "train/loss": 2.3846198320388794, "train/lm_loss": 2.3846198320388794, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.591518732070402e-06, "perf/tokens_per_sec": 27199.131748974483, "train/loss_math": 2.1142207980155945, "train/loss_prose": 3.1958165168762207} +{"step": 4118, "train/loss": 1.9989993870258331, "train/lm_loss": 1.9989993870258331, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.581443160967816e-06, "perf/tokens_per_sec": 27101.56092928458, "train/loss_math": 1.8223506450653075, "train/loss_code": 1.725149154663086, "train/loss_prose": 3.4299445152282715} +{"step": 4119, "train/loss": 2.2536192536354065, "train/lm_loss": 2.2536192536354065, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.571377541621788e-06, "perf/tokens_per_sec": 27103.44220123276, "train/loss_math": 2.1374722719192505, "train/loss_prose": 3.2085461616516113, "train/loss_code": 1.3761238257090251} +{"step": 4120, "train/loss": 2.440065562725067, "train/lm_loss": 2.440065562725067, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.561321878938177e-06, "perf/tokens_per_sec": 27176.58617084047, "train/loss_math": 2.1863654613494874, "train/loss_code": 1.9045755863189697, "train/loss_prose": 3.342060089111328} +{"step": 4121, "train/loss": 2.4007332623004913, "train/lm_loss": 2.4007332623004913, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.551276177817989e-06, "perf/tokens_per_sec": 27188.929976102678, "train/loss_math": 1.8665951490402222, "train/loss_prose": 3.453078269958496, "train/loss_code": 1.3802506923675537} +{"step": 4122, "train/loss": 2.1252951622009277, "train/lm_loss": 2.1252951622009277, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.54124044315738e-06, "perf/tokens_per_sec": 27157.597757821306, "train/loss_prose": 3.0406363010406494, "train/loss_code": 1.369186282157898, "train/loss_math": 2.27117649714152} +{"step": 4123, "train/loss": 2.137076109647751, "train/lm_loss": 2.137076109647751, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.53121467984764e-06, "perf/tokens_per_sec": 27025.71581567, "train/loss_math": 1.9107801119486492, "train/loss_code": 1.5145864089330037, "train/loss_prose": 3.4102542400360107} +{"step": 4124, "train/loss": 1.8121033906936646, "train/lm_loss": 1.8121033906936646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.521198892775203e-06, "perf/tokens_per_sec": 27067.444114282833, "train/loss_code": 1.2933497428894043, "train/loss_prose": 3.368364453315735} +{"step": 4125, "train/loss": 2.3872649669647217, "train/lm_loss": 2.3872649669647217, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.511193086821652e-06, "perf/tokens_per_sec": 27046.265814235765, "train/loss_prose": 3.795687437057495, "train/loss_code": 1.3378089666366577, "train/loss_math": 2.359821081161499} +{"step": 4126, "train/loss": 2.04187473654747, "train/lm_loss": 2.04187473654747, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.501197266863691e-06, "perf/tokens_per_sec": 27242.693628364334, "train/loss_prose": 3.236436128616333, "train/loss_math": 2.0297560691833496, "train/loss_code": 1.566473937034607} +{"step": 4127, "train/loss": 2.3931930363178253, "train/lm_loss": 2.3931930363178253, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.491211437773166e-06, "perf/tokens_per_sec": 27231.07787703759, "train/loss_prose": 3.144784092903137, "train/loss_math": 2.0463967323303223, "train/loss_code": 1.506670633951823} +{"step": 4128, "train/loss": 2.1415185928344727, "train/lm_loss": 2.1415185928344727, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.481235604417028e-06, "perf/tokens_per_sec": 27179.380824310385, "train/loss_prose": 3.3602755069732666, "train/loss_math": 2.2475900650024414, "train/loss_code": 0.8520472844441732} +{"step": 4129, "train/loss": 2.494753837585449, "train/lm_loss": 2.494753837585449, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4712697716574e-06, "perf/tokens_per_sec": 27169.709471673228, "train/loss_math": 2.470423698425293, "train/loss_prose": 3.266408145427704, "train/loss_code": 1.4739913543065388} +{"step": 4130, "train/loss": 2.0427399277687073, "train/lm_loss": 2.0427399277687073, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4613139443515e-06, "perf/tokens_per_sec": 27208.13460368944, "train/loss_math": 2.2089210748672485, "train/loss_prose": 3.17357337474823, "train/loss_code": 1.39423269033432} +{"step": 4131, "train/loss": 2.464465856552124, "train/lm_loss": 2.464465856552124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.451368127351674e-06, "perf/tokens_per_sec": 27054.571255795203, "train/loss_math": 2.0742407143115997, "train/loss_prose": 3.352717558542887, "train/loss_code": 1.3606114387512207} +{"step": 4132, "train/loss": 2.911748319864273, "train/lm_loss": 2.911748319864273, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.441432325505399e-06, "perf/tokens_per_sec": 27109.002030823707, "train/loss_prose": 3.449201154708862, "train/loss_math": 2.2209701538085938, "train/loss_code": 1.6060400009155273} +{"step": 4133, "train/loss": 2.1585394740104675, "train/lm_loss": 2.1585394740104675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.431506543655251e-06, "perf/tokens_per_sec": 27201.414205642992, "train/loss_code": 1.7797441482543945, "train/loss_math": 2.093165546655655, "train/loss_prose": 3.55642032623291} +{"step": 4134, "train/loss": 2.181231737136841, "train/lm_loss": 2.181231737136841, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.421590786638951e-06, "perf/tokens_per_sec": 27163.43751966518, "train/loss_math": 2.3201189835866294, "train/loss_prose": 3.1876895427703857, "train/loss_code": 1.3713724613189697} +{"step": 4135, "train/loss": 2.29265633225441, "train/lm_loss": 2.29265633225441, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.411685059289314e-06, "perf/tokens_per_sec": 27122.697488052818, "train/loss_code": 1.7455386718114216, "train/loss_prose": 3.2606900930404663, "train/loss_math": 2.19441819190979} +{"step": 4136, "train/loss": 2.334890902042389, "train/lm_loss": 2.334890902042389, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.401789366434273e-06, "perf/tokens_per_sec": 27297.277535027537, "train/loss_prose": 3.257933020591736, "train/loss_math": 2.1284637212753297, "train/loss_code": 1.5209423303604126} +{"step": 4137, "train/loss": 2.4010519087314606, "train/lm_loss": 2.4010519087314606, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.391903712896861e-06, "perf/tokens_per_sec": 27136.36392699978, "train/loss_prose": 3.46321964263916, "train/loss_code": 1.4803017775217693, "train/loss_math": 2.188925623893738} +{"step": 4138, "train/loss": 2.462034285068512, "train/lm_loss": 2.462034285068512, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.382028103495223e-06, "perf/tokens_per_sec": 27152.44711159298, "train/loss_prose": 3.9099180698394775, "train/loss_code": 1.4557004868984222, "train/loss_math": 2.1437172889709473} +{"step": 4139, "train/loss": 1.9313491582870483, "train/lm_loss": 1.9313491582870483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.372162543042624e-06, "perf/tokens_per_sec": 27133.106723312856, "train/loss_prose": 3.5504558086395264, "train/loss_code": 1.2368359565734863, "train/loss_math": 2.317664702733358} +{"step": 4140, "train/loss": 2.537292182445526, "train/lm_loss": 2.537292182445526, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.36230703634741e-06, "perf/tokens_per_sec": 27153.648754289217, "train/loss_math": 2.19505889415741, "train/loss_prose": 3.1076812744140625} +{"step": 4141, "train/loss": 2.6361364126205444, "train/lm_loss": 2.6361364126205444, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.352461588213036e-06, "perf/tokens_per_sec": 26522.25336509441, "train/loss_prose": 3.298390805721283, "train/loss_code": 1.751230239868164, "train/loss_math": 2.0480994383494058} +{"step": 4142, "train/loss": 2.841828405857086, "train/lm_loss": 2.841828405857086, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3426262034380575e-06, "perf/tokens_per_sec": 27139.964904187927, "train/loss_prose": 3.3832263469696047, "train/loss_math": 2.1336829662323, "train/loss_code": 1.842406451702118} +{"step": 4143, "train/loss": 2.4824015498161316, "train/lm_loss": 2.4824015498161316, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.332800886816113e-06, "perf/tokens_per_sec": 27098.568534820664, "train/loss_math": 2.207206916809082, "train/loss_prose": 3.336099624633789, "train/loss_code": 2.1509788036346436} +{"step": 4144, "train/loss": 2.4116081595420837, "train/lm_loss": 2.4116081595420837, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.322985643135952e-06, "perf/tokens_per_sec": 27151.545949363088, "train/loss_prose": 3.2851549784342446, "train/loss_math": 2.146096795797348, "train/loss_code": 0.85301274061203} +{"step": 4145, "train/loss": 1.6230431497097015, "train/lm_loss": 1.6230431497097015, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.313180477181408e-06, "perf/tokens_per_sec": 26885.764652305574, "train/loss_code": 1.2545788764953614, "train/loss_prose": 2.9860947132110596, "train/loss_math": 1.8626779317855835} +{"step": 4146, "train/loss": 2.6744601130485535, "train/lm_loss": 2.6744601130485535, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3033853937313876e-06, "perf/tokens_per_sec": 27230.300968442407, "train/loss_prose": 3.3911337852478027, "train/loss_code": 1.4800040324529011} +{"step": 4147, "train/loss": 2.0136123597621918, "train/lm_loss": 2.0136123597621918, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.293600397559897e-06, "perf/tokens_per_sec": 27217.96269938339, "train/loss_prose": 3.2921446561813354, "train/loss_code": 1.5874348084131877} +{"step": 4148, "train/loss": 2.139728367328644, "train/lm_loss": 2.139728367328644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.283825493436031e-06, "perf/tokens_per_sec": 27210.84955199957, "train/loss_code": 1.5479257702827454, "train/loss_math": 2.4690789381663003, "train/loss_prose": 3.518887519836426} +{"step": 4149, "train/loss": 2.471819370985031, "train/lm_loss": 2.471819370985031, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.274060686123959e-06, "perf/tokens_per_sec": 27274.222621583947, "train/loss_prose": 3.483579456806183, "train/loss_code": 1.460059016942978} +{"step": 4150, "train/loss": 2.3384983241558075, "train/lm_loss": 2.3384983241558075, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.264305980382927e-06, "perf/tokens_per_sec": 27247.1439193047, "train/loss_code": 1.4108201563358307, "train/loss_prose": 3.2661765217781067} +{"step": 4151, "train/loss": 2.3689342737197876, "train/lm_loss": 2.3689342737197876, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.254561380967259e-06, "perf/tokens_per_sec": 27148.671212549798, "train/loss_math": 1.841781109571457, "train/loss_prose": 2.896087169647217} +{"step": 4152, "train/loss": 2.6520648896694183, "train/lm_loss": 2.6520648896694183, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.244826892626355e-06, "perf/tokens_per_sec": 27130.707178795565, "train/loss_prose": 3.3107393741607667, "train/loss_code": 1.2787015736103058, "train/loss_math": 2.105419635772705} +{"step": 4153, "train/loss": 2.1229427754879, "train/lm_loss": 2.1229427754879, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.235102520104681e-06, "perf/tokens_per_sec": 27203.137068119202, "train/loss_prose": 2.9916657209396362, "train/loss_code": 1.7358158826828003, "train/loss_math": 2.028473734855652} +{"step": 4154, "train/loss": 1.886277675628662, "train/lm_loss": 1.886277675628662, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.225388268141797e-06, "perf/tokens_per_sec": 27246.409305084395, "train/loss_math": 1.8792360424995422, "train/loss_code": 1.3825254440307617, "train/loss_prose": 4.419122219085693} +{"step": 4155, "train/loss": 2.125218629837036, "train/lm_loss": 2.125218629837036, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.215684141472292e-06, "perf/tokens_per_sec": 27123.853675325947, "train/loss_math": 1.914693146944046, "train/loss_code": 1.4297958016395569, "train/loss_prose": 3.2416924238204956} +{"step": 4156, "train/loss": 2.3028168976306915, "train/lm_loss": 2.3028168976306915, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.205990144825844e-06, "perf/tokens_per_sec": 27161.29025648485, "train/loss_code": 1.43970388174057, "train/loss_math": 2.1908589899539948, "train/loss_prose": 3.389845609664917} +{"step": 4157, "train/loss": 2.678228944540024, "train/lm_loss": 2.678228944540024, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.196306282927187e-06, "perf/tokens_per_sec": 27144.467469841446, "train/loss_prose": 3.405250906944275, "train/loss_code": 1.006230354309082, "train/loss_math": 2.266198714574178} +{"step": 4158, "train/loss": 1.9705922305583954, "train/lm_loss": 1.9705922305583954, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.186632560496126e-06, "perf/tokens_per_sec": 27173.878055107398, "train/loss_code": 1.187685489654541, "train/loss_prose": 3.517738699913025, "train/loss_math": 1.9892589449882507} +{"step": 4159, "train/loss": 2.096859246492386, "train/lm_loss": 2.096859246492386, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.176968982247514e-06, "perf/tokens_per_sec": 27065.141445585734, "train/loss_code": 1.839169478416443, "train/loss_math": 2.289679765701294, "train/loss_prose": 2.999666929244995} +{"step": 4160, "train/loss": 2.0666691958904266, "train/lm_loss": 2.0666691958904266, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.167315552891257e-06, "perf/tokens_per_sec": 26848.57970869538, "train/loss_code": 1.381694217522939, "train/loss_math": 2.4776541233062743} +{"step": 4161, "train/loss": 2.0209294259548187, "train/lm_loss": 2.0209294259548187, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.157672277132321e-06, "perf/tokens_per_sec": 26963.744919147386, "train/loss_math": 2.1132227182388306, "train/loss_code": 1.2485232949256897, "train/loss_prose": 3.041098713874817} +{"step": 4162, "train/loss": 2.2833445072174072, "train/lm_loss": 2.2833445072174072, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1480391596707215e-06, "perf/tokens_per_sec": 27090.876839646964, "train/loss_math": 2.1845367550849915, "train/loss_prose": 3.5258995294570923, "train/loss_code": 1.2384050488471985} +{"step": 4163, "train/loss": 2.4645245373249054, "train/lm_loss": 2.4645245373249054, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1384162052015255e-06, "perf/tokens_per_sec": 27200.682055171255, "train/loss_math": 2.4406044483184814, "train/loss_prose": 3.4205807050069175, "train/loss_code": 1.5244149764378865} +{"step": 4164, "train/loss": 1.8002546727657318, "train/lm_loss": 1.8002546727657318, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.128803418414839e-06, "perf/tokens_per_sec": 27099.55167725103, "train/loss_prose": 3.1276406049728394, "train/loss_code": 1.2190706491470338, "train/loss_math": 2.051403284072876} +{"step": 4165, "train/loss": 2.430007040500641, "train/lm_loss": 2.430007040500641, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1192008039958235e-06, "perf/tokens_per_sec": 27107.80433633499, "train/loss_code": 1.632132629553477, "train/loss_prose": 3.5793420473734536, "train/loss_math": 1.9028160572052002} +{"step": 4166, "train/loss": 2.2751612961292267, "train/lm_loss": 2.2751612961292267, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.109608366624665e-06, "perf/tokens_per_sec": 27085.79417162245, "train/loss_prose": 3.230216145515442, "train/loss_math": 1.9908719778060913, "train/loss_code": 1.7864985466003418} +{"step": 4167, "train/loss": 2.237733632326126, "train/lm_loss": 2.237733632326126, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.100026110976615e-06, "perf/tokens_per_sec": 27096.858891796277, "train/loss_prose": 3.420377016067505, "train/loss_math": 2.115093320608139, "train/loss_code": 1.3003710508346558} +{"step": 4168, "train/loss": 2.498131662607193, "train/lm_loss": 2.498131662607193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.090454041721942e-06, "perf/tokens_per_sec": 27179.46682276258, "train/loss_math": 1.9540734887123108, "train/loss_prose": 4.701042413711548, "train/loss_code": 1.3833367824554443} +{"step": 4169, "train/loss": 2.150957018136978, "train/lm_loss": 2.150957018136978, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.08089216352596e-06, "perf/tokens_per_sec": 27208.39314685359, "train/loss_math": 2.1815191209316254, "train/loss_code": 1.7948167721430461, "train/loss_prose": 3.0971298217773438} +{"step": 4170, "train/loss": 2.1111336052417755, "train/lm_loss": 2.1111336052417755, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.071340481049008e-06, "perf/tokens_per_sec": 27290.816497249452, "train/loss_math": 2.0927882194519043, "train/loss_code": 1.497559368610382, "train/loss_prose": 3.430008888244629} +{"step": 4171, "train/loss": 1.7897620499134064, "train/lm_loss": 1.7897620499134064, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.061798998946459e-06, "perf/tokens_per_sec": 27238.71984018138, "train/loss_math": 1.7844494581222534, "train/loss_code": 1.2834025621414185, "train/loss_prose": 3.8311376571655273} +{"step": 4172, "train/loss": 2.7182913720607758, "train/lm_loss": 2.7182913720607758, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.052267721868719e-06, "perf/tokens_per_sec": 27279.11636584781, "train/loss_prose": 3.5209579467773438, "train/loss_math": 2.2470603783925376, "train/loss_code": 0.9213175773620605} +{"step": 4173, "train/loss": 2.8580202460289, "train/lm_loss": 2.8580202460289, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.042746654461216e-06, "perf/tokens_per_sec": 27327.88974591868, "train/loss_math": 2.0444054007530212, "train/loss_prose": 3.2078550815582276, "train/loss_code": 2.7360761165618896} +{"step": 4174, "train/loss": 2.385918766260147, "train/lm_loss": 2.385918766260147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0332358013644016e-06, "perf/tokens_per_sec": 27231.164202951386, "train/loss_math": 2.228596329689026, "train/loss_prose": 3.524374087651571, "train/loss_code": 1.352345069249471} +{"step": 4175, "train/loss": 2.2467931509017944, "train/lm_loss": 2.2467931509017944, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.023735167213752e-06, "perf/tokens_per_sec": 27254.924618459878, "train/loss_prose": 3.053099791208903, "train/loss_code": 1.2873335480690002, "train/loss_math": 2.0801261266072593} +{"step": 4176, "train/loss": 2.4604761600494385, "train/lm_loss": 2.4604761600494385, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.014244756639751e-06, "perf/tokens_per_sec": 27239.10852917207, "train/loss_math": 2.131104922294617, "train/loss_prose": 3.7533150911331177, "train/loss_code": 1.5216550827026367} +{"step": 4177, "train/loss": 2.110236167907715, "train/lm_loss": 2.110236167907715, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.004764574267927e-06, "perf/tokens_per_sec": 27333.629026689472, "train/loss_math": 1.9094528555870056, "train/loss_code": 1.4332208236058552, "train/loss_prose": 2.9211071332295737} +{"step": 4178, "train/loss": 2.508334159851074, "train/lm_loss": 2.508334159851074, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.995294624718795e-06, "perf/tokens_per_sec": 27316.635582494324, "train/loss_code": 1.4091927607854207, "train/loss_prose": 3.4609920978546143, "train/loss_math": 1.9951250553131104} +{"step": 4179, "train/loss": 2.4076296985149384, "train/lm_loss": 2.4076296985149384, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.985834912607894e-06, "perf/tokens_per_sec": 27195.5581052245, "train/loss_prose": 3.599333167076111, "train/loss_code": 1.8439360857009888, "train/loss_math": 2.0436869859695435} +{"step": 4180, "train/loss": 2.691156566143036, "train/lm_loss": 2.691156566143036, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.976385442545774e-06, "perf/tokens_per_sec": 26282.536642082057, "train/loss_code": 1.7898282408714294, "train/loss_prose": 3.164590311050415, "train/loss_math": 2.1266443729400635} +{"step": 4181, "train/loss": 2.8015733659267426, "train/lm_loss": 2.8015733659267426, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.966946219137993e-06, "perf/tokens_per_sec": 26863.189307180877, "train/loss_prose": 3.4302350521087646, "train/loss_code": 1.6829203963279724, "train/loss_math": 1.8955706357955933} +{"step": 4182, "train/loss": 2.036739259958267, "train/lm_loss": 2.036739259958267, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.957517246985109e-06, "perf/tokens_per_sec": 27061.21928260445, "train/loss_math": 1.977572500705719, "train/loss_prose": 3.2806867361068726, "train/loss_code": 1.4443489015102386} +{"step": 4183, "train/loss": 2.2778526842594147, "train/lm_loss": 2.2778526842594147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.948098530682695e-06, "perf/tokens_per_sec": 27029.797800468856, "train/loss_code": 1.4624376893043518, "train/loss_prose": 3.579355001449585, "train/loss_math": 2.225599765777588} +{"step": 4184, "train/loss": 2.2317114770412445, "train/lm_loss": 2.2317114770412445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.938690074821313e-06, "perf/tokens_per_sec": 26967.63887576426, "train/loss_code": 1.7164701223373413, "train/loss_math": 2.110992153485616, "train/loss_prose": 3.1856521368026733} +{"step": 4185, "train/loss": 3.1590017080307007, "train/lm_loss": 3.1590017080307007, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.929291883986536e-06, "perf/tokens_per_sec": 27124.581695662404, "train/loss_prose": 3.6582102378209433, "train/loss_math": 1.9964076280593872, "train/loss_code": 1.3263435363769531} +{"step": 4186, "train/loss": 2.100913852453232, "train/lm_loss": 2.100913852453232, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.919903962758917e-06, "perf/tokens_per_sec": 27214.94454635605, "train/loss_code": 1.3594290415445964, "train/loss_math": 2.05588698387146, "train/loss_prose": 3.2806817293167114} +{"step": 4187, "train/loss": 2.2565720081329346, "train/lm_loss": 2.2565720081329346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.91052631571403e-06, "perf/tokens_per_sec": 27123.982146663235, "train/loss_math": 2.1483495831489563, "train/loss_prose": 3.267104148864746, "train/loss_code": 2.0640243689219155} +{"step": 4188, "train/loss": 1.9309715032577515, "train/lm_loss": 1.9309715032577515, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.901158947422417e-06, "perf/tokens_per_sec": 27115.805231906612, "train/loss_code": 1.6382873058319092, "train/loss_math": 1.945299744606018, "train/loss_prose": 3.058723211288452} +{"step": 4189, "train/loss": 2.4090015292167664, "train/lm_loss": 2.4090015292167664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.891801862449629e-06, "perf/tokens_per_sec": 27047.841326519367, "train/loss_code": 1.3543993830680847, "train/loss_prose": 3.1804444789886475, "train/loss_math": 2.3406265576680503} +{"step": 4190, "train/loss": 1.9917400479316711, "train/lm_loss": 1.9917400479316711, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8824550653561744e-06, "perf/tokens_per_sec": 26940.447020376418, "train/loss_math": 2.309337854385376, "train/loss_code": 1.3706599175930023, "train/loss_prose": 2.916302800178528} +{"step": 4191, "train/loss": 2.266473203897476, "train/lm_loss": 2.266473203897476, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8731185606975916e-06, "perf/tokens_per_sec": 27019.38267632326, "train/loss_math": 1.9633169571558635, "train/loss_code": 1.624613602956136, "train/loss_prose": 3.6839972734451294} +{"step": 4192, "train/loss": 2.2969051599502563, "train/lm_loss": 2.2969051599502563, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.863792353024367e-06, "perf/tokens_per_sec": 27255.962380774086, "train/loss_math": 2.253623366355896, "train/loss_code": 1.3419386446475983, "train/loss_prose": 3.338435173034668} +{"step": 4193, "train/loss": 2.3181264996528625, "train/lm_loss": 2.3181264996528625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.854476446881985e-06, "perf/tokens_per_sec": 26992.722568318975, "train/loss_math": 2.0785165429115295, "train/loss_prose": 3.041030486424764, "train/loss_code": 1.75496244430542} +{"step": 4194, "train/loss": 2.1257591247558594, "train/lm_loss": 2.1257591247558594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.845170846810902e-06, "perf/tokens_per_sec": 26819.99429251189, "train/loss_code": 1.707405373454094, "train/loss_prose": 4.2449822425842285, "train/loss_math": 1.977156400680542} +{"step": 4195, "train/loss": 2.639163374900818, "train/lm_loss": 2.639163374900818, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.835875557346552e-06, "perf/tokens_per_sec": 27304.739719321668, "train/loss_prose": 3.692432165145874, "train/loss_code": 1.9188333749771118, "train/loss_math": 2.1397551894187927} +{"step": 4196, "train/loss": 2.4435577392578125, "train/lm_loss": 2.4435577392578125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.826590583019357e-06, "perf/tokens_per_sec": 27140.179278204752, "train/loss_code": 1.291473110516866, "train/loss_prose": 3.5639739831288657, "train/loss_math": 2.491060733795166} +{"step": 4197, "train/loss": 2.7406232953071594, "train/lm_loss": 2.7406232953071594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.817315928354695e-06, "perf/tokens_per_sec": 27257.821902572377, "train/loss_prose": 3.272619342803955, "train/loss_code": 1.4963384866714478, "train/loss_math": 2.0327759385108948} +{"step": 4198, "train/loss": 2.084532469511032, "train/lm_loss": 2.084532469511032, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8080515978729247e-06, "perf/tokens_per_sec": 26774.60100241877, "train/loss_math": 2.1898117065429688, "train/loss_code": 1.7729023933410644, "train/loss_prose": 3.432124614715576} +{"step": 4199, "train/loss": 2.511353015899658, "train/lm_loss": 2.511353015899658, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.798797596089351e-06, "perf/tokens_per_sec": 27123.725205205646, "train/loss_math": 2.299825429916382, "train/loss_prose": 3.145244836807251, "train/loss_code": 1.4557890892028809} +{"step": 4200, "train/loss": 2.0792571306228638, "train/lm_loss": 2.0792571306228638, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7895539275142807e-06, "perf/tokens_per_sec": 27199.30399649161, "train/loss_code": 1.5870711008707683, "train/loss_math": 2.1333354711532593, "train/loss_prose": 3.3395016193389893} +{"step": 4200, "eval/loss": 2.1584394447241046, "eval/lm_loss": 2.1584394447241046, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.657616421905107, "eval/loss_code": 1.555013929123748, "eval/ppl_code": 4.735152481224944, "eval/loss_prose": 3.4852971863328364, "eval/ppl_prose": 32.6321234900849, "eval/loss_math": 2.0269779248540756, "eval/ppl_math": 7.591110752127327} +{"step": 4201, "train/loss": 2.717876136302948, "train/lm_loss": 2.717876136302948, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.780320596652956e-06, "perf/tokens_per_sec": 26439.353976090515, "train/loss_prose": 3.491598129272461, "train/loss_math": 2.2221649487813315, "train/loss_code": 1.1101210117340088} +{"step": 4202, "train/loss": 2.3028933703899384, "train/lm_loss": 2.3028933703899384, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7710976080055942e-06, "perf/tokens_per_sec": 27173.79209201831, "train/loss_code": 1.606082260608673, "train/loss_math": 1.8943987290064495, "train/loss_prose": 3.175928513209025} +{"step": 4203, "train/loss": 1.6131117045879364, "train/lm_loss": 1.6131117045879364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7618849660673694e-06, "perf/tokens_per_sec": 27069.27800108089, "train/loss_code": 1.2619587928056717, "train/loss_math": 1.9642645716667175} +{"step": 4204, "train/loss": 2.454128623008728, "train/lm_loss": 2.454128623008728, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.752682675328406e-06, "perf/tokens_per_sec": 27177.789951101753, "train/loss_math": 2.1501619815826416, "train/loss_prose": 3.3660281896591187} +{"step": 4205, "train/loss": 2.752596080303192, "train/lm_loss": 2.752596080303192, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7434907402737836e-06, "perf/tokens_per_sec": 26864.575525528577, "train/loss_math": 2.095426559448242, "train/loss_code": 1.866741418838501, "train/loss_prose": 3.5241082310676575} +{"step": 4206, "train/loss": 2.8743553161621094, "train/lm_loss": 2.8743553161621094, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7343091653835565e-06, "perf/tokens_per_sec": 26969.41692555725, "train/loss_math": 2.374775012334188, "train/loss_code": 1.6622586250305176, "train/loss_prose": 3.5520647168159485} +{"step": 4207, "train/loss": 2.491752028465271, "train/lm_loss": 2.491752028465271, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.725137955132707e-06, "perf/tokens_per_sec": 27163.824063093918, "train/loss_prose": 3.2802262902259827, "train/loss_code": 1.531907320022583, "train/loss_math": 2.2173893451690674} +{"step": 4208, "train/loss": 2.398249566555023, "train/lm_loss": 2.398249566555023, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.715977113991165e-06, "perf/tokens_per_sec": 27237.122013089138, "train/loss_prose": 3.4286516904830933, "train/loss_code": 2.0547821323076882} +{"step": 4209, "train/loss": 2.3860971927642822, "train/lm_loss": 2.3860971927642822, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7068266464238084e-06, "perf/tokens_per_sec": 27093.31207065132, "train/loss_code": 1.6458646655082703, "train/loss_prose": 3.3595568339029946, "train/loss_math": 1.9061256647109985} +{"step": 4210, "train/loss": 2.0618042051792145, "train/lm_loss": 2.0618042051792145, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.697686556890481e-06, "perf/tokens_per_sec": 27213.866796770428, "train/loss_code": 1.237829864025116, "train/loss_math": 2.046879142522812, "train/loss_prose": 2.915628433227539} +{"step": 4211, "train/loss": 2.567228615283966, "train/lm_loss": 2.567228615283966, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.688556849845939e-06, "perf/tokens_per_sec": 27047.50065966176, "train/loss_code": 1.5259924530982971, "train/loss_prose": 3.2329761385917664, "train/loss_math": 2.276969790458679} +{"step": 4212, "train/loss": 2.4624253809452057, "train/lm_loss": 2.4624253809452057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.679437529739896e-06, "perf/tokens_per_sec": 27168.37751364755, "train/loss_math": 1.9560131231943767, "train/loss_code": 1.694407343864441, "train/loss_prose": 3.4808497428894043} +{"step": 4213, "train/loss": 2.065242111682892, "train/lm_loss": 2.065242111682892, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.670328601016995e-06, "perf/tokens_per_sec": 27133.920950926396, "train/loss_math": 2.0740246772766113, "train/loss_code": 1.377329334616661, "train/loss_prose": 2.979531447092692} +{"step": 4214, "train/loss": 2.3894692063331604, "train/lm_loss": 2.3894692063331604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.661230068116811e-06, "perf/tokens_per_sec": 27120.727914513805, "train/loss_code": 1.3511337637901306, "train/loss_math": 1.9369995594024658, "train/loss_prose": 3.5341625213623047} +{"step": 4215, "train/loss": 2.347979336977005, "train/lm_loss": 2.347979336977005, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6521419354738738e-06, "perf/tokens_per_sec": 27243.428042232394, "train/loss_code": 1.702946662902832, "train/loss_prose": 3.0996577739715576, "train/loss_math": 2.1880099773406982} +{"step": 4216, "train/loss": 2.70391845703125, "train/lm_loss": 2.70391845703125, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.643064207517624e-06, "perf/tokens_per_sec": 27115.976425651468, "train/loss_prose": 4.164784669876099, "train/loss_code": 1.650687297185262, "train/loss_math": 2.092466115951538} +{"step": 4217, "train/loss": 2.337366819381714, "train/lm_loss": 2.337366819381714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.633996888672428e-06, "perf/tokens_per_sec": 26776.270221069564, "train/loss_prose": 3.262310743331909, "train/loss_code": 1.1723577380180359, "train/loss_math": 2.189095616340637} +{"step": 4218, "train/loss": 2.091851085424423, "train/lm_loss": 2.091851085424423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6249399833575865e-06, "perf/tokens_per_sec": 27113.066425940364, "train/loss_code": 1.290299693743388, "train/loss_math": 2.1775723695755005, "train/loss_prose": 3.1655954122543335} +{"step": 4219, "train/loss": 2.197731673717499, "train/lm_loss": 2.197731673717499, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6158934959873353e-06, "perf/tokens_per_sec": 27159.186267907335, "train/loss_code": 1.2795682350794475, "train/loss_prose": 3.118837912877401, "train/loss_math": 2.1933175325393677} +{"step": 4220, "train/loss": 2.150034725666046, "train/lm_loss": 2.150034725666046, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.606857430970814e-06, "perf/tokens_per_sec": 27238.71984018138, "train/loss_code": 1.493497093518575, "train/loss_math": 2.040436863899231, "train/loss_prose": 4.558039665222168} +{"step": 4221, "train/loss": 2.2256888151168823, "train/lm_loss": 2.2256888151168823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5978317927120974e-06, "perf/tokens_per_sec": 27155.4084944282, "train/loss_math": 2.633384863535563, "train/loss_prose": 3.0481539964675903, "train/loss_code": 1.2696828842163086} +{"step": 4222, "train/loss": 2.648034483194351, "train/lm_loss": 2.648034483194351, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5888165856101693e-06, "perf/tokens_per_sec": 27086.94721473743, "train/loss_prose": 3.525698482990265, "train/loss_code": 1.5552595257759094, "train/loss_math": 1.9854813814163208} +{"step": 4223, "train/loss": 2.9295350313186646, "train/lm_loss": 2.9295350313186646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5798118140589284e-06, "perf/tokens_per_sec": 27078.963337515524, "train/loss_math": 2.0346884727478027, "train/loss_prose": 3.775008773803711, "train/loss_code": 1.26327383518219} +{"step": 4224, "train/loss": 2.5004451870918274, "train/lm_loss": 2.5004451870918274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5708174824471947e-06, "perf/tokens_per_sec": 27009.400198091404, "train/loss_code": 1.5762720505396526, "train/loss_prose": 3.612213134765625, "train/loss_math": 2.2190531492233276} +{"step": 4225, "train/loss": 2.3117814660072327, "train/lm_loss": 2.3117814660072327, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.561833595158698e-06, "perf/tokens_per_sec": 26874.70345932315, "train/loss_code": 2.068557381629944, "train/loss_math": 2.30894668896993, "train/loss_prose": 3.2931814193725586} +{"step": 4226, "train/loss": 1.7707432806491852, "train/lm_loss": 1.7707432806491852, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5528601565720714e-06, "perf/tokens_per_sec": 27068.382350902182, "train/loss_math": 2.087250828742981, "train/loss_code": 1.4542356431484222} +{"step": 4227, "train/loss": 1.9722360968589783, "train/lm_loss": 1.9722360968589783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5438971710608615e-06, "perf/tokens_per_sec": 27171.127505994144, "train/loss_math": 2.0924778878688812, "train/loss_code": 1.3044416507085164, "train/loss_prose": 3.4946517944335938} +{"step": 4228, "train/loss": 2.2189093232154846, "train/lm_loss": 2.2189093232154846, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5349446429935122e-06, "perf/tokens_per_sec": 27078.0243672552, "train/loss_math": 2.068898618221283, "train/loss_code": 1.3502976894378662, "train/loss_prose": 3.387542247772217} +{"step": 4229, "train/loss": 2.0597283840179443, "train/lm_loss": 2.0597283840179443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5260025767333893e-06, "perf/tokens_per_sec": 27031.58405659368, "train/loss_math": 2.118981456756592, "train/loss_prose": 3.0905816555023193, "train/loss_code": 1.3961687088012695} +{"step": 4230, "train/loss": 2.375479757785797, "train/lm_loss": 2.375479757785797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5170709766387424e-06, "perf/tokens_per_sec": 27134.520939385948, "train/loss_math": 2.0882331927617392, "train/loss_code": 1.5171037316322327, "train/loss_prose": 3.2349770863850913} +{"step": 4231, "train/loss": 2.178289443254471, "train/lm_loss": 2.178289443254471, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.508149847062725e-06, "perf/tokens_per_sec": 27155.022190469117, "train/loss_math": 1.8685621321201324, "train/loss_prose": 2.955819765726725, "train/loss_code": 1.0846076011657715} +{"step": 4232, "train/loss": 2.180460959672928, "train/lm_loss": 2.180460959672928, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4992391923533844e-06, "perf/tokens_per_sec": 27136.278201094305, "train/loss_math": 2.202982234954834, "train/loss_code": 1.470573365688324, "train/loss_prose": 3.4876296520233154} +{"step": 4233, "train/loss": 3.1811291575431824, "train/lm_loss": 3.1811291575431824, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4903390168536664e-06, "perf/tokens_per_sec": 27159.272138611355, "train/loss_math": 2.3077991803487143, "train/loss_prose": 3.7051273345947267} +{"step": 4234, "train/loss": 2.0371280014514923, "train/lm_loss": 2.0371280014514923, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4814493249014116e-06, "perf/tokens_per_sec": 27065.31200020165, "train/loss_math": 1.940285873413086, "train/loss_code": 1.4310158491134644, "train/loss_prose": 3.733563184738159} +{"step": 4235, "train/loss": 2.438794195652008, "train/lm_loss": 2.438794195652008, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4725701208293435e-06, "perf/tokens_per_sec": 27272.317567760638, "train/loss_prose": 3.508453051249186, "train/loss_math": 2.1031224131584167, "train/loss_code": 0.5725038051605225} +{"step": 4236, "train/loss": 2.1606292724609375, "train/lm_loss": 2.1606292724609375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.463701408965084e-06, "perf/tokens_per_sec": 27161.934399792568, "train/loss_code": 1.3811568021774292, "train/loss_math": 2.1036471128463745, "train/loss_prose": 3.4153109788894653} +{"step": 4237, "train/loss": 2.1453354954719543, "train/lm_loss": 2.1453354954719543, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.454843193631127e-06, "perf/tokens_per_sec": 27174.26489573892, "train/loss_math": 2.155997943878174, "train/loss_prose": 3.4260754585266113, "train/loss_code": 1.4783092141151428} +{"step": 4238, "train/loss": 2.229092985391617, "train/lm_loss": 2.229092985391617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4459954791448616e-06, "perf/tokens_per_sec": 27119.957289688497, "train/loss_code": 1.3545904159545898, "train/loss_prose": 3.3073710203170776, "train/loss_math": 2.127205193042755} +{"step": 4239, "train/loss": 2.3154936730861664, "train/lm_loss": 2.3154936730861664, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4371582698185633e-06, "perf/tokens_per_sec": 27159.916186200193, "train/loss_code": 1.757583737373352, "train/loss_math": 2.1828739047050476, "train/loss_prose": 3.5639333724975586} +{"step": 4240, "train/loss": 2.799706310033798, "train/lm_loss": 2.799706310033798, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4283315699593753e-06, "perf/tokens_per_sec": 27036.476204414943, "train/loss_prose": 3.16643496354421, "train/loss_math": 2.109952688217163, "train/loss_code": 1.2890865802764893} +{"step": 4241, "train/loss": 2.7848339676856995, "train/lm_loss": 2.7848339676856995, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.419515383869326e-06, "perf/tokens_per_sec": 26994.03735827237, "train/loss_prose": 3.7384315133094788, "train/loss_math": 2.021012544631958, "train/loss_code": 1.261906385421753} +{"step": 4242, "train/loss": 2.7105599343776703, "train/lm_loss": 2.7105599343776703, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4107097158453143e-06, "perf/tokens_per_sec": 26757.58685223719, "train/loss_prose": 3.301351022720337, "train/loss_code": 1.4863901734352112, "train/loss_math": 2.204944372177124} +{"step": 4243, "train/loss": 2.280409127473831, "train/lm_loss": 2.280409127473831, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.4019145701791184e-06, "perf/tokens_per_sec": 26873.9467522635, "train/loss_math": 2.0839692056179047, "train/loss_prose": 3.3218713998794556, "train/loss_code": 1.631826937198639} +{"step": 4244, "train/loss": 1.8314874172210693, "train/lm_loss": 1.8314874172210693, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.393129951157384e-06, "perf/tokens_per_sec": 27128.43657219014, "train/loss_prose": 3.407259702682495, "train/loss_code": 1.3095263987779617, "train/loss_math": 2.0021779537200928} +{"step": 4245, "train/loss": 2.345789909362793, "train/lm_loss": 2.345789909362793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3843558630616308e-06, "perf/tokens_per_sec": 26880.422179177058, "train/loss_math": 2.3395692825317385, "train/loss_code": 1.6991691589355469, "train/loss_prose": 3.670133590698242} +{"step": 4246, "train/loss": 2.3518091440200806, "train/lm_loss": 2.3518091440200806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3755923101682353e-06, "perf/tokens_per_sec": 27072.64788270741, "train/loss_prose": 3.332932154337565, "train/loss_code": 1.5279496908187866, "train/loss_math": 2.115913987159729} +{"step": 4247, "train/loss": 1.9155243337154388, "train/lm_loss": 1.9155243337154388, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.36683929674845e-06, "perf/tokens_per_sec": 27046.606449987798, "train/loss_code": 1.4535013834635417, "train/loss_math": 1.9113852977752686, "train/loss_prose": 3.3181495666503906} +{"step": 4248, "train/loss": 2.287817895412445, "train/lm_loss": 2.287817895412445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.358096827068394e-06, "perf/tokens_per_sec": 26546.06377284555, "train/loss_math": 2.288278579711914, "train/loss_code": 1.1176925897598267, "train/loss_prose": 3.4570215940475464} +{"step": 4249, "train/loss": 1.851181298494339, "train/lm_loss": 1.851181298494339, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3493649053890326e-06, "perf/tokens_per_sec": 27055.55120829626, "train/loss_code": 1.6710421528135027, "train/loss_prose": 3.112154006958008} +{"step": 4250, "train/loss": 2.721347689628601, "train/lm_loss": 2.721347689628601, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3406435359662036e-06, "perf/tokens_per_sec": 27177.531989211166, "train/loss_prose": 3.7006086111068726, "train/loss_code": 1.7206965684890747, "train/loss_math": 1.8062559366226196} +{"step": 4251, "train/loss": 2.2810000479221344, "train/lm_loss": 2.2810000479221344, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.331932723050596e-06, "perf/tokens_per_sec": 27076.27464188281, "train/loss_math": 2.191455602645874, "train/loss_prose": 3.501163959503174, "train/loss_code": 1.5571022033691406} +{"step": 4252, "train/loss": 2.5035578310489655, "train/lm_loss": 2.5035578310489655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3232324708877495e-06, "perf/tokens_per_sec": 27214.685878670367, "train/loss_prose": 3.6353920300801597, "train/loss_code": 1.3456746935844421, "train/loss_math": 2.1436458428700766} +{"step": 4253, "train/loss": 2.1206152141094208, "train/lm_loss": 2.1206152141094208, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3145427837180663e-06, "perf/tokens_per_sec": 27147.47001035025, "train/loss_prose": 3.544826030731201, "train/loss_math": 2.1353374322255454, "train/loss_code": 1.156419078509013} +{"step": 4254, "train/loss": 2.472598135471344, "train/lm_loss": 2.472598135471344, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.305863665776793e-06, "perf/tokens_per_sec": 27143.352409662348, "train/loss_math": 2.255262851715088, "train/loss_code": 1.784414827823639, "train/loss_prose": 3.595452308654785} +{"step": 4255, "train/loss": 2.6696415543556213, "train/lm_loss": 2.6696415543556213, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.297195121294022e-06, "perf/tokens_per_sec": 27163.09393474167, "train/loss_code": 1.9217259486516316, "train/loss_prose": 3.525237480799357, "train/loss_math": 2.50812029838562} +{"step": 4256, "train/loss": 2.3761159479618073, "train/lm_loss": 2.3761159479618073, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.288537154494703e-06, "perf/tokens_per_sec": 27136.87829380712, "train/loss_math": 2.21044385433197, "train/loss_prose": 3.530588388442993, "train/loss_code": 1.3320916096369426} +{"step": 4257, "train/loss": 2.088632583618164, "train/lm_loss": 2.088632583618164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2798897695986153e-06, "perf/tokens_per_sec": 27088.997311577874, "train/loss_prose": 3.3029768466949463, "train/loss_code": 1.3854673306147258, "train/loss_math": 1.9822349150975545} +{"step": 4258, "train/loss": 2.214159071445465, "train/lm_loss": 2.214159071445465, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2712529708204016e-06, "perf/tokens_per_sec": 27041.625506837587, "train/loss_code": 1.33512744307518, "train/loss_math": 1.9924440383911133, "train/loss_prose": 3.460106293360392} +{"step": 4259, "train/loss": 2.1361444294452667, "train/lm_loss": 2.1361444294452667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.262626762369525e-06, "perf/tokens_per_sec": 27287.218721906498, "train/loss_math": 2.4789260625839233, "train/loss_code": 1.4859003225962322, "train/loss_prose": 2.7157504558563232} +{"step": 4260, "train/loss": 2.3777184188365936, "train/lm_loss": 2.3777184188365936, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2540111484503004e-06, "perf/tokens_per_sec": 27206.411108155018, "train/loss_prose": 3.248945713043213, "train/loss_code": 1.2750974297523499, "train/loss_math": 2.2415720224380493} +{"step": 4261, "train/loss": 2.893506169319153, "train/lm_loss": 2.893506169319153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.245406133261858e-06, "perf/tokens_per_sec": 26915.165172325735, "train/loss_math": 2.4168760776519775, "train/loss_code": 2.231024146080017, "train/loss_prose": 3.253825283050537} +{"step": 4262, "train/loss": 2.3234708309173584, "train/lm_loss": 2.3234708309173584, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.236811720998195e-06, "perf/tokens_per_sec": 27172.158896656438, "train/loss_math": 2.1552193462848663, "train/loss_prose": 3.138352155685425, "train/loss_code": 0.5518333315849304} +{"step": 4263, "train/loss": 2.349144846200943, "train/lm_loss": 2.349144846200943, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.228227915848117e-06, "perf/tokens_per_sec": 27300.270595732036, "train/loss_prose": 3.350302219390869, "train/loss_code": 1.4908133745193481, "train/loss_math": 2.1349058151245117} +{"step": 4264, "train/loss": 2.2539659440517426, "train/lm_loss": 2.2539659440517426, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.219654721995266e-06, "perf/tokens_per_sec": 27063.308119211786, "train/loss_math": 1.9992451965808868, "train/loss_prose": 3.5207815170288086, "train/loss_code": 1.4965914487838745} +{"step": 4265, "train/loss": 1.7959074974060059, "train/lm_loss": 1.7959074974060059, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2110921436181134e-06, "perf/tokens_per_sec": 26788.169431004346, "train/loss_code": 1.1670815825462342, "train/loss_prose": 3.1029499769210815, "train/loss_math": 2.3259525299072266} +{"step": 4266, "train/loss": 2.069183111190796, "train/lm_loss": 2.069183111190796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.202540184889949e-06, "perf/tokens_per_sec": 27028.47952474824, "train/loss_math": 2.3451883792877197, "train/loss_code": 1.3178396622339885, "train/loss_prose": 3.2191925048828125} +{"step": 4267, "train/loss": 2.2337637841701508, "train/lm_loss": 2.2337637841701508, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1939988499789077e-06, "perf/tokens_per_sec": 27096.8161534846, "train/loss_code": 1.3317147135734557, "train/loss_prose": 3.7371789614359536} +{"step": 4268, "train/loss": 2.5238196551799774, "train/lm_loss": 2.5238196551799774, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1854681430479205e-06, "perf/tokens_per_sec": 27128.5222485579, "train/loss_prose": 3.584253946940104, "train/loss_math": 2.0381657779216766, "train/loss_code": 1.2851321697235107} +{"step": 4269, "train/loss": 1.8512652516365051, "train/lm_loss": 1.8512652516365051, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.176948068254762e-06, "perf/tokens_per_sec": 27156.48161865244, "train/loss_code": 1.4670562446117401, "train/loss_math": 2.235474318265915} +{"step": 4270, "train/loss": 2.7577115893363953, "train/lm_loss": 2.7577115893363953, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.168438629752002e-06, "perf/tokens_per_sec": 27057.42609785791, "train/loss_math": 2.207679192225138, "train/loss_prose": 3.435695171356201, "train/loss_code": 1.6958738565444946} +{"step": 4271, "train/loss": 2.795883446931839, "train/lm_loss": 2.795883446931839, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1599398316870337e-06, "perf/tokens_per_sec": 27047.074838157132, "train/loss_code": 1.3020615577697754, "train/loss_prose": 3.5443957328796385, "train/loss_math": 2.040968179702759} +{"step": 4272, "train/loss": 2.5467804670333862, "train/lm_loss": 2.5467804670333862, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.15145167820208e-06, "perf/tokens_per_sec": 27181.14390136239, "train/loss_code": 1.7561581134796143, "train/loss_prose": 3.2535112698872886, "train/loss_math": 2.367131153742472} +{"step": 4273, "train/loss": 2.1601554453372955, "train/lm_loss": 2.1601554453372955, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.14297417343416e-06, "perf/tokens_per_sec": 27002.777604184677, "train/loss_prose": 3.4397796392440796, "train/loss_code": 1.4477385679880779, "train/loss_math": 2.0194897651672363} +{"step": 4274, "train/loss": 2.720077633857727, "train/lm_loss": 2.720077633857727, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1345073215151066e-06, "perf/tokens_per_sec": 27222.27550800512, "train/loss_prose": 3.5559134483337402, "train/loss_code": 1.3270177642504375} +{"step": 4275, "train/loss": 1.9915468096733093, "train/lm_loss": 1.9915468096733093, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.126051126571561e-06, "perf/tokens_per_sec": 27168.506407103716, "train/loss_code": 1.377647340297699, "train/loss_math": 1.6994863152503967, "train/loss_prose": 3.5114063024520874} +{"step": 4276, "train/loss": 2.948828339576721, "train/lm_loss": 2.948828339576721, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1176055927249594e-06, "perf/tokens_per_sec": 27221.930433018963, "train/loss_prose": 3.314819614092509, "train/loss_code": 1.8508542776107788} +{"step": 4277, "train/loss": 2.5554807484149933, "train/lm_loss": 2.5554807484149933, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.10917072409157e-06, "perf/tokens_per_sec": 26663.318233316673, "train/loss_prose": 3.2663448452949524, "train/loss_code": 1.9201348423957825, "train/loss_math": 1.7690985202789307} +{"step": 4278, "train/loss": 2.520220994949341, "train/lm_loss": 2.520220994949341, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.100746524782441e-06, "perf/tokens_per_sec": 27063.862354479435, "train/loss_code": 1.4836714267730713, "train/loss_prose": 4.05103874206543, "train/loss_math": 2.1152036666870115} +{"step": 4279, "train/loss": 2.35978627204895, "train/lm_loss": 2.35978627204895, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.092332998903416e-06, "perf/tokens_per_sec": 27058.66196260269, "train/loss_math": 2.0778324604034424, "train/loss_prose": 3.3147382140159607, "train/loss_code": 1.180501103401184} +{"step": 4280, "train/loss": 2.7430355548858643, "train/lm_loss": 2.7430355548858643, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.083930150555142e-06, "perf/tokens_per_sec": 27129.207678982282, "train/loss_prose": 3.47922819852829, "train/loss_code": 1.7912517786026, "train/loss_math": 2.222434639930725} +{"step": 4281, "train/loss": 1.8465774357318878, "train/lm_loss": 1.8465774357318878, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.075537983833082e-06, "perf/tokens_per_sec": 26828.45459429071, "train/loss_code": 1.229741021990776, "train/loss_math": 2.2402185996373496, "train/loss_prose": 3.1329991817474365} +{"step": 4282, "train/loss": 2.0220966935157776, "train/lm_loss": 2.0220966935157776, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0671565028274644e-06, "perf/tokens_per_sec": 26999.97671509307, "train/loss_math": 2.0240344405174255, "train/loss_code": 1.313218504190445, "train/loss_prose": 3.437915325164795} +{"step": 4283, "train/loss": 2.391261339187622, "train/lm_loss": 2.391261339187622, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.058785711623327e-06, "perf/tokens_per_sec": 27031.49899142475, "train/loss_code": 2.0794227917989097, "train/loss_math": 2.1428622007369995, "train/loss_prose": 3.231618046760559} +{"step": 4284, "train/loss": 2.6568938493728638, "train/lm_loss": 2.6568938493728638, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0504256143004866e-06, "perf/tokens_per_sec": 27067.742637061034, "train/loss_code": 1.5287531614303589, "train/loss_prose": 3.6458802223205566, "train/loss_math": 2.0853703022003174} +{"step": 4285, "train/loss": 2.5763454735279083, "train/lm_loss": 2.5763454735279083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0420762149335565e-06, "perf/tokens_per_sec": 27203.050919812144, "train/loss_math": 2.251370668411255, "train/loss_prose": 3.493910233179728, "train/loss_code": 1.8754306236902873} +{"step": 4286, "train/loss": 2.3388112485408783, "train/lm_loss": 2.3388112485408783, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0337375175919307e-06, "perf/tokens_per_sec": 27146.998138567677, "train/loss_code": 1.341255784034729, "train/loss_math": 2.5126288731892905, "train/loss_prose": 3.5744175910949707} +{"step": 4287, "train/loss": 2.0402302742004395, "train/lm_loss": 2.0402302742004395, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0254095263397923e-06, "perf/tokens_per_sec": 27138.807343014705, "train/loss_code": 1.408478558063507, "train/loss_prose": 3.3368701934814453, "train/loss_math": 2.007093667984009} +{"step": 4288, "train/loss": 2.49820813536644, "train/lm_loss": 2.49820813536644, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0170922452360973e-06, "perf/tokens_per_sec": 27124.281917849745, "train/loss_math": 2.189441680908203, "train/loss_prose": 3.437723159790039, "train/loss_code": 1.5520850718021393} +{"step": 4289, "train/loss": 1.8276104032993317, "train/lm_loss": 1.8276104032993317, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0087856783345914e-06, "perf/tokens_per_sec": 27088.22848889737, "train/loss_code": 1.3718061298131943, "train/loss_math": 2.2834147810935974} +{"step": 4290, "train/loss": 2.601003050804138, "train/lm_loss": 2.601003050804138, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.000489829683789e-06, "perf/tokens_per_sec": 27175.339510840164, "train/loss_math": 2.2995784282684326, "train/loss_code": 1.8652867078781128, "train/loss_prose": 3.5376691818237305} +{"step": 4291, "train/loss": 2.4745304584503174, "train/lm_loss": 2.4745304584503174, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.992204703326995e-06, "perf/tokens_per_sec": 26955.96037851603, "train/loss_prose": 3.1400288343429565, "train/loss_code": 1.66905015707016, "train/loss_math": 1.9490136504173279} +{"step": 4292, "train/loss": 2.5269708335399628, "train/lm_loss": 2.5269708335399628, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9839303033022763e-06, "perf/tokens_per_sec": 27132.03544563681, "train/loss_math": 1.8902708768844605, "train/loss_prose": 3.58813746770223} +{"step": 4293, "train/loss": 2.1466917991638184, "train/lm_loss": 2.1466917991638184, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.975666633642471e-06, "perf/tokens_per_sec": 27039.41234316077, "train/loss_prose": 3.8116559982299805, "train/loss_code": 1.2766949534416199, "train/loss_math": 2.161697745323181} +{"step": 4294, "train/loss": 2.211834192276001, "train/lm_loss": 2.211834192276001, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.967413698375196e-06, "perf/tokens_per_sec": 27131.521262399183, "train/loss_code": 1.5281298557917278, "train/loss_math": 1.9234176476796467, "train/loss_prose": 3.6700154542922974} +{"step": 4295, "train/loss": 2.0643694698810577, "train/lm_loss": 2.0643694698810577, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9591715015228284e-06, "perf/tokens_per_sec": 27183.767439358217, "train/loss_code": 1.639600783586502, "train/loss_prose": 3.8842341899871826, "train/loss_math": 2.024105985959371} +{"step": 4296, "train/loss": 2.7000206112861633, "train/lm_loss": 2.7000206112861633, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.950940047102518e-06, "perf/tokens_per_sec": 27189.876653878782, "train/loss_prose": 3.1411030769348143, "train/loss_code": 1.7836260795593262, "train/loss_math": 2.327397108078003} +{"step": 4297, "train/loss": 2.826467275619507, "train/lm_loss": 2.826467275619507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.942719339126171e-06, "perf/tokens_per_sec": 27060.665155584258, "train/loss_math": 2.161938190460205, "train/loss_prose": 3.480793762207031, "train/loss_code": 1.5229154825210571} +{"step": 4298, "train/loss": 2.2903356552124023, "train/lm_loss": 2.2903356552124023, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9345093816004643e-06, "perf/tokens_per_sec": 27106.777825287754, "train/loss_math": 2.1241254806518555, "train/loss_code": 1.35036301612854, "train/loss_prose": 3.0831944147745767} +{"step": 4299, "train/loss": 2.314038187265396, "train/lm_loss": 2.314038187265396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9263101785268254e-06, "perf/tokens_per_sec": 27167.690102535882, "train/loss_math": 1.966641366481781, "train/loss_prose": 3.4271701176961265, "train/loss_code": 1.4325041770935059} +{"step": 4300, "train/loss": 2.054467350244522, "train/lm_loss": 2.054467350244522, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.918121733901458e-06, "perf/tokens_per_sec": 26843.503657025536, "train/loss_math": 2.3638416131337485, "train/loss_code": 1.468349814414978, "train/loss_prose": 3.470815658569336} +{"step": 4301, "train/loss": 2.2580942809581757, "train/lm_loss": 2.2580942809581757, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.909944051715299e-06, "perf/tokens_per_sec": 26873.526377865306, "train/loss_prose": 3.5660593509674072, "train/loss_code": 1.3502190113067627, "train/loss_math": 1.965700387954712} +{"step": 4302, "train/loss": 2.15596604347229, "train/lm_loss": 2.15596604347229, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.901777135954062e-06, "perf/tokens_per_sec": 27181.918007445813, "train/loss_code": 0.7945649524529775, "train/loss_math": 2.318090796470642, "train/loss_prose": 3.4092835585276284} +{"step": 4303, "train/loss": 2.573962092399597, "train/lm_loss": 2.573962092399597, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.893620990598192e-06, "perf/tokens_per_sec": 27140.050653388265, "train/loss_prose": 3.4193063378334045, "train/loss_math": 2.3099356293678284, "train/loss_code": 1.1473002433776855} +{"step": 4304, "train/loss": 2.5204318463802338, "train/lm_loss": 2.5204318463802338, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8854756196229016e-06, "perf/tokens_per_sec": 27266.73382989428, "train/loss_code": 1.3490646084149678, "train/loss_prose": 3.602250635623932, "train/loss_math": 1.7072577476501465} +{"step": 4305, "train/loss": 2.492219924926758, "train/lm_loss": 2.492219924926758, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8773410269981457e-06, "perf/tokens_per_sec": 27273.486546512126, "train/loss_code": 1.4099906086921692, "train/loss_prose": 3.3974427382151284, "train/loss_math": 2.3084832032521567} +{"step": 4306, "train/loss": 2.302546977996826, "train/lm_loss": 2.302546977996826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8692172166886215e-06, "perf/tokens_per_sec": 27311.16393499957, "train/loss_math": 2.007142186164856, "train/loss_prose": 3.4892866611480713, "train/loss_code": 1.8568796515464783} +{"step": 4307, "train/loss": 1.872983992099762, "train/lm_loss": 1.872983992099762, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8611041926537794e-06, "perf/tokens_per_sec": 27312.813883536513, "train/loss_code": 1.7371513366699218, "train/loss_math": 2.0993719895680747} +{"step": 4308, "train/loss": 2.7188687920570374, "train/lm_loss": 2.7188687920570374, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.853001958847809e-06, "perf/tokens_per_sec": 27221.024652802535, "train/loss_prose": 3.4747272968292235, "train/loss_code": 1.4591044386227925} +{"step": 4309, "train/loss": 2.644336014986038, "train/lm_loss": 2.644336014986038, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8449105192196316e-06, "perf/tokens_per_sec": 27234.876735087284, "train/loss_math": 2.2824679613113403, "train/loss_prose": 3.5802669525146484, "train/loss_code": 1.1343419551849365} +{"step": 4310, "train/loss": 2.1979541778564453, "train/lm_loss": 2.1979541778564453, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8368298777129294e-06, "perf/tokens_per_sec": 27178.434877253152, "train/loss_code": 1.1931099096934001, "train/loss_math": 2.341279665629069, "train/loss_prose": 3.490231990814209} +{"step": 4311, "train/loss": 2.5840147137641907, "train/lm_loss": 2.5840147137641907, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.828760038266104e-06, "perf/tokens_per_sec": 27249.08867758436, "train/loss_math": 2.18835312128067, "train/loss_prose": 3.4201548099517822, "train/loss_code": 1.3073961734771729} +{"step": 4312, "train/loss": 2.6162232756614685, "train/lm_loss": 2.6162232756614685, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8207010048122955e-06, "perf/tokens_per_sec": 27180.84287201491, "train/loss_math": 2.0675038496653237, "train/loss_prose": 3.332250237464905, "train/loss_code": 1.3982733488082886} +{"step": 4313, "train/loss": 2.651623785495758, "train/lm_loss": 2.651623785495758, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.812652781279382e-06, "perf/tokens_per_sec": 27253.627526690674, "train/loss_math": 2.2454642256100974, "train/loss_prose": 3.8701021671295166} +{"step": 4314, "train/loss": 1.8580102324485779, "train/lm_loss": 1.8580102324485779, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8046153715899692e-06, "perf/tokens_per_sec": 27307.951449335258, "train/loss_math": 2.0929621160030365, "train/loss_prose": 3.5759968757629395, "train/loss_code": 0.972078541914622} +{"step": 4315, "train/loss": 2.128572016954422, "train/lm_loss": 2.128572016954422, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7965887796613884e-06, "perf/tokens_per_sec": 27147.555806982255, "train/loss_code": 1.491136113802592, "train/loss_math": 2.208409994840622, "train/loss_prose": 3.7215287685394287} +{"step": 4316, "train/loss": 2.3058567941188812, "train/lm_loss": 2.3058567941188812, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7885730094057094e-06, "perf/tokens_per_sec": 27189.833621640613, "train/loss_math": 2.0581148862838745, "train/loss_prose": 3.611955483754476, "train/loss_code": 1.1649192571640015} +{"step": 4317, "train/loss": 2.4204520881175995, "train/lm_loss": 2.4204520881175995, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7805680647297162e-06, "perf/tokens_per_sec": 27187.682183386903, "train/loss_math": 2.2807263851165773, "train/loss_prose": 3.495954155921936, "train/loss_code": 0.9680759906768799} +{"step": 4318, "train/loss": 2.6164349019527435, "train/lm_loss": 2.6164349019527435, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.772573949534918e-06, "perf/tokens_per_sec": 27319.415671731982, "train/loss_prose": 3.4534799456596375, "train/loss_math": 2.0751516819000244, "train/loss_code": 1.4836280345916748} +{"step": 4319, "train/loss": 2.561515748500824, "train/lm_loss": 2.561515748500824, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.764590667717562e-06, "perf/tokens_per_sec": 27186.391483853407, "train/loss_math": 2.361924409866333, "train/loss_code": 1.4345779120922089, "train/loss_prose": 3.2247801423072815} +{"step": 4320, "train/loss": 2.0879552960395813, "train/lm_loss": 2.0879552960395813, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7566182231685944e-06, "perf/tokens_per_sec": 27266.73382989428, "train/loss_math": 2.012017917633057, "train/loss_prose": 3.6660265922546387, "train/loss_code": 1.4887630939483643} +{"step": 4321, "train/loss": 2.6808656752109528, "train/lm_loss": 2.6808656752109528, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7486566197736874e-06, "perf/tokens_per_sec": 27069.74717482967, "train/loss_math": 2.3134390115737915, "train/loss_code": 1.7619459629058838, "train/loss_prose": 3.3240387439727783} +{"step": 4322, "train/loss": 2.4013015031814575, "train/lm_loss": 2.4013015031814575, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7407058614132397e-06, "perf/tokens_per_sec": 27314.768027804676, "train/loss_math": 2.1020535230636597, "train/loss_prose": 3.5751466751098633, "train/loss_code": 1.8259523510932922} +{"step": 4323, "train/loss": 1.5621519833803177, "train/lm_loss": 1.5621519833803177, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.732765951962335e-06, "perf/tokens_per_sec": 26860.081337573443, "train/loss_code": 1.1543050527572631, "train/loss_math": 1.7727165222167969, "train/loss_prose": 3.1802573204040527} +{"step": 4324, "train/loss": 2.2126198410987854, "train/lm_loss": 2.2126198410987854, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7248368952908053e-06, "perf/tokens_per_sec": 27034.646759279658, "train/loss_prose": 3.357093930244446, "train/loss_code": 1.5322620471318562, "train/loss_math": 2.1299949089686074} +{"step": 4325, "train/loss": 2.3911327123641968, "train/lm_loss": 2.3911327123641968, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.716918695263171e-06, "perf/tokens_per_sec": 27054.44344113724, "train/loss_prose": 3.376892328262329, "train/loss_code": 1.6054856777191162, "train/loss_math": 1.9291378259658813} +{"step": 4326, "train/loss": 2.5739850401878357, "train/lm_loss": 2.5739850401878357, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7090113557386665e-06, "perf/tokens_per_sec": 27221.88729926082, "train/loss_math": 2.156224083900452, "train/loss_prose": 3.2702532609303794} +{"step": 4327, "train/loss": 2.0282825231552124, "train/lm_loss": 2.0282825231552124, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7011148805712314e-06, "perf/tokens_per_sec": 27157.511897704873, "train/loss_code": 1.5142775177955627, "train/loss_math": 1.7523325681686401, "train/loss_prose": 3.332242488861084} +{"step": 4328, "train/loss": 2.92197385430336, "train/lm_loss": 2.92197385430336, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6932292736095127e-06, "perf/tokens_per_sec": 27183.55237620551, "train/loss_math": 1.7547345161437988, "train/loss_prose": 3.4077879985173545, "train/loss_code": 1.1743274927139282} +{"step": 4329, "train/loss": 2.5241543650627136, "train/lm_loss": 2.5241543650627136, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6853545386968606e-06, "perf/tokens_per_sec": 27067.998518969012, "train/loss_code": 1.6032767295837402, "train/loss_math": 1.9533703327178955, "train/loss_prose": 3.269985258579254} +{"step": 4330, "train/loss": 3.0896880626678467, "train/lm_loss": 3.0896880626678467, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6774906796713295e-06, "perf/tokens_per_sec": 27150.087051617164, "train/loss_prose": 3.4812951485315957, "train/loss_math": 2.0757040977478027, "train/loss_code": 1.7540283203125} +{"step": 4331, "train/loss": 2.505287766456604, "train/lm_loss": 2.505287766456604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.669637700365665e-06, "perf/tokens_per_sec": 26990.474998271842, "train/loss_code": 1.513439377148946, "train/loss_prose": 3.3233631253242493, "train/loss_math": 2.2085323333740234} +{"step": 4332, "train/loss": 2.116287648677826, "train/lm_loss": 2.116287648677826, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6617956046073063e-06, "perf/tokens_per_sec": 26988.99095909047, "train/loss_prose": 3.7256691455841064, "train/loss_math": 2.0783679723739623, "train/loss_code": 1.4063957333564758} +{"step": 4333, "train/loss": 1.9814287722110748, "train/lm_loss": 1.9814287722110748, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6539643962184057e-06, "perf/tokens_per_sec": 27153.99210030932, "train/loss_math": 2.078330087661743, "train/loss_code": 1.1771148145198822, "train/loss_prose": 3.105550527572632} +{"step": 4334, "train/loss": 2.0999720990657806, "train/lm_loss": 2.0999720990657806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.646144079015797e-06, "perf/tokens_per_sec": 27074.653147097804, "train/loss_code": 1.515012303988139, "train/loss_math": 2.4509477615356445} +{"step": 4335, "train/loss": 2.9163868129253387, "train/lm_loss": 2.9163868129253387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6383346568110062e-06, "perf/tokens_per_sec": 27121.241688728496, "train/loss_prose": 3.447393703460693, "train/loss_code": 1.8664880394935608, "train/loss_math": 2.361149549484253} +{"step": 4336, "train/loss": 1.4411078691482544, "train/lm_loss": 1.4411078691482544, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6305361334102496e-06, "perf/tokens_per_sec": 27143.952815298057, "train/loss_code": 1.260728120803833, "train/loss_math": 1.982246994972229} +{"step": 4337, "train/loss": 2.1381588876247406, "train/lm_loss": 2.1381588876247406, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.622748512614437e-06, "perf/tokens_per_sec": 27296.67032743543, "train/loss_code": 1.7243655920028687, "train/loss_math": 2.121977061033249, "train/loss_prose": 3.4442665576934814} +{"step": 4338, "train/loss": 2.8023948967456818, "train/lm_loss": 2.8023948967456818, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6149717982191485e-06, "perf/tokens_per_sec": 27229.783054006675, "train/loss_prose": 3.847816228866577, "train/loss_code": 1.1323580741882324, "train/loss_math": 1.965178648630778} +{"step": 4339, "train/loss": 2.374780058860779, "train/lm_loss": 2.374780058860779, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6072059940146775e-06, "perf/tokens_per_sec": 27012.37293081761, "train/loss_prose": 3.2636114756266275, "train/loss_math": 2.087119907140732, "train/loss_code": 0.8589259386062622} +{"step": 4340, "train/loss": 1.853681355714798, "train/lm_loss": 1.853681355714798, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.599451103785977e-06, "perf/tokens_per_sec": 27141.679991152818, "train/loss_code": 1.1027914136648178, "train/loss_math": 2.344710350036621, "train/loss_prose": 2.8644319772720337} +{"step": 4341, "train/loss": 2.1748437881469727, "train/lm_loss": 2.1748437881469727, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5917071313126816e-06, "perf/tokens_per_sec": 26894.561220815736, "train/loss_prose": 3.394815444946289, "train/loss_code": 1.5044303735097249, "train/loss_math": 2.3726608753204346} +{"step": 4342, "train/loss": 1.8455619364976883, "train/lm_loss": 1.8455619364976883, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5839740803691032e-06, "perf/tokens_per_sec": 27239.065340958638, "train/loss_code": 1.4823128581047058, "train/loss_prose": 3.1741814613342285, "train/loss_math": 2.089374840259552} +{"step": 4343, "train/loss": 1.946837157011032, "train/lm_loss": 1.946837157011032, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5762519547242515e-06, "perf/tokens_per_sec": 27169.107926070366, "train/loss_math": 2.178524374961853, "train/loss_prose": 3.2246391773223877, "train/loss_code": 1.2119865814844768} +{"step": 4344, "train/loss": 2.3348633348941803, "train/lm_loss": 2.3348633348941803, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5685407581417907e-06, "perf/tokens_per_sec": 27111.611660291128, "train/loss_prose": 3.7484511137008667, "train/loss_math": 1.8790020147959392, "train/loss_code": 1.8483330011367798} +{"step": 4345, "train/loss": 2.12953382730484, "train/lm_loss": 2.12953382730484, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5608404943800622e-06, "perf/tokens_per_sec": 26685.310050559572, "train/loss_code": 1.7147947549819946, "train/loss_math": 2.2183405558268228, "train/loss_prose": 3.5220701694488525} +{"step": 4346, "train/loss": 2.3337000608444214, "train/lm_loss": 2.3337000608444214, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.553151167192086e-06, "perf/tokens_per_sec": 27108.01820258901, "train/loss_prose": 3.35517555475235, "train/loss_code": 1.3122246116399765} +{"step": 4347, "train/loss": 2.0967521369457245, "train/lm_loss": 2.0967521369457245, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5454727803255363e-06, "perf/tokens_per_sec": 27183.767439358217, "train/loss_code": 1.6719239056110382, "train/loss_math": 2.166773716608683, "train/loss_prose": 3.586000919342041} +{"step": 4348, "train/loss": 2.2619371116161346, "train/lm_loss": 2.2619371116161346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5378053375227835e-06, "perf/tokens_per_sec": 27159.52975398148, "train/loss_code": 1.1178830067316692, "train/loss_math": 2.013266444206238, "train/loss_prose": 3.5717716217041016} +{"step": 4349, "train/loss": 2.0790484845638275, "train/lm_loss": 2.0790484845638275, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5301488425208296e-06, "perf/tokens_per_sec": 27074.61047880505, "train/loss_code": 1.6303709983825683, "train/loss_prose": 3.0855727195739746, "train/loss_math": 2.30938720703125} +{"step": 4350, "train/loss": 2.107919603586197, "train/lm_loss": 2.107919603586197, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5225032990513626e-06, "perf/tokens_per_sec": 27084.940128898812, "train/loss_code": 1.3794491291046143, "train/loss_math": 2.0395863950252533, "train/loss_prose": 2.973056674003601} +{"step": 4351, "train/loss": 2.6953134536743164, "train/lm_loss": 2.6953134536743164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.514868710840723e-06, "perf/tokens_per_sec": 27161.67673880323, "train/loss_math": 2.3254045248031616, "train/loss_prose": 3.336965243021647, "train/loss_code": 2.3002676566441855} +{"step": 4352, "train/loss": 2.8644744753837585, "train/lm_loss": 2.8644744753837585, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5072450816099267e-06, "perf/tokens_per_sec": 26920.90112683163, "train/loss_prose": 3.440006875991821, "train/loss_math": 2.2078869342803955, "train/loss_code": 1.2999869585037231} +{"step": 4353, "train/loss": 2.4942392110824585, "train/lm_loss": 2.4942392110824585, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4996324150746347e-06, "perf/tokens_per_sec": 27047.0322567441, "train/loss_code": 1.1986947357654572, "train/loss_math": 2.1633341312408447, "train/loss_prose": 4.331933895746867} +{"step": 4354, "train/loss": 2.3863005936145782, "train/lm_loss": 2.3863005936145782, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.492030714945162e-06, "perf/tokens_per_sec": 27113.62270112448, "train/loss_code": 1.3854954242706299, "train/loss_math": 2.3689775466918945, "train/loss_prose": 3.398654301961263} +{"step": 4355, "train/loss": 2.3019249737262726, "train/lm_loss": 2.3019249737262726, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4844399849264928e-06, "perf/tokens_per_sec": 27158.71398874122, "train/loss_prose": 3.378398895263672, "train/loss_math": 1.8505859375, "train/loss_code": 1.6074044406414032} +{"step": 4356, "train/loss": 2.2123640179634094, "train/lm_loss": 2.2123640179634094, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.47686022871825e-06, "perf/tokens_per_sec": 27219.385775148417, "train/loss_prose": 3.050297975540161, "train/loss_math": 2.083488345146179, "train/loss_code": 1.782617211341858} +{"step": 4357, "train/loss": 2.570004642009735, "train/lm_loss": 2.570004642009735, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4692914500147184e-06, "perf/tokens_per_sec": 27239.36766132868, "train/loss_prose": 3.650073528289795, "train/loss_math": 1.891597330570221, "train/loss_code": 1.088273674249649} +{"step": 4358, "train/loss": 1.7910324037075043, "train/lm_loss": 1.7910324037075043, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.46173365250483e-06, "perf/tokens_per_sec": 27181.530948892636, "train/loss_code": 1.4275907576084137, "train/loss_math": 1.9303808212280273, "train/loss_prose": 3.8323333263397217} +{"step": 4359, "train/loss": 2.044170707464218, "train/lm_loss": 2.044170707464218, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.454186839872158e-06, "perf/tokens_per_sec": 27101.860205079665, "train/loss_code": 1.2384569346904755, "train/loss_prose": 3.0710810820261636, "train/loss_math": 2.186293601989746} +{"step": 4360, "train/loss": 2.0624889135360718, "train/lm_loss": 2.0624889135360718, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.446651015794932e-06, "perf/tokens_per_sec": 26745.006964963464, "train/loss_math": 2.1303024888038635, "train/loss_prose": 3.210108757019043, "train/loss_code": 1.5895309448242188} +{"step": 4361, "train/loss": 2.931399643421173, "train/lm_loss": 2.931399643421173, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4391261839460167e-06, "perf/tokens_per_sec": 26077.95391847769, "train/loss_prose": 3.2597922484079995, "train/loss_math": 1.990058183670044, "train/loss_code": 1.9023858308792114} +{"step": 4362, "train/loss": 2.0473347306251526, "train/lm_loss": 2.0473347306251526, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4316123479929286e-06, "perf/tokens_per_sec": 25802.1467669724, "train/loss_code": 1.0695286989212036, "train/loss_math": 2.2484036684036255, "train/loss_prose": 3.212440252304077} +{"step": 4363, "train/loss": 2.9030675292015076, "train/lm_loss": 2.9030675292015076, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.424109511597822e-06, "perf/tokens_per_sec": 26332.691387090446, "train/loss_prose": 3.3612921635309854, "train/loss_code": 1.5283931493759155} +{"step": 4364, "train/loss": 2.2640420496463776, "train/lm_loss": 2.2640420496463776, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4166176784174795e-06, "perf/tokens_per_sec": 25398.56564130388, "train/loss_math": 2.2351603507995605, "train/loss_code": 1.2944558660189311, "train/loss_prose": 3.252882719039917} +{"step": 4365, "train/loss": 2.084394246339798, "train/lm_loss": 2.084394246339798, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4091368521033387e-06, "perf/tokens_per_sec": 25829.884086384933, "train/loss_math": 1.9692742228507996, "train/loss_code": 1.7966870466868083, "train/loss_prose": 3.4079957008361816} +{"step": 4366, "train/loss": 2.792977035045624, "train/lm_loss": 2.792977035045624, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4016670363014583e-06, "perf/tokens_per_sec": 26236.618816288817, "train/loss_math": 2.2275346517562866, "train/loss_prose": 3.4514896869659424, "train/loss_code": 0.6312987804412842} +{"step": 4367, "train/loss": 2.5946373343467712, "train/lm_loss": 2.5946373343467712, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.394208234652534e-06, "perf/tokens_per_sec": 26640.12328361403, "train/loss_math": 2.0801082253456116, "train/loss_prose": 3.1091663241386414} +{"step": 4368, "train/loss": 2.169036477804184, "train/lm_loss": 2.169036477804184, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3867604507919e-06, "perf/tokens_per_sec": 26395.60395230318, "train/loss_code": 2.1140666007995605, "train/loss_math": 2.0021712481975555, "train/loss_prose": 3.0014071464538574} +{"step": 4369, "train/loss": 2.3663153052330017, "train/lm_loss": 2.3663153052330017, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.379323688349516e-06, "perf/tokens_per_sec": 26545.612576350122, "train/loss_code": 1.7103439966837566, "train/loss_math": 2.0984559853871665, "train/loss_prose": 3.752060890197754} +{"step": 4370, "train/loss": 2.0980345606803894, "train/lm_loss": 2.0980345606803894, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.371897950949964e-06, "perf/tokens_per_sec": 26330.673451183662, "train/loss_code": 1.5585618317127228, "train/loss_prose": 3.2862049341201782, "train/loss_math": 1.9888089299201965} +{"step": 4371, "train/loss": 1.8360032439231873, "train/lm_loss": 1.8360032439231873, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.364483242212456e-06, "perf/tokens_per_sec": 26074.510505770435, "train/loss_prose": 3.6317155361175537, "train/loss_code": 1.1537213325500488, "train/loss_math": 2.147141695022583} +{"step": 4372, "train/loss": 2.819374591112137, "train/lm_loss": 2.819374591112137, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3570795657508445e-06, "perf/tokens_per_sec": 26848.41187465228, "train/loss_math": 2.411818265914917, "train/loss_prose": 3.4984837770462036, "train/loss_code": 1.3256068229675293} +{"step": 4373, "train/loss": 2.390224039554596, "train/lm_loss": 2.390224039554596, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3496869251735804e-06, "perf/tokens_per_sec": 26021.431013904457, "train/loss_prose": 3.5374048550923667, "train/loss_code": 1.5716140270233154, "train/loss_math": 1.8973674178123474} +{"step": 4374, "train/loss": 2.4173943996429443, "train/lm_loss": 2.4173943996429443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3423053240837515e-06, "perf/tokens_per_sec": 26098.197393806266, "train/loss_code": 1.1238738000392914, "train/loss_prose": 3.4852208296457925, "train/loss_math": 2.2119153340657554} +{"step": 4375, "train/loss": 2.4100777208805084, "train/lm_loss": 2.4100777208805084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3349347660790582e-06, "perf/tokens_per_sec": 26676.48411820971, "train/loss_math": 1.9037795960903168, "train/loss_prose": 3.367470105489095, "train/loss_code": 1.5630937814712524} +{"step": 4376, "train/loss": 2.3067988753318787, "train/lm_loss": 2.3067988753318787, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.327575254751821e-06, "perf/tokens_per_sec": 26428.616323717633, "train/loss_code": 1.3530697425206502, "train/loss_math": 2.31553316116333, "train/loss_prose": 3.2547053496042886} +{"step": 4377, "train/loss": 2.278332620859146, "train/lm_loss": 2.278332620859146, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3202267936889786e-06, "perf/tokens_per_sec": 26690.367910414137, "train/loss_code": 1.3186967968940735, "train/loss_math": 2.0215601523717246, "train/loss_prose": 3.1748623847961426} +{"step": 4378, "train/loss": 2.5352845191955566, "train/lm_loss": 2.5352845191955566, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.312889386472078e-06, "perf/tokens_per_sec": 26828.62217891298, "train/loss_math": 2.1972740093866983, "train/loss_code": 1.6933372616767883, "train/loss_prose": 3.434593439102173} +{"step": 4379, "train/loss": 2.804297089576721, "train/lm_loss": 2.804297089576721, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3055630366772856e-06, "perf/tokens_per_sec": 27164.76899334632, "train/loss_math": 2.0519847869873047, "train/loss_prose": 3.347825860977173, "train/loss_code": 1.5912789106369019} +{"step": 4380, "train/loss": 2.0572830736637115, "train/lm_loss": 2.0572830736637115, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2982477478753704e-06, "perf/tokens_per_sec": 25596.782325868295, "train/loss_code": 1.6014535129070282, "train/loss_math": 2.056924343109131, "train/loss_prose": 3.8816778659820557} +{"step": 4381, "train/loss": 2.4240390062332153, "train/lm_loss": 2.4240390062332153, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2909435236317222e-06, "perf/tokens_per_sec": 22491.95060112094, "train/loss_prose": 3.5812620321909585, "train/loss_code": 1.5564777851104736, "train/loss_math": 1.9895461201667786} +{"step": 4382, "train/loss": 2.429170995950699, "train/lm_loss": 2.429170995950699, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2836503675063265e-06, "perf/tokens_per_sec": 25735.896732055517, "train/loss_prose": 3.3741085529327393, "train/loss_code": 1.8565374215443928, "train/loss_math": 1.870715320110321} +{"step": 4383, "train/loss": 2.590795934200287, "train/lm_loss": 2.590795934200287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2763682830537813e-06, "perf/tokens_per_sec": 26036.179932893432, "train/loss_prose": 3.6624393463134766, "train/loss_math": 2.2073135375976562, "train/loss_code": 1.5585544109344482} +{"step": 4384, "train/loss": 2.8488028943538666, "train/lm_loss": 2.8488028943538666, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.269097273823287e-06, "perf/tokens_per_sec": 26440.656102636083, "train/loss_code": 1.352789580821991, "train/loss_math": 1.9755563735961914, "train/loss_prose": 3.621857738494873} +{"step": 4385, "train/loss": 2.412416249513626, "train/lm_loss": 2.412416249513626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2618373433586415e-06, "perf/tokens_per_sec": 25602.809451353547, "train/loss_prose": 3.8603200912475586, "train/loss_code": 1.3652316729227703, "train/loss_math": 1.8113372921943665} +{"step": 4386, "train/loss": 1.803995132446289, "train/lm_loss": 1.803995132446289, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2545884951982483e-06, "perf/tokens_per_sec": 26729.111591001027, "train/loss_code": 1.3155645330746968, "train/loss_prose": 4.36439847946167, "train/loss_math": 2.174175977706909} +{"step": 4387, "train/loss": 2.1004823446273804, "train/lm_loss": 2.1004823446273804, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2473507328751086e-06, "perf/tokens_per_sec": 26263.65236321165, "train/loss_code": 1.1571421225865681, "train/loss_prose": 3.119250774383545, "train/loss_math": 1.987340271472931} +{"step": 4388, "train/loss": 2.8057572841644287, "train/lm_loss": 2.8057572841644287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2401240599168205e-06, "perf/tokens_per_sec": 26442.40603716433, "train/loss_prose": 3.8247124552726746, "train/loss_code": 1.666850248972575, "train/loss_math": 2.1466574668884277} +{"step": 4389, "train/loss": 1.8548413217067719, "train/lm_loss": 1.8548413217067719, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2329084798455746e-06, "perf/tokens_per_sec": 26941.58772156802, "train/loss_code": 1.5067263007164002, "train/loss_prose": 3.323773145675659, "train/loss_math": 1.9906628727912903} +{"step": 4390, "train/loss": 2.146306037902832, "train/lm_loss": 2.146306037902832, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.225703996178155e-06, "perf/tokens_per_sec": 26931.95816892643, "train/loss_math": 2.1705153703689577, "train/loss_code": 1.4769284129142761, "train/loss_prose": 3.3640146255493164} +{"step": 4391, "train/loss": 2.3041293621063232, "train/lm_loss": 2.3041293621063232, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.218510612425945e-06, "perf/tokens_per_sec": 25823.827222549575, "train/loss_math": 2.0861177146434784, "train/loss_code": 1.7199445366859436, "train/loss_prose": 3.32433819770813} +{"step": 4392, "train/loss": 2.4506754875183105, "train/lm_loss": 2.4506754875183105, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2113283320949085e-06, "perf/tokens_per_sec": 24068.72272844446, "train/loss_math": 2.170509099960327, "train/loss_prose": 3.1231133341789246, "train/loss_code": 1.6474804083506267} +{"step": 4393, "train/loss": 2.50536847114563, "train/lm_loss": 2.50536847114563, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2041571586856103e-06, "perf/tokens_per_sec": 25568.28731949143, "train/loss_prose": 3.4103652834892273, "train/loss_code": 1.4591881036758423, "train/loss_math": 2.0239224433898926} +{"step": 4394, "train/loss": 3.0239774584770203, "train/lm_loss": 3.0239774584770203, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1969970956931762e-06, "perf/tokens_per_sec": 25925.108136290448, "train/loss_prose": 3.631013107299805, "train/loss_code": 1.9165563583374023, "train/loss_math": 2.2036406993865967} +{"step": 4395, "train/loss": 2.0517782270908356, "train/lm_loss": 2.0517782270908356, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.189848146607348e-06, "perf/tokens_per_sec": 25773.347611296555, "train/loss_prose": 3.4361517429351807, "train/loss_code": 1.2259639352560043, "train/loss_math": 2.3190335035324097} +{"step": 4396, "train/loss": 2.129663497209549, "train/lm_loss": 2.129663497209549, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1827103149124313e-06, "perf/tokens_per_sec": 26599.37168027869, "train/loss_prose": 3.08443284034729, "train/loss_code": 1.1287277042865753, "train/loss_math": 2.152746558189392} +{"step": 4397, "train/loss": 2.3477739095687866, "train/lm_loss": 2.3477739095687866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1755836040873196e-06, "perf/tokens_per_sec": 26755.253264228046, "train/loss_math": 2.2019617160161338, "train/loss_prose": 2.785210609436035} +{"step": 4398, "train/loss": 2.36403551697731, "train/lm_loss": 2.36403551697731, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.168468017605485e-06, "perf/tokens_per_sec": 26649.545238645547, "train/loss_math": 2.1746777057647706, "train/loss_prose": 3.407054305076599, "train/loss_code": 1.2247854471206665} +{"step": 4399, "train/loss": 2.7188521921634674, "train/lm_loss": 2.7188521921634674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1613635589349756e-06, "perf/tokens_per_sec": 26896.66651636983, "train/loss_math": 1.993062674999237, "train/loss_prose": 3.2037660598754885, "train/loss_code": 1.7458621263504028} +{"step": 4400, "train/loss": 2.2171638011932373, "train/lm_loss": 2.2171638011932373, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1542702315384245e-06, "perf/tokens_per_sec": 26066.6771622003, "train/loss_code": 1.6123638451099396, "train/loss_math": 2.1529732942581177, "train/loss_prose": 3.490954041481018} +{"step": 4400, "eval/loss": 2.1575486044047523, "eval/lm_loss": 2.1575486044047523, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.649907302434041, "eval/loss_code": 1.5544367706802882, "eval/ppl_code": 4.732420336505375, "eval/loss_prose": 3.485345598898436, "eval/ppl_prose": 32.63370333314584, "eval/loss_math": 2.0248919424322462, "eval/ppl_math": 7.575292332744537} +{"step": 4401, "train/loss": 2.779515564441681, "train/lm_loss": 2.779515564441681, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1471880388730296e-06, "perf/tokens_per_sec": 25892.13114961636, "train/loss_code": 1.8322688937187195, "train/loss_prose": 3.2678783893585206, "train/loss_math": 2.2321937084198} +{"step": 4402, "train/loss": 2.205195814371109, "train/lm_loss": 2.205195814371109, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1401169843905693e-06, "perf/tokens_per_sec": 26892.41415885429, "train/loss_math": 1.9599927902221679, "train/loss_prose": 3.0557286739349365, "train/loss_code": 1.7301450967788696} +{"step": 4403, "train/loss": 2.41982364654541, "train/lm_loss": 2.41982364654541, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1330570715373753e-06, "perf/tokens_per_sec": 26675.03440608779, "train/loss_code": 1.8100071907043458, "train/loss_math": 2.4109110832214355, "train/loss_prose": 3.9488213062286377} +{"step": 4404, "train/loss": 1.8867913782596588, "train/lm_loss": 1.8867913782596588, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1260083037543817e-06, "perf/tokens_per_sec": 26750.170785103248, "train/loss_code": 1.2649264136950176, "train/loss_math": 1.8466438055038452, "train/loss_prose": 2.879809856414795} +{"step": 4405, "train/loss": 2.640676736831665, "train/lm_loss": 2.640676736831665, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.118970684477062e-06, "perf/tokens_per_sec": 26848.49579141154, "train/loss_math": 2.4024693965911865, "train/loss_code": 1.619993269443512, "train/loss_prose": 3.5593395233154297} +{"step": 4406, "train/loss": 2.1400774717330933, "train/lm_loss": 2.1400774717330933, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1119442171354696e-06, "perf/tokens_per_sec": 26372.588853121368, "train/loss_math": 2.0213334560394287, "train/loss_code": 1.2042686194181442, "train/loss_prose": 3.4274036089579263} +{"step": 4407, "train/loss": 2.443579375743866, "train/lm_loss": 2.443579375743866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1049289051542185e-06, "perf/tokens_per_sec": 26789.79856662571, "train/loss_math": 2.250707983970642, "train/loss_code": 1.2003255486488342, "train/loss_prose": 3.4652867317199707} +{"step": 4408, "train/loss": 2.0396502017974854, "train/lm_loss": 2.0396502017974854, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0979247519524877e-06, "perf/tokens_per_sec": 25987.343831211067, "train/loss_prose": 3.8766353130340576, "train/loss_math": 2.122020810842514, "train/loss_code": 1.31749427318573} +{"step": 4409, "train/loss": 2.528792142868042, "train/lm_loss": 2.528792142868042, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0909317609440095e-06, "perf/tokens_per_sec": 26056.31906038803, "train/loss_prose": 3.340496301651001, "train/loss_math": 2.041769766807556} +{"step": 4410, "train/loss": 2.2977139353752136, "train/lm_loss": 2.2977139353752136, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.083949935537097e-06, "perf/tokens_per_sec": 26770.220293477396, "train/loss_math": 2.00544810295105, "train/loss_prose": 3.7117110093434653, "train/loss_code": 1.0785605510075886} +{"step": 4411, "train/loss": 2.018092602491379, "train/lm_loss": 2.018092602491379, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0769792791345945e-06, "perf/tokens_per_sec": 25810.791312604415, "train/loss_math": 2.1313987970352173, "train/loss_code": 1.6390987396240235, "train/loss_prose": 3.6864492893218994} +{"step": 4412, "train/loss": 2.6792027354240417, "train/lm_loss": 2.6792027354240417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.070019795133918e-06, "perf/tokens_per_sec": 26213.800013732594, "train/loss_math": 2.323330670595169, "train/loss_prose": 3.557401180267334, "train/loss_code": 1.468095302581787} +{"step": 4413, "train/loss": 2.0633178055286407, "train/lm_loss": 2.0633178055286407, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0630714869270347e-06, "perf/tokens_per_sec": 25768.940516492697, "train/loss_math": 2.02666038274765, "train/loss_code": 1.3803713619709015, "train/loss_prose": 3.4658679962158203} +{"step": 4414, "train/loss": 2.8334542512893677, "train/lm_loss": 2.8334542512893677, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0561343579004715e-06, "perf/tokens_per_sec": 25665.614215093825, "train/loss_prose": 3.8581278324127197, "train/loss_code": 1.7491446733474731, "train/loss_math": 1.8684165477752686} +{"step": 4415, "train/loss": 2.0195019245147705, "train/lm_loss": 2.0195019245147705, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0492084114352965e-06, "perf/tokens_per_sec": 26961.121365203566, "train/loss_prose": 2.877767324447632, "train/loss_math": 1.8670177459716797, "train/loss_code": 1.4662046432495117} +{"step": 4416, "train/loss": 2.893097758293152, "train/lm_loss": 2.893097758293152, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0422936509071346e-06, "perf/tokens_per_sec": 27092.03031555044, "train/loss_code": 1.2564959675073624, "train/loss_prose": 3.4386316935221353} +{"step": 4417, "train/loss": 2.3270629048347473, "train/lm_loss": 2.3270629048347473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.03539007968615e-06, "perf/tokens_per_sec": 27058.363640086372, "train/loss_code": 1.3334709405899048, "train/loss_prose": 3.473193645477295, "train/loss_math": 1.8433269262313843} +{"step": 4418, "train/loss": 2.3345234990119934, "train/lm_loss": 2.3345234990119934, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.028497701137061e-06, "perf/tokens_per_sec": 26070.474559089984, "train/loss_prose": 3.1738494634628296, "train/loss_math": 2.1259378492832184, "train/loss_code": 1.912368655204773} +{"step": 4419, "train/loss": 2.757276952266693, "train/lm_loss": 2.757276952266693, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0216165186191407e-06, "perf/tokens_per_sec": 27140.09352819163, "train/loss_math": 2.250803518295288, "train/loss_prose": 4.663044214248657, "train/loss_code": 1.4781100749969482} +{"step": 4420, "train/loss": 2.4852986335754395, "train/lm_loss": 2.4852986335754395, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.014746535486181e-06, "perf/tokens_per_sec": 27064.2887047481, "train/loss_math": 2.246581276257833, "train/loss_code": 1.6777710318565369, "train/loss_prose": 3.262367566426595} +{"step": 4421, "train/loss": 2.1765935122966766, "train/lm_loss": 2.1765935122966766, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.007887755086532e-06, "perf/tokens_per_sec": 27242.607229335976, "train/loss_prose": 3.4229110876719155, "train/loss_code": 1.3499037474393845, "train/loss_math": 1.7443993091583252} +{"step": 4422, "train/loss": 2.8662473559379578, "train/lm_loss": 2.8662473559379578, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.001040180763078e-06, "perf/tokens_per_sec": 25645.270343213357, "train/loss_prose": 3.3605911254882814, "train/loss_code": 1.63959538936615, "train/loss_math": 2.2437137365341187} +{"step": 4423, "train/loss": 2.4534979164600372, "train/lm_loss": 2.4534979164600372, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9942038158532407e-06, "perf/tokens_per_sec": 26715.0628446316, "train/loss_math": 2.2697285413742065, "train/loss_prose": 3.1873323917388916, "train/loss_code": 1.1695983409881592} +{"step": 4424, "train/loss": 2.0318607091903687, "train/lm_loss": 2.0318607091903687, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9873786636889906e-06, "perf/tokens_per_sec": 26107.517569520533, "train/loss_code": 1.5613098740577698, "train/loss_math": 2.0314162373542786, "train/loss_prose": 2.9734069108963013} +{"step": 4425, "train/loss": 2.34727543592453, "train/lm_loss": 2.34727543592453, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9805647275968205e-06, "perf/tokens_per_sec": 26606.9515975908, "train/loss_math": 2.3108969211578367, "train/loss_code": 0.9295465350151062, "train/loss_prose": 3.147086024284363} +{"step": 4426, "train/loss": 2.199659585952759, "train/lm_loss": 2.199659585952759, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.973762010897759e-06, "perf/tokens_per_sec": 26926.555120708057, "train/loss_code": 1.9891101519266765, "train/loss_prose": 3.2255070209503174, "train/loss_math": 2.1011097729206085} +{"step": 4427, "train/loss": 2.1017472743988037, "train/lm_loss": 2.1017472743988037, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.966970516907368e-06, "perf/tokens_per_sec": 26417.318674018876, "train/loss_prose": 3.4383466243743896, "train/loss_code": 1.2523099780082703, "train/loss_math": 2.060118476549784} +{"step": 4428, "train/loss": 2.613328695297241, "train/lm_loss": 2.613328695297241, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9601902489357364e-06, "perf/tokens_per_sec": 26152.426857133287, "train/loss_code": 1.6579606533050537, "train/loss_prose": 3.1610019207000732, "train/loss_math": 1.7856982946395874} +{"step": 4429, "train/loss": 2.413503274321556, "train/lm_loss": 2.413503274321556, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.95342121028749e-06, "perf/tokens_per_sec": 25787.893983629518, "train/loss_math": 2.2607710361480713, "train/loss_code": 0.9756680130958557, "train/loss_prose": 3.530062735080719} +{"step": 4430, "train/loss": 2.121448189020157, "train/lm_loss": 2.121448189020157, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9466634042617698e-06, "perf/tokens_per_sec": 24714.542250968163, "train/loss_math": 2.051130751768748, "train/loss_code": 2.33240008354187} +{"step": 4431, "train/loss": 1.901684820652008, "train/lm_loss": 1.901684820652008, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9399168341522526e-06, "perf/tokens_per_sec": 24852.042691447536, "train/loss_code": 1.4303871989250183, "train/loss_math": 1.9941929578781128, "train/loss_prose": 4.07315731048584} +{"step": 4432, "train/loss": 2.1130928993225098, "train/lm_loss": 2.1130928993225098, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9331815032471277e-06, "perf/tokens_per_sec": 26465.42089890687, "train/loss_math": 2.0106990933418274, "train/loss_code": 1.5772528797388077, "train/loss_prose": 3.2871668338775635} +{"step": 4433, "train/loss": 2.2786752581596375, "train/lm_loss": 2.2786752581596375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.926457414829125e-06, "perf/tokens_per_sec": 24945.685632598656, "train/loss_math": 1.8572236597537994, "train/loss_code": 1.8315710425376892, "train/loss_prose": 3.568682312965393} +{"step": 4434, "train/loss": 2.3890434205532074, "train/lm_loss": 2.3890434205532074, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9197445721754776e-06, "perf/tokens_per_sec": 26314.460478994224, "train/loss_math": 2.2011346220970154, "train/loss_prose": 3.696604371070862, "train/loss_code": 1.457299828529358} +{"step": 4435, "train/loss": 2.1317126750946045, "train/lm_loss": 2.1317126750946045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.913042978557944e-06, "perf/tokens_per_sec": 26740.885668612842, "train/loss_prose": 3.8583359718322754, "train/loss_math": 1.998326826095581, "train/loss_code": 1.601865291595459} +{"step": 4436, "train/loss": 2.030312418937683, "train/lm_loss": 2.030312418937683, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9063526372427977e-06, "perf/tokens_per_sec": 26435.28565713266, "train/loss_code": 1.4802995026111603, "train/loss_prose": 3.150445580482483, "train/loss_math": 2.010205388069153} +{"step": 4437, "train/loss": 2.2554800510406494, "train/lm_loss": 2.2554800510406494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8996735514908326e-06, "perf/tokens_per_sec": 26019.775761743065, "train/loss_code": 1.4491735696792603, "train/loss_prose": 3.5993240674336753} +{"step": 4438, "train/loss": 1.794919192790985, "train/lm_loss": 1.794919192790985, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8930057245573508e-06, "perf/tokens_per_sec": 26623.98095704487, "train/loss_code": 1.5745463728904725, "train/loss_math": 2.1622068881988525} +{"step": 4439, "train/loss": 2.998989462852478, "train/lm_loss": 2.998989462852478, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8863491596921745e-06, "perf/tokens_per_sec": 26473.90371931901, "train/loss_prose": 3.3741225719451906, "train/loss_math": 2.3737677733103433} +{"step": 4440, "train/loss": 2.74791157245636, "train/lm_loss": 2.74791157245636, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8797038601396283e-06, "perf/tokens_per_sec": 27193.06141753222, "train/loss_math": 2.3181488513946533, "train/loss_prose": 3.337611675262451, "train/loss_code": 1.6783992052078247} +{"step": 4441, "train/loss": 2.3127514123916626, "train/lm_loss": 2.3127514123916626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8730698291385518e-06, "perf/tokens_per_sec": 26859.073501868268, "train/loss_math": 2.2139581441879272, "train/loss_prose": 3.410819888114929, "train/loss_code": 1.412269413471222} +{"step": 4442, "train/loss": 2.0804228484630585, "train/lm_loss": 2.0804228484630585, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.866447069922289e-06, "perf/tokens_per_sec": 27166.100597563887, "train/loss_code": 1.6463284254074098, "train/loss_prose": 3.5784170627593994, "train/loss_math": 2.416661858558655} +{"step": 4443, "train/loss": 2.732612431049347, "train/lm_loss": 2.732612431049347, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8598355857186973e-06, "perf/tokens_per_sec": 27175.38249721205, "train/loss_prose": 3.8219521840413413, "train/loss_code": 1.6738253831863403, "train/loss_math": 2.1803044080734253} +{"step": 4444, "train/loss": 2.1959221959114075, "train/lm_loss": 2.1959221959114075, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8532353797501318e-06, "perf/tokens_per_sec": 27135.72099591066, "train/loss_code": 0.6962563991546631, "train/loss_math": 2.169090191523234, "train/loss_prose": 3.22253147761027} +{"step": 4445, "train/loss": 2.391851782798767, "train/lm_loss": 2.391851782798767, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8466464552334527e-06, "perf/tokens_per_sec": 26902.436715372245, "train/loss_prose": 3.2613678574562073, "train/loss_code": 1.3800254662831624, "train/loss_math": 1.9492660760879517} +{"step": 4446, "train/loss": 2.665708154439926, "train/lm_loss": 2.665708154439926, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8400688153800195e-06, "perf/tokens_per_sec": 26265.74035246507, "train/loss_prose": 3.3677072525024414, "train/loss_code": 0.20414148271083832, "train/loss_math": 2.1414936780929565} +{"step": 4447, "train/loss": 2.25073903799057, "train/lm_loss": 2.25073903799057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8335024633956976e-06, "perf/tokens_per_sec": 26475.12765177083, "train/loss_code": 1.1311286091804504, "train/loss_math": 2.2137235701084137, "train/loss_prose": 3.4443804025650024} +{"step": 4448, "train/loss": 2.421325445175171, "train/lm_loss": 2.421325445175171, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8269474024808441e-06, "perf/tokens_per_sec": 25093.471972656964, "train/loss_code": 1.3160308798154194, "train/loss_math": 2.1394243240356445, "train/loss_prose": 3.7145539124806723} +{"step": 4449, "train/loss": 2.8460980355739594, "train/lm_loss": 2.8460980355739594, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8204036358303173e-06, "perf/tokens_per_sec": 26975.98080576832, "train/loss_code": 1.0209639370441437, "train/loss_prose": 3.4544759591420493} +{"step": 4450, "train/loss": 2.145424723625183, "train/lm_loss": 2.145424723625183, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8138711666334685e-06, "perf/tokens_per_sec": 26490.436361561744, "train/loss_math": 1.9249087572097778, "train/loss_code": 1.2554650008678436, "train/loss_prose": 4.145860075950623} +{"step": 4451, "train/loss": 2.8184807300567627, "train/lm_loss": 2.8184807300567627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8073499980741426e-06, "perf/tokens_per_sec": 26178.808388863406, "train/loss_prose": 3.4603402614593506, "train/loss_code": 2.125845193862915, "train/loss_math": 2.1935465335845947} +{"step": 4452, "train/loss": 2.310741752386093, "train/lm_loss": 2.310741752386093, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8008401333306835e-06, "perf/tokens_per_sec": 26758.253666438228, "train/loss_code": 1.2039485573768616, "train/loss_math": 2.0097609758377075, "train/loss_prose": 4.0194960832595825} +{"step": 4453, "train/loss": 2.5382848978042603, "train/lm_loss": 2.5382848978042603, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7943415755759168e-06, "perf/tokens_per_sec": 25174.84662569495, "train/loss_code": 1.771798849105835, "train/loss_prose": 3.5551544825236, "train/loss_math": 2.0324055751164756} +{"step": 4454, "train/loss": 2.0310434103012085, "train/lm_loss": 2.0310434103012085, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.787854327977162e-06, "perf/tokens_per_sec": 26420.446911875297, "train/loss_prose": 3.213507056236267, "train/loss_code": 1.3225934505462646, "train/loss_math": 2.265479326248169} +{"step": 4455, "train/loss": 3.1287530660629272, "train/lm_loss": 3.1287530660629272, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7813783936962258e-06, "perf/tokens_per_sec": 27043.79646161184, "train/loss_math": 2.3747326135635376, "train/loss_code": 1.572191596031189, "train/loss_prose": 3.7416733741760253} +{"step": 4456, "train/loss": 2.2752257585525513, "train/lm_loss": 2.2752257585525513, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.774913775889389e-06, "perf/tokens_per_sec": 26481.126557011154, "train/loss_prose": 3.891170620918274, "train/loss_code": 1.2718833684921265, "train/loss_math": 2.2012717723846436} +{"step": 4457, "train/loss": 2.3638647496700287, "train/lm_loss": 2.3638647496700287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7684604777074425e-06, "perf/tokens_per_sec": 26685.683105876797, "train/loss_math": 2.1242870688438416, "train/loss_code": 1.1066264112790425, "train/loss_prose": 3.7808216412862143} +{"step": 4458, "train/loss": 2.7388654351234436, "train/lm_loss": 2.7388654351234436, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.762018502295637e-06, "perf/tokens_per_sec": 27032.51980878831, "train/loss_math": 2.2222313284873962, "train/loss_prose": 3.255499541759491} +{"step": 4459, "train/loss": 2.367700070142746, "train/lm_loss": 2.367700070142746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7555878527937164e-06, "perf/tokens_per_sec": 26893.34030039996, "train/loss_math": 1.97899063428243, "train/loss_code": 1.659471035003662, "train/loss_prose": 3.228562275568644} +{"step": 4460, "train/loss": 2.6224634647369385, "train/lm_loss": 2.6224634647369385, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7491685323359014e-06, "perf/tokens_per_sec": 26960.909810518915, "train/loss_math": 2.0167304277420044, "train/loss_prose": 3.4335955381393433, "train/loss_code": 1.195133090019226} +{"step": 4461, "train/loss": 2.673248291015625, "train/lm_loss": 2.673248291015625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7427605440508837e-06, "perf/tokens_per_sec": 26966.284328514, "train/loss_math": 2.3994892239570618, "train/loss_prose": 3.7155165672302246, "train/loss_code": 0.6414790153503418} +{"step": 4462, "train/loss": 2.216221868991852, "train/lm_loss": 2.216221868991852, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7363638910618484e-06, "perf/tokens_per_sec": 27184.240590272777, "train/loss_code": 2.0441256364186606, "train/loss_math": 2.116607129573822, "train/loss_prose": 3.1309690475463867} +{"step": 4463, "train/loss": 2.351668268442154, "train/lm_loss": 2.351668268442154, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7299785764864436e-06, "perf/tokens_per_sec": 27127.665509231847, "train/loss_code": 1.4650235176086426, "train/loss_prose": 3.739223599433899, "train/loss_math": 2.1012131571769714} +{"step": 4464, "train/loss": 2.8673686385154724, "train/lm_loss": 2.8673686385154724, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7236046034367958e-06, "perf/tokens_per_sec": 26957.694588021153, "train/loss_prose": 3.550812864303589, "train/loss_code": 1.673451542854309, "train/loss_math": 1.837981104850769} +{"step": 4465, "train/loss": 1.8963604271411896, "train/lm_loss": 1.8963604271411896, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.717241975019493e-06, "perf/tokens_per_sec": 26837.17177170422, "train/loss_prose": 3.2743263244628906, "train/loss_math": 2.032909631729126, "train/loss_code": 1.4494569599628448} +{"step": 4466, "train/loss": 2.2424288988113403, "train/lm_loss": 2.2424288988113403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7108906943356124e-06, "perf/tokens_per_sec": 26947.96897979671, "train/loss_code": 1.0889158844947815, "train/loss_math": 2.226408004760742, "train/loss_prose": 3.4279837608337402} +{"step": 4467, "train/loss": 2.410041630268097, "train/lm_loss": 2.410041630268097, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7045507644806892e-06, "perf/tokens_per_sec": 26020.524540207167, "train/loss_prose": 3.4078519344329834, "train/loss_code": 1.848238229751587, "train/loss_math": 2.3066378831863403} +{"step": 4468, "train/loss": 2.4699429869651794, "train/lm_loss": 2.4699429869651794, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6982221885447264e-06, "perf/tokens_per_sec": 26642.7673702706, "train/loss_prose": 3.5241469144821167, "train/loss_code": 2.013663113117218, "train/loss_math": 2.3282984495162964} +{"step": 4469, "train/loss": 2.4935227930545807, "train/lm_loss": 2.4935227930545807, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6919049696121958e-06, "perf/tokens_per_sec": 26119.98130537045, "train/loss_math": 2.1255416870117188, "train/loss_prose": 3.310321271419525, "train/loss_code": 1.2279067635536194} +{"step": 4470, "train/loss": 2.0590514540672302, "train/lm_loss": 2.0590514540672302, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6855991107620344e-06, "perf/tokens_per_sec": 25573.006702917697, "train/loss_code": 1.1979794104894002, "train/loss_math": 2.1762123107910156, "train/loss_prose": 4.173624515533447} +{"step": 4471, "train/loss": 2.606979012489319, "train/lm_loss": 2.606979012489319, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.679304615067634e-06, "perf/tokens_per_sec": 26305.032267547795, "train/loss_prose": 2.996327257156372, "train/loss_math": 2.0597038865089417, "train/loss_code": 1.7547874450683594} +{"step": 4472, "train/loss": 2.0009728223085403, "train/lm_loss": 2.0009728223085403, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6730214855968707e-06, "perf/tokens_per_sec": 26557.38972570552, "train/loss_code": 1.172382429242134, "train/loss_math": 2.3703142404556274, "train/loss_prose": 3.2888118028640747} +{"step": 4473, "train/loss": 2.58718004822731, "train/lm_loss": 2.58718004822731, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6667497254120507e-06, "perf/tokens_per_sec": 26760.254308464904, "train/loss_prose": 3.1979418992996216, "train/loss_math": 1.9764183461666107} +{"step": 4474, "train/loss": 2.3012292683124542, "train/lm_loss": 2.3012292683124542, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6604893375699594e-06, "perf/tokens_per_sec": 26691.363124933, "train/loss_math": 2.018131732940674, "train/loss_prose": 3.0691757202148438, "train/loss_code": 1.1297801733016968} +{"step": 4475, "train/loss": 2.1053704619407654, "train/lm_loss": 2.1053704619407654, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.654240325121831e-06, "perf/tokens_per_sec": 27012.670240081257, "train/loss_code": 1.360186020533244, "train/loss_math": 2.08848237991333, "train/loss_prose": 3.2484798431396484} +{"step": 4476, "train/loss": 2.45722633600235, "train/lm_loss": 2.45722633600235, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6480026911133623e-06, "perf/tokens_per_sec": 26564.411742239747, "train/loss_math": 2.0328285296758017, "train/loss_prose": 3.1780696511268616, "train/loss_code": 0.8470458984375} +{"step": 4477, "train/loss": 1.7408530712127686, "train/lm_loss": 1.7408530712127686, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6417764385846996e-06, "perf/tokens_per_sec": 26301.528629539076, "train/loss_code": 1.583805012702942, "train/loss_math": 2.002599914868673} +{"step": 4478, "train/loss": 2.59624844789505, "train/lm_loss": 2.59624844789505, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.635561570570443e-06, "perf/tokens_per_sec": 26182.03999573281, "train/loss_prose": 3.2783292293548585, "train/loss_code": 1.1492295563220978, "train/loss_math": 2.079882860183716} +{"step": 4479, "train/loss": 2.631638288497925, "train/lm_loss": 2.631638288497925, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.629358090099639e-06, "perf/tokens_per_sec": 26844.09087059581, "train/loss_code": 1.171898066997528, "train/loss_prose": 3.6859307885169983, "train/loss_math": 1.9827929735183716} +{"step": 4480, "train/loss": 2.4319211542606354, "train/lm_loss": 2.4319211542606354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6231660001957916e-06, "perf/tokens_per_sec": 26385.34754896992, "train/loss_code": 1.4994309147198994, "train/loss_prose": 3.4240405559539795, "train/loss_math": 2.3424772024154663} +{"step": 4481, "train/loss": 2.2347626388072968, "train/lm_loss": 2.2347626388072968, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6169853038768583e-06, "perf/tokens_per_sec": 25871.231940824, "train/loss_prose": 3.354644457499186, "train/loss_code": 1.1396677494049072, "train/loss_math": 1.8449445168177288} +{"step": 4482, "train/loss": 2.504559338092804, "train/lm_loss": 2.504559338092804, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6108160041552217e-06, "perf/tokens_per_sec": 25921.2356345772, "train/loss_prose": 3.311514675617218, "train/loss_code": 1.6379293998082478, "train/loss_math": 1.8766292333602905} +{"step": 4483, "train/loss": 2.7279956936836243, "train/lm_loss": 2.7279956936836243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6046581040377319e-06, "perf/tokens_per_sec": 27037.327193494624, "train/loss_code": 1.7945804198582966, "train/loss_prose": 3.5353317260742188, "train/loss_math": 2.298895835876465} +{"step": 4484, "train/loss": 2.2872674465179443, "train/lm_loss": 2.2872674465179443, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5985116065256684e-06, "perf/tokens_per_sec": 26771.63865180866, "train/loss_math": 2.117826666150774, "train/loss_prose": 3.473353147506714} +{"step": 4485, "train/loss": 2.076837867498398, "train/lm_loss": 2.076837867498398, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5923765146147656e-06, "perf/tokens_per_sec": 22831.72595341123, "train/loss_code": 1.4137082993984222, "train/loss_math": 2.1987714767456055, "train/loss_prose": 3.2811635732650757} +{"step": 4486, "train/loss": 2.2682501673698425, "train/lm_loss": 2.2682501673698425, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.586252831295193e-06, "perf/tokens_per_sec": 25427.737142095953, "train/loss_prose": 3.392974297205607, "train/loss_code": 1.2161771257718403, "train/loss_math": 2.1592729091644287} +{"step": 4487, "train/loss": 2.1489240527153015, "train/lm_loss": 2.1489240527153015, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5801405595515546e-06, "perf/tokens_per_sec": 25725.183445314102, "train/loss_prose": 3.261193633079529, "train/loss_math": 2.136646807193756, "train/loss_code": 1.0612088441848755} +{"step": 4488, "train/loss": 2.151138186454773, "train/lm_loss": 2.151138186454773, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5740397023629006e-06, "perf/tokens_per_sec": 25445.664182807308, "train/loss_math": 2.013195276260376, "train/loss_code": 1.8096832831700642, "train/loss_prose": 2.8702350854873657} +{"step": 4489, "train/loss": 2.710967183113098, "train/lm_loss": 2.710967183113098, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5679502627027136e-06, "perf/tokens_per_sec": 25663.658882860487, "train/loss_prose": 3.351530969142914, "train/loss_math": 2.4126217365264893, "train/loss_code": 1.7281849384307861} +{"step": 4490, "train/loss": 2.037497967481613, "train/lm_loss": 2.037497967481613, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5618722435389144e-06, "perf/tokens_per_sec": 26727.11560265593, "train/loss_code": 1.510412871837616, "train/loss_math": 2.093204951286316, "train/loss_prose": 2.8131322860717773} +{"step": 4491, "train/loss": 2.202878564596176, "train/lm_loss": 2.202878564596176, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5558056478338523e-06, "perf/tokens_per_sec": 26629.96495916346, "train/loss_prose": 2.971479058265686, "train/loss_math": 2.031058645248413, "train/loss_code": 1.5247771739959717} +{"step": 4492, "train/loss": 3.1433833837509155, "train/lm_loss": 3.1433833837509155, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5497504785443151e-06, "perf/tokens_per_sec": 25856.12444746787, "train/loss_prose": 3.292771373476301, "train/loss_math": 2.097668170928955} +{"step": 4493, "train/loss": 2.5834479331970215, "train/lm_loss": 2.5834479331970215, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.543706738621517e-06, "perf/tokens_per_sec": 26646.486505266523, "train/loss_math": 2.118747293949127, "train/loss_prose": 3.380318800608317, "train/loss_code": 2.051638126373291} +{"step": 4494, "train/loss": 2.8641860485076904, "train/lm_loss": 2.8641860485076904, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5376744310111019e-06, "perf/tokens_per_sec": 27004.98477460569, "train/loss_prose": 3.3481236934661864, "train/loss_code": 1.3769676685333252, "train/loss_math": 2.397951364517212} +{"step": 4495, "train/loss": 1.986992508172989, "train/lm_loss": 1.986992508172989, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5316535586531483e-06, "perf/tokens_per_sec": 26581.139909456335, "train/loss_math": 2.1758132696151735, "train/loss_code": 1.6722909609476726} +{"step": 4496, "train/loss": 2.1138824820518494, "train/lm_loss": 2.1138824820518494, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5256441244821534e-06, "perf/tokens_per_sec": 27036.17837089261, "train/loss_prose": 3.569772243499756, "train/loss_math": 2.1475154161453247, "train/loss_code": 1.5837418635686238} +{"step": 4497, "train/loss": 1.85682812333107, "train/lm_loss": 1.85682812333107, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.519646131427044e-06, "perf/tokens_per_sec": 26525.03791812378, "train/loss_code": 1.372163474559784, "train/loss_prose": 3.3108216524124146} +{"step": 4498, "train/loss": 2.1964064240455627, "train/lm_loss": 2.1964064240455627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5136595824111643e-06, "perf/tokens_per_sec": 26826.48563179353, "train/loss_math": 2.384632428487142, "train/loss_code": 1.8082930445671082, "train/loss_prose": 3.1841821670532227} +{"step": 4499, "train/loss": 2.2090435326099396, "train/lm_loss": 2.2090435326099396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5076844803522922e-06, "perf/tokens_per_sec": 26206.122614469612, "train/loss_code": 1.6938217083613079, "train/loss_math": 2.330738127231598, "train/loss_prose": 3.267930746078491} +{"step": 4500, "train/loss": 1.9591783285140991, "train/lm_loss": 1.9591783285140991, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5017208281626144e-06, "perf/tokens_per_sec": 24961.016906205143, "train/loss_code": 1.5642426311969757, "train/loss_math": 2.0729596614837646, "train/loss_prose": 3.1975765228271484} +{"step": 4501, "train/loss": 2.441721200942993, "train/lm_loss": 2.441721200942993, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.495768628748745e-06, "perf/tokens_per_sec": 26058.80899718326, "train/loss_math": 2.0477844874064126, "train/loss_code": 1.2827062606811523, "train/loss_prose": 3.608334223429362} +{"step": 4502, "train/loss": 2.7610633969306946, "train/lm_loss": 2.7610633969306946, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4898278850117159e-06, "perf/tokens_per_sec": 26963.575642194708, "train/loss_math": 2.1770448684692383, "train/loss_code": 1.5999492406845093, "train/loss_prose": 3.6336297392845154} +{"step": 4503, "train/loss": 2.3367032408714294, "train/lm_loss": 2.3367032408714294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.483898599846964e-06, "perf/tokens_per_sec": 27115.163274614933, "train/loss_math": 1.9161229133605957, "train/loss_prose": 3.188751459121704, "train/loss_code": 1.689501941204071} +{"step": 4504, "train/loss": 2.1243870556354523, "train/lm_loss": 2.1243870556354523, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4779807761443636e-06, "perf/tokens_per_sec": 26698.497668137323, "train/loss_math": 2.2049129605293274, "train/loss_code": 1.5663093328475952, "train/loss_prose": 3.4765172004699707} +{"step": 4505, "train/loss": 2.5953535735607147, "train/lm_loss": 2.5953535735607147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4720744167881828e-06, "perf/tokens_per_sec": 26395.766172907413, "train/loss_prose": 3.490196704864502, "train/loss_math": 2.129352420568466, "train/loss_code": 1.7748284339904785} +{"step": 4506, "train/loss": 1.8217937052249908, "train/lm_loss": 1.8217937052249908, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4661795246571103e-06, "perf/tokens_per_sec": 26645.24667823681, "train/loss_math": 1.9952588478724163, "train/loss_code": 1.1153077632188797, "train/loss_prose": 4.1273417472839355} +{"step": 4507, "train/loss": 1.7608361393213272, "train/lm_loss": 1.7608361393213272, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4602961026242479e-06, "perf/tokens_per_sec": 26438.987776107162, "train/loss_math": 2.0709632635116577, "train/loss_code": 1.0835899859666824, "train/loss_prose": 3.5394394397735596} +{"step": 4508, "train/loss": 2.205579787492752, "train/lm_loss": 2.205579787492752, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4544241535570991e-06, "perf/tokens_per_sec": 25107.150746351545, "train/loss_prose": 3.680763602256775, "train/loss_math": 1.7972622315088909, "train/loss_code": 1.6304414669672649} +{"step": 4509, "train/loss": 2.448816239833832, "train/lm_loss": 2.448816239833832, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4485636803175829e-06, "perf/tokens_per_sec": 26228.00705932643, "train/loss_math": 1.9648977518081665, "train/loss_code": 1.1225923597812653, "train/loss_prose": 3.353887379169464} +{"step": 4510, "train/loss": 2.4131233990192413, "train/lm_loss": 2.4131233990192413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4427146857620228e-06, "perf/tokens_per_sec": 26161.745973298977, "train/loss_prose": 3.4013763268788657, "train/loss_math": 1.9184295535087585, "train/loss_code": 1.4271399974822998} +{"step": 4511, "train/loss": 2.245197296142578, "train/lm_loss": 2.245197296142578, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4368771727411495e-06, "perf/tokens_per_sec": 25653.77176630102, "train/loss_code": 1.4284789562225342, "train/loss_prose": 3.061915636062622} +{"step": 4512, "train/loss": 2.182376265525818, "train/lm_loss": 2.182376265525818, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4310511441000928e-06, "perf/tokens_per_sec": 26671.88700224648, "train/loss_code": 1.4543522238731383, "train/loss_prose": 3.3957497278849282} +{"step": 4513, "train/loss": 1.9657608270645142, "train/lm_loss": 1.9657608270645142, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.425236602678387e-06, "perf/tokens_per_sec": 26154.65646860732, "train/loss_math": 2.251856962839762, "train/loss_code": 1.4461149722337723, "train/loss_prose": 3.186056137084961} +{"step": 4514, "train/loss": 2.547135442495346, "train/lm_loss": 2.547135442495346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4194335513099761e-06, "perf/tokens_per_sec": 24728.095654414316, "train/loss_code": 1.3112558523813884, "train/loss_math": 2.513509511947632, "train/loss_prose": 3.482451558113098} +{"step": 4515, "train/loss": 2.201068103313446, "train/lm_loss": 2.201068103313446, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4136419928231892e-06, "perf/tokens_per_sec": 26465.70629004355, "train/loss_math": 2.104514569044113, "train/loss_code": 1.8498385349909465, "train/loss_prose": 3.6409707069396973} +{"step": 4516, "train/loss": 1.964655727148056, "train/lm_loss": 1.964655727148056, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4078619300407686e-06, "perf/tokens_per_sec": 25573.50157788813, "train/loss_code": 1.4659441113471985, "train/loss_math": 2.16929292678833, "train/loss_prose": 4.048938751220703} +{"step": 4517, "train/loss": 2.9775070548057556, "train/lm_loss": 2.9775070548057556, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4020933657798385e-06, "perf/tokens_per_sec": 26611.19658977552, "train/loss_math": 2.003252625465393, "train/loss_prose": 3.562059688568115} +{"step": 4518, "train/loss": 2.2747886180877686, "train/lm_loss": 2.2747886180877686, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3963363028519333e-06, "perf/tokens_per_sec": 25774.004938805025, "train/loss_code": 2.20074257850647, "train/loss_math": 2.131011962890625, "train/loss_prose": 2.9325718879699707} +{"step": 4519, "train/loss": 2.1690218448638916, "train/lm_loss": 2.1690218448638916, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3905907440629752e-06, "perf/tokens_per_sec": 25874.58289945133, "train/loss_math": 2.17901082833608, "train/loss_code": 1.316356102625529, "train/loss_prose": 3.4330369234085083} +{"step": 4520, "train/loss": 2.888333886861801, "train/lm_loss": 2.888333886861801, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3848566922132766e-06, "perf/tokens_per_sec": 25532.30230000535, "train/loss_prose": 3.600244426727295, "train/loss_code": 1.2158524990081787, "train/loss_math": 1.9447976350784302} +{"step": 4521, "train/loss": 1.7062485814094543, "train/lm_loss": 1.7062485814094543, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.379134150097547e-06, "perf/tokens_per_sec": 25988.71975687201, "train/loss_math": 1.9392167031764984, "train/loss_code": 1.4732803702354431} +{"step": 4522, "train/loss": 2.041023463010788, "train/lm_loss": 2.041023463010788, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3734231205048825e-06, "perf/tokens_per_sec": 25948.171506938696, "train/loss_math": 2.177932341893514, "train/loss_code": 1.330342173576355, "train/loss_prose": 4.473021984100342} +{"step": 4523, "train/loss": 2.295744627714157, "train/lm_loss": 2.295744627714157, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3677236062187654e-06, "perf/tokens_per_sec": 25647.52918063134, "train/loss_code": 1.2972320318222046, "train/loss_math": 2.0179311831792197, "train/loss_prose": 3.2392334938049316} +{"step": 4524, "train/loss": 2.362770915031433, "train/lm_loss": 2.362770915031433, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.362035610017079e-06, "perf/tokens_per_sec": 25170.715574608737, "train/loss_math": 2.3793280124664307, "train/loss_code": 1.751848856608073, "train/loss_prose": 3.2543179988861084} +{"step": 4525, "train/loss": 2.1821593642234802, "train/lm_loss": 2.1821593642234802, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3563591346720804e-06, "perf/tokens_per_sec": 26496.319611589992, "train/loss_code": 1.4890487790107727, "train/loss_math": 2.2573153495788576, "train/loss_prose": 3.1926004886627197} +{"step": 4526, "train/loss": 2.5702555179595947, "train/lm_loss": 2.5702555179595947, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3506941829504156e-06, "perf/tokens_per_sec": 26118.869406178877, "train/loss_math": 2.242812236150106, "train/loss_prose": 3.552585482597351} +{"step": 4527, "train/loss": 2.169913589954376, "train/lm_loss": 2.169913589954376, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.345040757613103e-06, "perf/tokens_per_sec": 25820.29536358349, "train/loss_prose": 3.4077080488204956, "train/loss_code": 1.6337241381406784, "train/loss_math": 2.0044975876808167} +{"step": 4528, "train/loss": 2.5227431058883667, "train/lm_loss": 2.5227431058883667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3393988614155667e-06, "perf/tokens_per_sec": 24456.791984716525, "train/loss_prose": 3.133046547571818, "train/loss_code": 1.5699405670166016, "train/loss_math": 2.3032161593437195} +{"step": 4529, "train/loss": 2.545221120119095, "train/lm_loss": 2.545221120119095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.333768497107593e-06, "perf/tokens_per_sec": 26014.771875364368, "train/loss_code": 1.7462259531021118, "train/loss_prose": 3.8160685698191323, "train/loss_math": 1.928659200668335} +{"step": 4530, "train/loss": 2.3306951224803925, "train/lm_loss": 2.3306951224803925, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.328149667433351e-06, "perf/tokens_per_sec": 26125.979251130666, "train/loss_code": 1.5296993404626846, "train/loss_prose": 3.509240468343099, "train/loss_math": 1.9990417957305908} +{"step": 4531, "train/loss": 2.4015198051929474, "train/lm_loss": 2.4015198051929474, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3225423751313942e-06, "perf/tokens_per_sec": 26458.451112326067, "train/loss_prose": 3.538764397303263, "train/loss_math": 2.089267293612162, "train/loss_code": 1.16403129696846} +{"step": 4532, "train/loss": 2.357035994529724, "train/lm_loss": 2.357035994529724, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3169466229346422e-06, "perf/tokens_per_sec": 24869.490901142297, "train/loss_code": 1.7488117615381877, "train/loss_prose": 3.225736061731974, "train/loss_math": 1.966322422027588} +{"step": 4533, "train/loss": 2.2932485938072205, "train/lm_loss": 2.2932485938072205, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3113624135704078e-06, "perf/tokens_per_sec": 25864.221115180968, "train/loss_code": 1.5499219298362732, "train/loss_math": 2.068042278289795, "train/loss_prose": 3.359419345855713} +{"step": 4534, "train/loss": 2.1663677096366882, "train/lm_loss": 2.1663677096366882, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.305789749760361e-06, "perf/tokens_per_sec": 25625.302694107926, "train/loss_code": 1.4216975569725037, "train/loss_prose": 3.2835583686828613, "train/loss_math": 1.980107456445694} +{"step": 4535, "train/loss": 2.6922378540039062, "train/lm_loss": 2.6922378540039062, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3002286342205462e-06, "perf/tokens_per_sec": 26340.241209759148, "train/loss_math": 2.349510590235392, "train/loss_code": 2.3123748302459717, "train/loss_prose": 3.2882068951924643} +{"step": 4536, "train/loss": 2.703905999660492, "train/lm_loss": 2.703905999660492, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2946790696613837e-06, "perf/tokens_per_sec": 25350.032586401772, "train/loss_prose": 3.3240557312965393, "train/loss_math": 2.3128451108932495, "train/loss_code": 1.396489143371582} +{"step": 4537, "train/loss": 2.402619779109955, "train/lm_loss": 2.402619779109955, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2891410587876711e-06, "perf/tokens_per_sec": 26679.01113600964, "train/loss_code": 1.4587671160697937, "train/loss_prose": 3.746131340662638, "train/loss_math": 2.147495746612549} +{"step": 4538, "train/loss": 2.3963004052639008, "train/lm_loss": 2.3963004052639008, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2836146042985636e-06, "perf/tokens_per_sec": 26648.635812130342, "train/loss_prose": 3.5523767471313477, "train/loss_code": 1.4292128086090088, "train/loss_math": 1.884948968887329} +{"step": 4539, "train/loss": 2.355902224779129, "train/lm_loss": 2.355902224779129, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2780997088875869e-06, "perf/tokens_per_sec": 26411.511054331568, "train/loss_prose": 3.040898323059082, "train/loss_code": 1.5978058973948162, "train/loss_math": 1.890206217765808} +{"step": 4540, "train/loss": 2.5180094838142395, "train/lm_loss": 2.5180094838142395, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2725963752426379e-06, "perf/tokens_per_sec": 26028.645642592764, "train/loss_code": 1.6439581314722698, "train/loss_prose": 3.380388855934143, "train/loss_math": 1.690646767616272} +{"step": 4541, "train/loss": 2.4403201937675476, "train/lm_loss": 2.4403201937675476, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2671046060459686e-06, "perf/tokens_per_sec": 26109.382403540447, "train/loss_math": 1.949864149093628, "train/loss_prose": 3.561680555343628, "train/loss_code": 1.493963360786438} +{"step": 4542, "train/loss": 2.970665991306305, "train/lm_loss": 2.970665991306305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2616244039742014e-06, "perf/tokens_per_sec": 26600.113003342845, "train/loss_math": 2.2363524436950684, "train/loss_prose": 3.522437906265259, "train/loss_code": 1.6804335117340088} +{"step": 4543, "train/loss": 2.0654271245002747, "train/lm_loss": 2.0654271245002747, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2561557716983307e-06, "perf/tokens_per_sec": 25735.241346871153, "train/loss_prose": 3.456740379333496, "train/loss_code": 1.4117746651172638, "train/loss_math": 1.9814187288284302} +{"step": 4544, "train/loss": 2.335664302110672, "train/lm_loss": 2.335664302110672, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.250698711883691e-06, "perf/tokens_per_sec": 26065.05569437807, "train/loss_math": 1.9283925890922546, "train/loss_prose": 3.3465703328450522, "train/loss_code": 1.5962727069854736} +{"step": 4545, "train/loss": 2.015859156847, "train/lm_loss": 2.015859156847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2452532271899853e-06, "perf/tokens_per_sec": 26957.779189314722, "train/loss_math": 1.961008071899414, "train/loss_code": 0.8492876291275024, "train/loss_prose": 3.2921327352523804} +{"step": 4546, "train/loss": 2.3825957775115967, "train/lm_loss": 2.3825957775115967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2398193202712822e-06, "perf/tokens_per_sec": 26440.452636908874, "train/loss_prose": 3.3002585570017495, "train/loss_math": 1.9104042847951253, "train/loss_code": 1.7143882513046265} +{"step": 4547, "train/loss": 2.1431318819522858, "train/lm_loss": 2.1431318819522858, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.234396993775999e-06, "perf/tokens_per_sec": 26743.88284907026, "train/loss_code": 1.4872123003005981, "train/loss_math": 2.1488128900527954, "train/loss_prose": 5.411368370056152} +{"step": 4548, "train/loss": 2.3567221760749817, "train/lm_loss": 2.3567221760749817, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2289862503469168e-06, "perf/tokens_per_sec": 24169.79931598103, "train/loss_math": 2.189239740371704, "train/loss_prose": 3.2734530766805015, "train/loss_code": 1.2328495383262634} +{"step": 4549, "train/loss": 2.585527092218399, "train/lm_loss": 2.585527092218399, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2235870926211619e-06, "perf/tokens_per_sec": 26814.384956359958, "train/loss_prose": 3.5166074434916177, "train/loss_code": 1.9532458782196045, "train/loss_math": 2.1373281478881836} +{"step": 4550, "train/loss": 2.6823544800281525, "train/lm_loss": 2.6823544800281525, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.218199523230218e-06, "perf/tokens_per_sec": 26546.679065512697, "train/loss_prose": 3.428839683532715, "train/loss_math": 2.309471050898234, "train/loss_code": 0.8150635957717896} +{"step": 4551, "train/loss": 2.222967028617859, "train/lm_loss": 2.222967028617859, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2128235447999181e-06, "perf/tokens_per_sec": 26143.392859968957, "train/loss_code": 1.5172700484593709, "train/loss_math": 1.586840569972992, "train/loss_prose": 3.352747917175293} +{"step": 4552, "train/loss": 1.8061667382717133, "train/lm_loss": 1.8061667382717133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2074591599504608e-06, "perf/tokens_per_sec": 26849.209105045597, "train/loss_code": 1.073193609714508, "train/loss_prose": 3.274757146835327, "train/loss_math": 2.2939340670903525} +{"step": 4553, "train/loss": 2.402737647294998, "train/lm_loss": 2.402737647294998, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2021063712963716e-06, "perf/tokens_per_sec": 26059.95531851636, "train/loss_prose": 3.3971714973449707, "train/loss_code": 1.6160621643066406, "train/loss_math": 1.9327541589736938} +{"step": 4554, "train/loss": 1.9081596732139587, "train/lm_loss": 1.9081596732139587, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1967651814465354e-06, "perf/tokens_per_sec": 24916.05514640832, "train/loss_math": 1.9884729385375977, "train/loss_code": 1.5599377393722533, "train/loss_prose": 3.4886422157287598} +{"step": 4555, "train/loss": 1.8104547560214996, "train/lm_loss": 1.8104547560214996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1914355930041837e-06, "perf/tokens_per_sec": 25746.31026788056, "train/loss_code": 1.1050748348236084, "train/loss_math": 2.2126708030700684, "train/loss_prose": 3.3727961778640747} +{"step": 4556, "train/loss": 2.271452933549881, "train/lm_loss": 2.271452933549881, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1861176085668917e-06, "perf/tokens_per_sec": 26607.981811029233, "train/loss_math": 2.24859881401062, "train/loss_code": 1.19319353501002, "train/loss_prose": 3.364948590596517} +{"step": 4557, "train/loss": 2.5912230610847473, "train/lm_loss": 2.5912230610847473, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.180811230726589e-06, "perf/tokens_per_sec": 26305.032267547795, "train/loss_code": 1.2277283668518066, "train/loss_math": 2.1421406865119934, "train/loss_prose": 3.4975114464759827} +{"step": 4558, "train/loss": 2.2262831926345825, "train/lm_loss": 2.2262831926345825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1755164620695315e-06, "perf/tokens_per_sec": 26255.62472815746, "train/loss_code": 1.9658477306365967, "train/loss_prose": 3.039315938949585, "train/loss_math": 2.302519599596659} +{"step": 4559, "train/loss": 2.5158415138721466, "train/lm_loss": 2.5158415138721466, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.170233305176327e-06, "perf/tokens_per_sec": 26659.67713923037, "train/loss_prose": 3.5927957693735757, "train/loss_math": 2.069857954978943, "train/loss_code": 1.5693856477737427} +{"step": 4560, "train/loss": 1.9360359907150269, "train/lm_loss": 1.9360359907150269, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1649617626219217e-06, "perf/tokens_per_sec": 26717.264778196804, "train/loss_math": 2.0143049240112303, "train/loss_code": 1.8055874903996785} +{"step": 4561, "train/loss": 2.481022149324417, "train/lm_loss": 2.481022149324417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.159701836975602e-06, "perf/tokens_per_sec": 26427.15277410304, "train/loss_prose": 3.4609829783439636, "train/loss_code": 1.501061275601387} +{"step": 4562, "train/loss": 1.9772286117076874, "train/lm_loss": 1.9772286117076874, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1544535308009917e-06, "perf/tokens_per_sec": 26480.065328267454, "train/loss_math": 2.232162892818451, "train/loss_code": 1.7222943902015686} +{"step": 4563, "train/loss": 3.3827710151672363, "train/lm_loss": 3.3827710151672363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1492168466560538e-06, "perf/tokens_per_sec": 26627.94247824645, "train/loss_prose": 4.436985111236572, "train/loss_math": 1.8744224309921265, "train/loss_code": 1.5014101266860962} +{"step": 4564, "train/loss": 2.482307881116867, "train/lm_loss": 2.482307881116867, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1439917870930793e-06, "perf/tokens_per_sec": 26927.145973514645, "train/loss_math": 2.1750965118408203, "train/loss_prose": 3.3058629631996155, "train/loss_code": 1.4866382082303364} +{"step": 4565, "train/loss": 1.9770781099796295, "train/lm_loss": 1.9770781099796295, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1387783546587011e-06, "perf/tokens_per_sec": 27024.738023215024, "train/loss_code": 1.586925446987152, "train/loss_prose": 3.147536516189575} +{"step": 4566, "train/loss": 2.0815343260765076, "train/lm_loss": 2.0815343260765076, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1335765518938862e-06, "perf/tokens_per_sec": 25972.610908277282, "train/loss_prose": 3.198528528213501, "train/loss_code": 1.659314799308777, "train/loss_math": 1.9586434364318848} +{"step": 4567, "train/loss": 2.103548616170883, "train/lm_loss": 2.103548616170883, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1283863813339263e-06, "perf/tokens_per_sec": 27246.841426298954, "train/loss_prose": 3.3086787462234497, "train/loss_math": 2.049489219983419, "train/loss_code": 1.3541878660519917} +{"step": 4568, "train/loss": 2.7008234560489655, "train/lm_loss": 2.7008234560489655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1232078455084477e-06, "perf/tokens_per_sec": 26973.270171229757, "train/loss_prose": 3.1592092514038086, "train/loss_math": 2.6298611164093018, "train/loss_code": 1.590339481830597} +{"step": 4569, "train/loss": 2.446160227060318, "train/lm_loss": 2.446160227060318, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1180409469414094e-06, "perf/tokens_per_sec": 27025.630787430313, "train/loss_prose": 3.6424858570098877, "train/loss_math": 2.1944544315338135, "train/loss_code": 1.611842393875122} +{"step": 4570, "train/loss": 2.5980923175811768, "train/lm_loss": 2.5980923175811768, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1128856881510913e-06, "perf/tokens_per_sec": 25966.800911112154, "train/loss_prose": 3.657729148864746, "train/loss_code": 1.964996576309204, "train/loss_math": 1.9515643119812012} +{"step": 4571, "train/loss": 2.2126882672309875, "train/lm_loss": 2.2126882672309875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1077420716501031e-06, "perf/tokens_per_sec": 25880.15676444359, "train/loss_math": 2.1715363025665284, "train/loss_prose": 3.755927562713623, "train/loss_code": 1.5439483523368835} +{"step": 4572, "train/loss": 1.9531112611293793, "train/lm_loss": 1.9531112611293793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1026100999453836e-06, "perf/tokens_per_sec": 26213.68001977485, "train/loss_code": 1.2224233865737915, "train/loss_math": 2.3825693130493164, "train/loss_prose": 3.565101146697998} +{"step": 4573, "train/loss": 2.65818589925766, "train/lm_loss": 2.65818589925766, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0974897755381935e-06, "perf/tokens_per_sec": 27109.258693412146, "train/loss_prose": 3.690367817878723, "train/loss_code": 1.1734506487846375, "train/loss_math": 2.078557252883911} +{"step": 4574, "train/loss": 2.1549055576324463, "train/lm_loss": 2.1549055576324463, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0923811009241142e-06, "perf/tokens_per_sec": 27047.54324254967, "train/loss_prose": 3.1889599561691284, "train/loss_code": 1.7550355911254882, "train/loss_math": 2.086146354675293} +{"step": 4575, "train/loss": 2.6269773840904236, "train/lm_loss": 2.6269773840904236, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.087284078593051e-06, "perf/tokens_per_sec": 26752.25353481245, "train/loss_prose": 3.277447462081909, "train/loss_math": 2.257695992787679, "train/loss_code": 1.1329423189163208} +{"step": 4576, "train/loss": 2.0232483446598053, "train/lm_loss": 2.0232483446598053, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0821987110292365e-06, "perf/tokens_per_sec": 26005.95081235156, "train/loss_math": 2.148184597492218, "train/loss_prose": 3.0626657009124756, "train/loss_code": 1.510194222132365} +{"step": 4577, "train/loss": 2.3955768644809723, "train/lm_loss": 2.3955768644809723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0771250007112155e-06, "perf/tokens_per_sec": 26130.827285780342, "train/loss_math": 2.300325314203898, "train/loss_code": 1.6224256753921509, "train/loss_prose": 3.00626269976298} +{"step": 4578, "train/loss": 2.0800353288650513, "train/lm_loss": 2.0800353288650513, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0720629501118462e-06, "perf/tokens_per_sec": 27046.43613103925, "train/loss_code": 0.9908927083015442, "train/loss_prose": 3.4446179072062173, "train/loss_math": 2.342857837677002} +{"step": 4579, "train/loss": 2.5987167954444885, "train/lm_loss": 2.5987167954444885, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.067012561698319e-06, "perf/tokens_per_sec": 27144.42458121743, "train/loss_prose": 3.5254948139190674, "train/loss_math": 1.9667221307754517, "train/loss_code": 1.3771552443504333} +{"step": 4580, "train/loss": 2.301359623670578, "train/lm_loss": 2.301359623670578, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.061973837932126e-06, "perf/tokens_per_sec": 26003.35288998137, "train/loss_prose": 3.3984154065450034, "train/loss_math": 1.9699938893318176, "train/loss_code": 1.425214409828186} +{"step": 4581, "train/loss": 2.0728269517421722, "train/lm_loss": 2.0728269517421722, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.056946781269083e-06, "perf/tokens_per_sec": 26702.89643766971, "train/loss_prose": 3.160209894180298, "train/loss_code": 1.4380689859390259, "train/loss_math": 1.9826629161834717} +{"step": 4582, "train/loss": 2.429990231990814, "train/lm_loss": 2.429990231990814, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.051931394159314e-06, "perf/tokens_per_sec": 26524.587436081125, "train/loss_code": 1.632968266805013, "train/loss_math": 1.971150517463684, "train/loss_prose": 3.1424665451049805} +{"step": 4583, "train/loss": 2.36975821852684, "train/lm_loss": 2.36975821852684, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0469276790472604e-06, "perf/tokens_per_sec": 26228.00705932643, "train/loss_code": 1.132529765367508, "train/loss_math": 2.1868149439493814, "train/loss_prose": 3.3775204022725425} +{"step": 4584, "train/loss": 2.244389295578003, "train/lm_loss": 2.244389295578003, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0419356383716688e-06, "perf/tokens_per_sec": 25604.22127700163, "train/loss_math": 1.8814853429794312, "train/loss_prose": 2.8690600395202637, "train/loss_code": 1.8666123449802399} +{"step": 4585, "train/loss": 2.253559798002243, "train/lm_loss": 2.253559798002243, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0369552745656013e-06, "perf/tokens_per_sec": 26769.4277469257, "train/loss_math": 2.0390061140060425, "train/loss_code": 1.3919163942337036, "train/loss_prose": 5.049614429473877} +{"step": 4586, "train/loss": 1.6451124250888824, "train/lm_loss": 1.6451124250888824, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0319865900564303e-06, "perf/tokens_per_sec": 26356.324237417368, "train/loss_code": 1.2082739114761352, "train/loss_prose": 3.145717144012451, "train/loss_math": 1.9869061708450317} +{"step": 4587, "train/loss": 2.3485622107982635, "train/lm_loss": 2.3485622107982635, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0270295872658264e-06, "perf/tokens_per_sec": 26293.598552769727, "train/loss_code": 1.3213269412517548, "train/loss_prose": 3.1854946613311768, "train/loss_math": 2.196453253428141} +{"step": 4588, "train/loss": 2.774535834789276, "train/lm_loss": 2.774535834789276, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0220842686097742e-06, "perf/tokens_per_sec": 26463.586388555566, "train/loss_prose": 3.314260959625244, "train/loss_code": 1.8122901320457458, "train/loss_math": 2.657331347465515} +{"step": 4589, "train/loss": 2.2053795158863068, "train/lm_loss": 2.2053795158863068, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0171506364985622e-06, "perf/tokens_per_sec": 25918.928330147504, "train/loss_code": 0.8828644951184591, "train/loss_prose": 3.603905359903971, "train/loss_math": 2.0913630723953247} +{"step": 4590, "train/loss": 2.102118104696274, "train/lm_loss": 2.102118104696274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0122286933367835e-06, "perf/tokens_per_sec": 25593.693570541105, "train/loss_code": 1.4388606399297714, "train/loss_math": 2.2475279569625854, "train/loss_prose": 3.2832231521606445} +{"step": 4591, "train/loss": 2.442203640937805, "train/lm_loss": 2.442203640937805, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0073184415233333e-06, "perf/tokens_per_sec": 26045.41637204902, "train/loss_prose": 3.643505811691284, "train/loss_code": 2.0123151938120523, "train/loss_math": 2.071223775545756} +{"step": 4592, "train/loss": 2.6863113045692444, "train/lm_loss": 2.6863113045692444, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0024198834514082e-06, "perf/tokens_per_sec": 26699.120047493092, "train/loss_math": 1.9485380351543427, "train/loss_prose": 3.424084484577179} +{"step": 4593, "train/loss": 2.1155291497707367, "train/lm_loss": 2.1155291497707367, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.975330215085065e-07, "perf/tokens_per_sec": 26500.447617255348, "train/loss_math": 2.092650353908539, "train/loss_code": 1.6051162679990132, "train/loss_prose": 3.7382829189300537} +{"step": 4594, "train/loss": 2.4030203223228455, "train/lm_loss": 2.4030203223228455, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.926578580764234e-07, "perf/tokens_per_sec": 26665.925536620914, "train/loss_code": 1.3947754800319672, "train/loss_prose": 3.4112653136253357} +{"step": 4595, "train/loss": 1.5371304899454117, "train/lm_loss": 1.5371304899454117, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.877943955312552e-07, "perf/tokens_per_sec": 26912.97265915667, "train/loss_code": 1.1850127577781677, "train/loss_math": 1.9379073977470398, "train/loss_prose": 2.4961647987365723} +{"step": 4596, "train/loss": 2.6496673226356506, "train/lm_loss": 2.6496673226356506, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.829426362434002e-07, "perf/tokens_per_sec": 26618.78324894175, "train/loss_prose": 3.470223367214203, "train/loss_math": 1.9308889309565227, "train/loss_code": 1.5237784385681152} +{"step": 4597, "train/loss": 2.4849697947502136, "train/lm_loss": 2.4849697947502136, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.78102582577539e-07, "perf/tokens_per_sec": 26978.77671830048, "train/loss_prose": 3.7235478162765503, "train/loss_math": 2.0891335010528564, "train/loss_code": 2.038063883781433} +{"step": 4598, "train/loss": 2.6420599222183228, "train/lm_loss": 2.6420599222183228, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.732742368926546e-07, "perf/tokens_per_sec": 26434.187425662898, "train/loss_prose": 3.193054437637329, "train/loss_math": 2.181060870488485, "train/loss_code": 1.8210781812667847} +{"step": 4599, "train/loss": 1.9738927781581879, "train/lm_loss": 1.9738927781581879, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.684576015420278e-07, "perf/tokens_per_sec": 26300.24016986594, "train/loss_prose": 3.211733818054199, "train/loss_math": 1.9867342313130696, "train/loss_code": 1.135823905467987} +{"step": 4600, "train/loss": 3.0887486338615417, "train/lm_loss": 3.0887486338615417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.636526788732309e-07, "perf/tokens_per_sec": 25366.277040834408, "train/loss_prose": 3.878562307357788, "train/loss_code": 1.8599358797073364, "train/loss_math": 1.728620171546936} +{"step": 4600, "eval/loss": 2.156658709049225, "eval/lm_loss": 2.156658709049225, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.642213214074365, "eval/loss_code": 1.5539842376562014, "eval/ppl_code": 4.730279244513202, "eval/loss_prose": 3.4845200160093475, "eval/ppl_prose": 32.60677262436222, "eval/loss_math": 2.0232794070776388, "eval/ppl_math": 7.56308674964845} +{"step": 4601, "train/loss": 2.1949514150619507, "train/lm_loss": 2.1949514150619507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.588594712281186e-07, "perf/tokens_per_sec": 26026.31917781656, "train/loss_prose": 3.2757961750030518, "train/loss_math": 2.201337456703186, "train/loss_code": 1.109849472840627} +{"step": 4602, "train/loss": 2.3016852736473083, "train/lm_loss": 2.3016852736473083, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.540779809428468e-07, "perf/tokens_per_sec": 26631.987747332132, "train/loss_code": 1.1077546328306198, "train/loss_prose": 3.4956159591674805} +{"step": 4603, "train/loss": 1.6474145650863647, "train/lm_loss": 1.6474145650863647, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.493082103478517e-07, "perf/tokens_per_sec": 25463.84297078454, "train/loss_code": 1.158894455432892, "train/loss_math": 1.9209439754486084, "train/loss_prose": 3.5429558753967285} +{"step": 4604, "train/loss": 2.1170241832733154, "train/lm_loss": 2.1170241832733154, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.445501617678654e-07, "perf/tokens_per_sec": 26302.61586558924, "train/loss_code": 1.7828231811523438, "train/loss_math": 1.8369019031524658, "train/loss_prose": 3.092587947845459} +{"step": 4605, "train/loss": 1.9875975549221039, "train/lm_loss": 1.9875975549221039, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.398038375219048e-07, "perf/tokens_per_sec": 27064.459248616837, "train/loss_code": 1.6189750730991364, "train/loss_math": 1.7247382402420044, "train/loss_prose": 2.9877012968063354} +{"step": 4606, "train/loss": 2.2178929448127747, "train/lm_loss": 2.2178929448127747, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.350692399232641e-07, "perf/tokens_per_sec": 27116.704023183538, "train/loss_prose": 3.221145749092102, "train/loss_code": 1.4389388958613079, "train/loss_math": 2.3280117511749268} +{"step": 4607, "train/loss": 2.00653076171875, "train/lm_loss": 2.00653076171875, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.303463712795307e-07, "perf/tokens_per_sec": 25762.603128425606, "train/loss_prose": 3.623773455619812, "train/loss_math": 1.9377409219741821, "train/loss_code": 0.9971588651339213} +{"step": 4608, "train/loss": 2.155963957309723, "train/lm_loss": 2.155963957309723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.256352338925744e-07, "perf/tokens_per_sec": 26196.1726825817, "train/loss_code": 1.0803375989198685, "train/loss_math": 2.423508882522583, "train/loss_prose": 3.500950892766317} +{"step": 4609, "train/loss": 1.6803705096244812, "train/lm_loss": 1.6803705096244812, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.209358300585474e-07, "perf/tokens_per_sec": 25768.901864434745, "train/loss_math": 2.040050427118937, "train/loss_code": 1.4645625233650208} +{"step": 4610, "train/loss": 1.7850403189659119, "train/lm_loss": 1.7850403189659119, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.162481620678815e-07, "perf/tokens_per_sec": 27274.049423874978, "train/loss_code": 1.3082427263259888, "train/loss_prose": 3.713635206222534, "train/loss_math": 2.0127371549606323} +{"step": 4611, "train/loss": 2.6897023916244507, "train/lm_loss": 2.6897023916244507, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.115722322052878e-07, "perf/tokens_per_sec": 26744.88205853095, "train/loss_prose": 3.250252604484558, "train/loss_math": 1.9646856784820557, "train/loss_code": 2.622551441192627} +{"step": 4612, "train/loss": 2.3050180077552795, "train/lm_loss": 2.3050180077552795, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.069080427497573e-07, "perf/tokens_per_sec": 26106.04969608558, "train/loss_prose": 3.420109192530314, "train/loss_math": 2.1848307847976685, "train/loss_code": 1.270051638285319} +{"step": 4613, "train/loss": 2.18962299823761, "train/lm_loss": 2.18962299823761, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.022555959745604e-07, "perf/tokens_per_sec": 26589.24440468581, "train/loss_code": 1.5469882190227509, "train/loss_prose": 3.0922152201334634, "train/loss_math": 2.0523855686187744} +{"step": 4614, "train/loss": 2.4100051820278168, "train/lm_loss": 2.4100051820278168, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.976148941472501e-07, "perf/tokens_per_sec": 26683.071937674827, "train/loss_math": 2.1806057929992675, "train/loss_prose": 3.643994688987732, "train/loss_code": 1.0890228748321533} +{"step": 4615, "train/loss": 2.6664499044418335, "train/lm_loss": 2.6664499044418335, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.929859395296364e-07, "perf/tokens_per_sec": 27140.736666498156, "train/loss_prose": 3.465499448776245, "train/loss_math": 2.138099193572998, "train/loss_code": 0.9330017268657684} +{"step": 4616, "train/loss": 2.85470849275589, "train/lm_loss": 2.85470849275589, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.883687343778202e-07, "perf/tokens_per_sec": 26414.881599542427, "train/loss_code": 1.3250364065170288, "train/loss_prose": 3.5458378314971926, "train/loss_math": 2.4584062099456787} +{"step": 4617, "train/loss": 2.371702492237091, "train/lm_loss": 2.371702492237091, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.837632809421681e-07, "perf/tokens_per_sec": 25673.55362346563, "train/loss_prose": 3.0650466680526733, "train/loss_code": 1.219496726989746, "train/loss_math": 2.13722026348114} +{"step": 4618, "train/loss": 2.373925417661667, "train/lm_loss": 2.373925417661667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.79169581467329e-07, "perf/tokens_per_sec": 26824.726377900504, "train/loss_prose": 3.3021562894185386, "train/loss_math": 2.0508293310801187, "train/loss_code": 1.466223269701004} +{"step": 4619, "train/loss": 2.5604870319366455, "train/lm_loss": 2.5604870319366455, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.745876381922147e-07, "perf/tokens_per_sec": 26903.869119040348, "train/loss_prose": 3.312603175640106, "train/loss_code": 1.6891108552614849, "train/loss_math": 2.166151762008667} +{"step": 4620, "train/loss": 2.1351656019687653, "train/lm_loss": 2.1351656019687653, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.700174533500055e-07, "perf/tokens_per_sec": 26403.149320321205, "train/loss_math": 2.2130995094776154, "train/loss_code": 1.5737499793370564, "train/loss_prose": 3.507676839828491} +{"step": 4621, "train/loss": 2.2069186568260193, "train/lm_loss": 2.2069186568260193, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.65459029168153e-07, "perf/tokens_per_sec": 26961.205988007037, "train/loss_math": 2.0644313437598094, "train/loss_prose": 3.2043299674987793} +{"step": 4622, "train/loss": 1.9375636279582977, "train/lm_loss": 1.9375636279582977, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.609123678683828e-07, "perf/tokens_per_sec": 25400.818781963804, "train/loss_code": 1.186177909374237, "train/loss_math": 1.9314879179000854, "train/loss_prose": 3.073755383491516} +{"step": 4623, "train/loss": 2.104147881269455, "train/lm_loss": 2.104147881269455, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.563774716666778e-07, "perf/tokens_per_sec": 26992.97703379804, "train/loss_prose": 3.533679723739624, "train/loss_math": 2.1050380766391754, "train/loss_code": 1.6264501015345256} +{"step": 4624, "train/loss": 3.3672954440116882, "train/lm_loss": 3.3672954440116882, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.51854342773295e-07, "perf/tokens_per_sec": 26573.94462739736, "train/loss_prose": 3.480130297797067, "train/loss_math": 2.577451229095459} +{"step": 4625, "train/loss": 2.0177548229694366, "train/lm_loss": 2.0177548229694366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.473429833927487e-07, "perf/tokens_per_sec": 26792.17996746862, "train/loss_code": 1.714993268251419, "train/loss_math": 2.155188798904419, "train/loss_prose": 2.8164985179901123} +{"step": 4626, "train/loss": 2.2729702591896057, "train/lm_loss": 2.2729702591896057, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.428433957238247e-07, "perf/tokens_per_sec": 27147.898998933353, "train/loss_code": 1.7556055386861165, "train/loss_math": 1.936560074488322, "train/loss_prose": 3.5536327362060547} +{"step": 4627, "train/loss": 2.305667757987976, "train/lm_loss": 2.305667757987976, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.383555819595601e-07, "perf/tokens_per_sec": 27117.2604476434, "train/loss_code": 1.521358275413513, "train/loss_prose": 3.6128499507904053} +{"step": 4628, "train/loss": 2.345701426267624, "train/lm_loss": 2.345701426267624, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.338795442872693e-07, "perf/tokens_per_sec": 27263.228930346522, "train/loss_code": 1.7686698734760284, "train/loss_math": 2.2564847469329834, "train/loss_prose": 3.5889806747436523} +{"step": 4629, "train/loss": 2.124618172645569, "train/lm_loss": 2.124618172645569, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.294152848885157e-07, "perf/tokens_per_sec": 27230.17148798646, "train/loss_prose": 3.674201250076294, "train/loss_math": 2.1068572402000427, "train/loss_code": 1.6317718029022217} +{"step": 4630, "train/loss": 2.621648848056793, "train/lm_loss": 2.621648848056793, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.249628059391251e-07, "perf/tokens_per_sec": 26783.366722894498, "train/loss_math": 1.9770335853099823, "train/loss_prose": 3.8532063961029053, "train/loss_code": 1.505436897277832} +{"step": 4631, "train/loss": 2.242316395044327, "train/lm_loss": 2.242316395044327, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.205221096091786e-07, "perf/tokens_per_sec": 27065.695755946857, "train/loss_prose": 3.4207271337509155, "train/loss_math": 2.0877884924411774, "train/loss_code": 1.3729615211486816} +{"step": 4632, "train/loss": 2.147350490093231, "train/lm_loss": 2.147350490093231, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.160931980630226e-07, "perf/tokens_per_sec": 27159.701500276657, "train/loss_prose": 3.1809935569763184, "train/loss_math": 2.2538065910339355, "train/loss_code": 1.3517990112304688} +{"step": 4633, "train/loss": 2.584537982940674, "train/lm_loss": 2.584537982940674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.116760734592527e-07, "perf/tokens_per_sec": 27094.123911613817, "train/loss_prose": 3.041229724884033, "train/loss_math": 2.204697370529175, "train/loss_code": 1.8972927331924438} +{"step": 4634, "train/loss": 1.7249895632266998, "train/lm_loss": 1.7249895632266998, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.072707379507216e-07, "perf/tokens_per_sec": 27238.37434816869, "train/loss_code": 1.534936120112737, "train/loss_math": 2.2951496839523315} +{"step": 4635, "train/loss": 2.542396366596222, "train/lm_loss": 2.542396366596222, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.028771936845342e-07, "perf/tokens_per_sec": 27077.0427861512, "train/loss_prose": 3.6415747006734214, "train/loss_code": 1.761225700378418, "train/loss_math": 1.9133053421974182} +{"step": 4636, "train/loss": 2.3611634969711304, "train/lm_loss": 2.3611634969711304, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.984954428020525e-07, "perf/tokens_per_sec": 27115.9336270126, "train/loss_code": 1.1652729511260986, "train/loss_prose": 3.308379113674164, "train/loss_math": 2.15997314453125} +{"step": 4637, "train/loss": 2.5384970903396606, "train/lm_loss": 2.5384970903396606, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.941254874388904e-07, "perf/tokens_per_sec": 26595.542169271543, "train/loss_math": 1.9083951711654663, "train/loss_prose": 3.8276838461558023, "train/loss_code": 1.5498695373535156} +{"step": 4638, "train/loss": 2.409897267818451, "train/lm_loss": 2.409897267818451, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.897673297249081e-07, "perf/tokens_per_sec": 26713.152026669817, "train/loss_code": 1.6846959193547566, "train/loss_math": 2.4874861240386963, "train/loss_prose": 3.38131582736969} +{"step": 4639, "train/loss": 2.4932084381580353, "train/lm_loss": 2.4932084381580353, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.854209717842231e-07, "perf/tokens_per_sec": 27289.602683868674, "train/loss_math": 2.0181915163993835, "train/loss_prose": 3.9722414016723633, "train/loss_code": 1.9642093777656555} +{"step": 4640, "train/loss": 1.8668487071990967, "train/lm_loss": 1.8668487071990967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.81086415735191e-07, "perf/tokens_per_sec": 27104.42569733687, "train/loss_math": 2.2759622732798257, "train/loss_code": 1.6213805437088014} +{"step": 4641, "train/loss": 2.0509005188941956, "train/lm_loss": 2.0509005188941956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.767636636904274e-07, "perf/tokens_per_sec": 26585.664652867188, "train/loss_math": 2.1235197385152182, "train/loss_prose": 3.176663398742676, "train/loss_code": 1.2277725736300151} +{"step": 4642, "train/loss": 2.3424038887023926, "train/lm_loss": 2.3424038887023926, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.724527177567858e-07, "perf/tokens_per_sec": 26544.259078885567, "train/loss_prose": 3.142965316772461, "train/loss_math": 2.253146489461263, "train/loss_code": 1.275447517633438} +{"step": 4643, "train/loss": 1.7343606650829315, "train/lm_loss": 1.7343606650829315, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.681535800353717e-07, "perf/tokens_per_sec": 26866.550083509006, "train/loss_code": 1.3286607265472412, "train/loss_prose": 3.739819049835205, "train/loss_math": 2.1631014347076416} +{"step": 4644, "train/loss": 2.2650171518325806, "train/lm_loss": 2.2650171518325806, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.638662526215284e-07, "perf/tokens_per_sec": 26397.83466040571, "train/loss_math": 2.121114432811737, "train/loss_code": 1.0641295909881592, "train/loss_prose": 3.5618398189544678} +{"step": 4645, "train/loss": 2.229785829782486, "train/lm_loss": 2.229785829782486, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.595907376048512e-07, "perf/tokens_per_sec": 26308.496551386183, "train/loss_prose": 3.763213793436686, "train/loss_code": 1.3097289562225343} +{"step": 4646, "train/loss": 2.33037006855011, "train/lm_loss": 2.33037006855011, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.553270370691701e-07, "perf/tokens_per_sec": 26450.100357186846, "train/loss_code": 1.7758393287658691, "train/loss_prose": 3.346131443977356, "train/loss_math": 2.2077266375223794} +{"step": 4647, "train/loss": 2.039148598909378, "train/lm_loss": 2.039148598909378, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.510751530925674e-07, "perf/tokens_per_sec": 26824.349424942775, "train/loss_code": 1.409621462225914, "train/loss_prose": 3.1177639961242676, "train/loss_math": 2.219588041305542} +{"step": 4648, "train/loss": 2.1536672115325928, "train/lm_loss": 2.1536672115325928, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.468350877473551e-07, "perf/tokens_per_sec": 27030.946080876325, "train/loss_math": 2.1471114456653595, "train/loss_prose": 3.161203384399414, "train/loss_code": 1.1592418253421783} +{"step": 4649, "train/loss": 2.6898006796836853, "train/lm_loss": 2.6898006796836853, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.426068431000882e-07, "perf/tokens_per_sec": 26140.44920528169, "train/loss_prose": 3.1520820260047913, "train/loss_math": 2.2275192737579346} +{"step": 4650, "train/loss": 2.7568258345127106, "train/lm_loss": 2.7568258345127106, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.383904212115655e-07, "perf/tokens_per_sec": 26779.024886913096, "train/loss_math": 1.981248418490092, "train/loss_code": 2.5504820346832275, "train/loss_prose": 3.390094757080078} +{"step": 4651, "train/loss": 2.4040298759937286, "train/lm_loss": 2.4040298759937286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.341858241368183e-07, "perf/tokens_per_sec": 26490.80398814535, "train/loss_math": 2.054673969745636, "train/loss_prose": 3.053313970565796, "train/loss_code": 1.8536005020141602} +{"step": 4652, "train/loss": 2.671977460384369, "train/lm_loss": 2.671977460384369, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.299930539251154e-07, "perf/tokens_per_sec": 27016.790770817868, "train/loss_prose": 3.301646852493286, "train/loss_code": 1.6267213821411133, "train/loss_math": 1.614142894744873} +{"step": 4653, "train/loss": 2.368419051170349, "train/lm_loss": 2.368419051170349, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.258121126199613e-07, "perf/tokens_per_sec": 26089.359293304035, "train/loss_code": 1.265680472056071, "train/loss_math": 2.2334179878234863, "train/loss_prose": 3.5611585776011148} +{"step": 4654, "train/loss": 2.060735374689102, "train/lm_loss": 2.060735374689102, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.216430022591008e-07, "perf/tokens_per_sec": 26206.602320171456, "train/loss_code": 1.514670467376709, "train/loss_prose": 3.288716197013855, "train/loss_math": 2.3350989818573} +{"step": 4655, "train/loss": 2.5345404148101807, "train/lm_loss": 2.5345404148101807, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.174857248745004e-07, "perf/tokens_per_sec": 26667.291470437576, "train/loss_prose": 3.345417022705078, "train/loss_math": 2.0740893185138702, "train/loss_code": 1.94371497631073} +{"step": 4656, "train/loss": 2.3583796322345734, "train/lm_loss": 2.3583796322345734, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.133402824923696e-07, "perf/tokens_per_sec": 25852.894980467223, "train/loss_math": 2.1214561462402344, "train/loss_code": 1.348486453294754, "train/loss_prose": 3.2685653368631997} +{"step": 4657, "train/loss": 2.52363920211792, "train/lm_loss": 2.52363920211792, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.092066771331507e-07, "perf/tokens_per_sec": 27259.897947558413, "train/loss_prose": 3.9069392681121826, "train/loss_code": 1.0315213799476624, "train/loss_math": 2.1350841522216797} +{"step": 4658, "train/loss": 2.1644621789455414, "train/lm_loss": 2.1644621789455414, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.050849108115127e-07, "perf/tokens_per_sec": 26036.061559538623, "train/loss_math": 2.095598554611206, "train/loss_prose": 2.9005110263824463, "train/loss_code": 1.9685971140861511} +{"step": 4659, "train/loss": 2.3413787484169006, "train/lm_loss": 2.3413787484169006, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.009749855363456e-07, "perf/tokens_per_sec": 26165.889934889386, "train/loss_prose": 3.7103171348571777, "train/loss_code": 1.5947696367899578, "train/loss_math": 2.17536191145579} +{"step": 4660, "train/loss": 1.8544159531593323, "train/lm_loss": 1.8544159531593323, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.968769033107836e-07, "perf/tokens_per_sec": 26910.991271875137, "train/loss_code": 1.008076074719429, "train/loss_prose": 3.908838987350464, "train/loss_math": 1.9772697687149048} +{"step": 4661, "train/loss": 2.3069811165332794, "train/lm_loss": 2.3069811165332794, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.927906661321815e-07, "perf/tokens_per_sec": 27065.908958285414, "train/loss_math": 2.1051112016042075, "train/loss_code": 1.2638474106788635, "train/loss_prose": 3.204273303349813} +{"step": 4662, "train/loss": 2.2448700666427612, "train/lm_loss": 2.2448700666427612, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.887162759921211e-07, "perf/tokens_per_sec": 27187.596132938546, "train/loss_prose": 3.116279721260071, "train/loss_math": 2.015031170845032, "train/loss_code": 1.6512449979782104} +{"step": 4663, "train/loss": 2.3922233283519745, "train/lm_loss": 2.3922233283519745, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.846537348764115e-07, "perf/tokens_per_sec": 26880.506296157215, "train/loss_prose": 3.5353033542633057, "train/loss_math": 2.005888024965922, "train/loss_code": 1.2571063935756683} +{"step": 4664, "train/loss": 2.355530768632889, "train/lm_loss": 2.355530768632889, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.806030447650879e-07, "perf/tokens_per_sec": 26041.82364488262, "train/loss_code": 1.5746963421503704, "train/loss_prose": 3.7491466999053955, "train/loss_math": 2.207287867863973} +{"step": 4665, "train/loss": 2.1109792292118073, "train/lm_loss": 2.1109792292118073, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.765642076323991e-07, "perf/tokens_per_sec": 26602.378413197854, "train/loss_prose": 3.353293538093567, "train/loss_math": 2.008432388305664, "train/loss_code": 1.5410950779914856} +{"step": 4666, "train/loss": 1.9679231941699982, "train/lm_loss": 1.9679231941699982, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.725372254468343e-07, "perf/tokens_per_sec": 25948.994557919716, "train/loss_code": 1.4224363565444946, "train/loss_math": 2.0858412086963654, "train/loss_prose": 3.1327121257781982} +{"step": 4667, "train/loss": 2.4682759940624237, "train/lm_loss": 2.4682759940624237, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.685221001710956e-07, "perf/tokens_per_sec": 26460.16264960964, "train/loss_math": 2.1591131885846457, "train/loss_code": 1.9498090744018555, "train/loss_prose": 4.841719627380371} +{"step": 4668, "train/loss": 2.4642414450645447, "train/lm_loss": 2.4642414450645447, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.645188337621011e-07, "perf/tokens_per_sec": 26280.325376769117, "train/loss_prose": 3.4087687333424888, "train/loss_code": 1.52503502368927, "train/loss_math": 2.1458519299825034} +{"step": 4669, "train/loss": 2.551049381494522, "train/lm_loss": 2.551049381494522, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.605274281709928e-07, "perf/tokens_per_sec": 26299.515466774028, "train/loss_prose": 3.4245176315307617, "train/loss_code": 1.6581817865371704, "train/loss_math": 1.6969799995422363} +{"step": 4670, "train/loss": 2.6356857419013977, "train/lm_loss": 2.6356857419013977, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.565478853431394e-07, "perf/tokens_per_sec": 26278.194873112676, "train/loss_math": 2.0855972468852997, "train/loss_code": 2.0509235858917236, "train/loss_prose": 3.5640575885772705} +{"step": 4671, "train/loss": 2.238760083913803, "train/lm_loss": 2.238760083913803, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.525802072181203e-07, "perf/tokens_per_sec": 27118.11652986488, "train/loss_prose": 3.410177230834961, "train/loss_code": 1.2543829282124836, "train/loss_math": 1.958199918270111} +{"step": 4672, "train/loss": 2.0532081723213196, "train/lm_loss": 2.0532081723213196, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.4862439572973e-07, "perf/tokens_per_sec": 26616.020214695935, "train/loss_code": 1.422957718372345, "train/loss_prose": 3.551642894744873, "train/loss_math": 1.8152747750282288} +{"step": 4673, "train/loss": 2.3151752054691315, "train/lm_loss": 2.3151752054691315, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.446804528059874e-07, "perf/tokens_per_sec": 26052.0517041629, "train/loss_math": 2.131649971008301, "train/loss_prose": 3.399750590324402, "train/loss_code": 1.7756502230962117} +{"step": 4674, "train/loss": 2.6084050238132477, "train/lm_loss": 2.6084050238132477, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.407483803691216e-07, "perf/tokens_per_sec": 26891.530174154508, "train/loss_code": 1.3898880332708359, "train/loss_math": 2.2514610290527344, "train/loss_prose": 4.352075815200806} +{"step": 4675, "train/loss": 2.400118589401245, "train/lm_loss": 2.400118589401245, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.368281803355691e-07, "perf/tokens_per_sec": 26350.462569174324, "train/loss_math": 2.329460700352987, "train/loss_prose": 3.3403757413228354, "train/loss_code": 1.0957199335098267} +{"step": 4676, "train/loss": 2.3525470793247223, "train/lm_loss": 2.3525470793247223, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.329198546160015e-07, "perf/tokens_per_sec": 26063.988008653672, "train/loss_math": 2.1956196228663125, "train/loss_code": 1.7172735333442688, "train/loss_prose": 2.932990233103434} +{"step": 4677, "train/loss": 2.6394283175468445, "train/lm_loss": 2.6394283175468445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.29023405115281e-07, "perf/tokens_per_sec": 27182.56312953112, "train/loss_prose": 3.231066417694092, "train/loss_math": 2.345911979675293, "train/loss_code": 1.3070912957191467} +{"step": 4678, "train/loss": 2.8049644231796265, "train/lm_loss": 2.8049644231796265, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.251388337324887e-07, "perf/tokens_per_sec": 27223.440200675363, "train/loss_prose": 3.573016345500946, "train/loss_math": 2.0369123220443726} +{"step": 4679, "train/loss": 2.12685489654541, "train/lm_loss": 2.12685489654541, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.212661423609184e-07, "perf/tokens_per_sec": 27222.44804877894, "train/loss_math": 1.888873279094696, "train/loss_prose": 3.3018860816955566, "train/loss_code": 1.6583301424980164} +{"step": 4680, "train/loss": 1.8990572392940521, "train/lm_loss": 1.8990572392940521, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.174053328880769e-07, "perf/tokens_per_sec": 26063.988008653672, "train/loss_math": 2.1531164169311525, "train/loss_code": 1.475625475247701} +{"step": 4681, "train/loss": 1.9221399575471878, "train/lm_loss": 1.9221399575471878, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.135564071956729e-07, "perf/tokens_per_sec": 27159.44388164842, "train/loss_code": 0.9667447010676066, "train/loss_math": 2.1963676611582437, "train/loss_prose": 2.943891406059265} +{"step": 4682, "train/loss": 2.2717068195343018, "train/lm_loss": 2.2717068195343018, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.097193671596252e-07, "perf/tokens_per_sec": 27177.832945226284, "train/loss_code": 1.4463726580142975, "train/loss_prose": 3.3954549630482993, "train/loss_math": 2.201799154281616} +{"step": 4683, "train/loss": 2.0533123314380646, "train/lm_loss": 2.0533123314380646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.058942146500629e-07, "perf/tokens_per_sec": 27123.982146663235, "train/loss_prose": 3.3783775568008423, "train/loss_math": 2.1335023641586304, "train/loss_code": 1.089745044708252} +{"step": 4684, "train/loss": 2.2487217485904694, "train/lm_loss": 2.2487217485904694, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.020809515313142e-07, "perf/tokens_per_sec": 26762.338628723468, "train/loss_math": 2.2752038836479187, "train/loss_code": 1.7669274806976318, "train/loss_prose": 3.5881762504577637} +{"step": 4685, "train/loss": 2.4647600948810577, "train/lm_loss": 2.4647600948810577, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.982795796619256e-07, "perf/tokens_per_sec": 26069.960263493736, "train/loss_code": 1.6348679860432942, "train/loss_prose": 3.357926686604818, "train/loss_math": 2.369847893714905} +{"step": 4686, "train/loss": 1.8406221568584442, "train/lm_loss": 1.8406221568584442, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.944901008946318e-07, "perf/tokens_per_sec": 26140.01169158203, "train/loss_math": 2.0773932456970217, "train/loss_code": 1.4460038741429646} +{"step": 4687, "train/loss": 2.125745862722397, "train/lm_loss": 2.125745862722397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.907125170763806e-07, "perf/tokens_per_sec": 26738.971154998497, "train/loss_code": 1.2431854009628296, "train/loss_prose": 3.3102556069691977, "train/loss_math": 2.1024582386016846} +{"step": 4688, "train/loss": 2.5624616146087646, "train/lm_loss": 2.5624616146087646, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.869468300483183e-07, "perf/tokens_per_sec": 26349.290548385365, "train/loss_math": 2.248997390270233, "train/loss_prose": 3.4451568126678467, "train/loss_code": 1.1682320833206177} +{"step": 4689, "train/loss": 2.2240808308124542, "train/lm_loss": 2.2240808308124542, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.83193041645802e-07, "perf/tokens_per_sec": 26859.241469233584, "train/loss_math": 2.0462945302327475, "train/loss_code": 0.9426940977573395, "train/loss_prose": 3.25612465540568} +{"step": 4690, "train/loss": 2.4607839584350586, "train/lm_loss": 2.4607839584350586, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.794511536983794e-07, "perf/tokens_per_sec": 26899.614483939964, "train/loss_prose": 3.2630794843037925, "train/loss_math": 2.100880801677704, "train/loss_code": 1.4935100078582764} +{"step": 4691, "train/loss": 2.7185018062591553, "train/lm_loss": 2.7185018062591553, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.757211680297969e-07, "perf/tokens_per_sec": 26325.872007477952, "train/loss_prose": 3.2690821647644044, "train/loss_math": 2.0626875162124634, "train/loss_code": 1.277228832244873} +{"step": 4692, "train/loss": 2.1567353308200836, "train/lm_loss": 2.1567353308200836, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.720030864580117e-07, "perf/tokens_per_sec": 26932.422594562056, "train/loss_math": 2.338987350463867, "train/loss_code": 1.5533379316329956, "train/loss_prose": 3.2379190921783447} +{"step": 4693, "train/loss": 2.2692074179649353, "train/lm_loss": 2.2692074179649353, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.68296910795163e-07, "perf/tokens_per_sec": 27228.617818555857, "train/loss_prose": 3.0662783781687417, "train/loss_code": 1.3605493307113647, "train/loss_math": 2.077908436457316} +{"step": 4694, "train/loss": 2.69108384847641, "train/lm_loss": 2.69108384847641, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.646026428476031e-07, "perf/tokens_per_sec": 26241.62825197043, "train/loss_code": 1.5204865336418152, "train/loss_prose": 3.574330508708954, "train/loss_math": 2.0951874256134033} +{"step": 4695, "train/loss": 2.195894628763199, "train/lm_loss": 2.195894628763199, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.609202844158723e-07, "perf/tokens_per_sec": 26041.7446949773, "train/loss_prose": 3.1097963651021323, "train/loss_math": 1.8874793648719788, "train/loss_code": 1.4876030683517456} +{"step": 4696, "train/loss": 2.184912323951721, "train/lm_loss": 2.184912323951721, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.572498372947016e-07, "perf/tokens_per_sec": 25516.450934669665, "train/loss_code": 1.5182817379633586, "train/loss_math": 2.1920213997364044, "train/loss_prose": 4.156368255615234} +{"step": 4697, "train/loss": 2.12711963057518, "train/lm_loss": 2.12711963057518, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.535913032730294e-07, "perf/tokens_per_sec": 25657.296530712825, "train/loss_code": 1.1051208972930908, "train/loss_math": 2.471254825592041, "train/loss_prose": 3.3750728766123452} +{"step": 4698, "train/loss": 2.351836711168289, "train/lm_loss": 2.351836711168289, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.499446841339739e-07, "perf/tokens_per_sec": 26200.886969305993, "train/loss_code": 2.1119621992111206, "train/loss_math": 1.9540688395500183, "train/loss_prose": 3.387247323989868} +{"step": 4699, "train/loss": 2.165966421365738, "train/lm_loss": 2.165966421365738, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.463099816548579e-07, "perf/tokens_per_sec": 24986.429246270913, "train/loss_prose": 3.475682258605957, "train/loss_math": 2.4572336673736572, "train/loss_code": 1.365474909543991} +{"step": 4700, "train/loss": 2.2040207386016846, "train/lm_loss": 2.2040207386016846, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.426871976071918e-07, "perf/tokens_per_sec": 25546.919675292906, "train/loss_code": 1.7574835419654846, "train/loss_math": 2.5010865529378257, "train/loss_prose": 3.0989720821380615} +{"step": 4701, "train/loss": 2.4985973238945007, "train/lm_loss": 2.4985973238945007, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.390763337566745e-07, "perf/tokens_per_sec": 25676.89194250604, "train/loss_code": 1.583890438079834, "train/loss_prose": 3.4685922463734946, "train/loss_math": 2.4156655073165894} +{"step": 4702, "train/loss": 2.075116455554962, "train/lm_loss": 2.075116455554962, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.354773918631983e-07, "perf/tokens_per_sec": 25858.07029896552, "train/loss_math": 2.232693672180176, "train/loss_code": 1.4599097569783528, "train/loss_prose": 3.2904274463653564} +{"step": 4703, "train/loss": 2.6479512751102448, "train/lm_loss": 2.6479512751102448, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.318903736808406e-07, "perf/tokens_per_sec": 25823.904856674733, "train/loss_math": 2.05182147026062, "train/loss_code": 1.6985285878181458, "train/loss_prose": 3.4207273721694946} +{"step": 4704, "train/loss": 2.0263121724128723, "train/lm_loss": 2.0263121724128723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.283152809578751e-07, "perf/tokens_per_sec": 26180.64352450835, "train/loss_math": 1.9364497065544128, "train/loss_prose": 3.18094265460968, "train/loss_code": 1.4939281344413757} +{"step": 4705, "train/loss": 2.1559221744537354, "train/lm_loss": 2.1559221744537354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.247521154367552e-07, "perf/tokens_per_sec": 25725.144924411223, "train/loss_math": 2.029652738571167, "train/loss_code": 1.8723263144493103, "train/loss_prose": 3.3544609546661377} +{"step": 4706, "train/loss": 1.9733538925647736, "train/lm_loss": 1.9733538925647736, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.212008788541278e-07, "perf/tokens_per_sec": 25746.31026788056, "train/loss_math": 2.0652732849121094, "train/loss_code": 1.8814344704151154} +{"step": 4707, "train/loss": 2.014468103647232, "train/lm_loss": 2.014468103647232, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.176615729408168e-07, "perf/tokens_per_sec": 26083.615757174568, "train/loss_math": 1.9423753499984742, "train/loss_code": 1.568298101425171, "train/loss_prose": 3.2672717571258545} +{"step": 4708, "train/loss": 1.9665639996528625, "train/lm_loss": 1.9665639996528625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.141341994218396e-07, "perf/tokens_per_sec": 26491.49842175465, "train/loss_prose": 3.1637028455734253, "train/loss_code": 1.4119320213794708, "train/loss_math": 1.8786889910697937} +{"step": 4709, "train/loss": 2.2811955511569977, "train/lm_loss": 2.2811955511569977, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.106187600163987e-07, "perf/tokens_per_sec": 25912.16521167293, "train/loss_prose": 3.3163618246714273, "train/loss_code": 1.5157668441534042, "train/loss_math": 2.2374119758605957} +{"step": 4710, "train/loss": 2.907840073108673, "train/lm_loss": 2.907840073108673, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.071152564378734e-07, "perf/tokens_per_sec": 26204.48375248624, "train/loss_prose": 3.5372478485107424, "train/loss_math": 2.1678836345672607, "train/loss_code": 1.2407135963439941} +{"step": 4711, "train/loss": 2.3996710181236267, "train/lm_loss": 2.3996710181236267, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.036236903938285e-07, "perf/tokens_per_sec": 25572.968636405585, "train/loss_prose": 3.3859129746754966, "train/loss_code": 1.160318911075592, "train/loss_math": 2.23966375986735} +{"step": 4712, "train/loss": 2.937492221593857, "train/lm_loss": 2.937492221593857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.001440635860111e-07, "perf/tokens_per_sec": 25052.598000443308, "train/loss_prose": 3.5583877086639406, "train/loss_code": 1.8922410607337952, "train/loss_math": 1.9235172271728516} +{"step": 4713, "train/loss": 2.4866830706596375, "train/lm_loss": 2.4866830706596375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.966763777103505e-07, "perf/tokens_per_sec": 25370.659869601495, "train/loss_math": 1.9439932505289714, "train/loss_prose": 3.7583581606547036, "train/loss_code": 1.3932055234909058} +{"step": 4714, "train/loss": 2.1060831546783447, "train/lm_loss": 2.1060831546783447, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.932206344569562e-07, "perf/tokens_per_sec": 25523.4261434374, "train/loss_math": 2.067650467157364, "train/loss_code": 1.09735307097435, "train/loss_prose": 3.1916781663894653} +{"step": 4715, "train/loss": 2.421222299337387, "train/lm_loss": 2.421222299337387, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.897768355101084e-07, "perf/tokens_per_sec": 25900.01610697783, "train/loss_math": 1.8913448452949524, "train/loss_prose": 3.3369531631469727, "train/loss_code": 1.7935398817062378} +{"step": 4716, "train/loss": 2.64290714263916, "train/lm_loss": 2.64290714263916, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.863449825482813e-07, "perf/tokens_per_sec": 25847.410626405785, "train/loss_prose": 3.3410168886184692, "train/loss_math": 2.4174128770828247, "train/loss_code": 1.4721819162368774} +{"step": 4717, "train/loss": 2.309372305870056, "train/lm_loss": 2.309372305870056, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.82925077244109e-07, "perf/tokens_per_sec": 25946.995810389148, "train/loss_math": 1.9839408695697784, "train/loss_prose": 3.7933579683303833, "train/loss_code": 1.4762493968009949} +{"step": 4718, "train/loss": 2.572647988796234, "train/lm_loss": 2.572647988796234, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.795171212644223e-07, "perf/tokens_per_sec": 26709.746463807864, "train/loss_prose": 3.255692780017853, "train/loss_code": 1.5147981643676758, "train/loss_math": 2.2644087076187134} +{"step": 4719, "train/loss": 2.7545599341392517, "train/lm_loss": 2.7545599341392517, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.7612111627021175e-07, "perf/tokens_per_sec": 25457.918812626514, "train/loss_prose": 3.617177367210388, "train/loss_code": 0.8705275058746338, "train/loss_math": 2.2324140071868896} +{"step": 4720, "train/loss": 1.6253501772880554, "train/lm_loss": 1.6253501772880554, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.727370639166506e-07, "perf/tokens_per_sec": 26298.54925810891, "train/loss_code": 1.2723123550415039, "train/loss_math": 2.2137463887532554} +{"step": 4721, "train/loss": 2.4593071043491364, "train/lm_loss": 2.4593071043491364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.693649658530808e-07, "perf/tokens_per_sec": 25677.582737597917, "train/loss_math": 2.2258639335632324, "train/loss_prose": 3.672650416692098, "train/loss_code": 0.9894566833972931} +{"step": 4722, "train/loss": 1.801760971546173, "train/lm_loss": 1.801760971546173, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6600482372302636e-07, "perf/tokens_per_sec": 25635.8946801383, "train/loss_prose": 3.124928116798401, "train/loss_math": 1.8317604064941406, "train/loss_code": 1.125177577137947} +{"step": 4723, "train/loss": 2.0183428525924683, "train/lm_loss": 2.0183428525924683, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.6265663916417735e-07, "perf/tokens_per_sec": 26852.314558973165, "train/loss_code": 1.151013453801473, "train/loss_math": 1.8960235913594563, "train/loss_prose": 3.5028159618377686} +{"step": 4724, "train/loss": 2.617266535758972, "train/lm_loss": 2.617266535758972, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5932041380840065e-07, "perf/tokens_per_sec": 25949.4648962089, "train/loss_math": 2.07355535030365, "train/loss_code": 2.6963491439819336, "train/loss_prose": 3.3158538341522217} +{"step": 4725, "train/loss": 2.5050957798957825, "train/lm_loss": 2.5050957798957825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5599614928173164e-07, "perf/tokens_per_sec": 25742.18241426949, "train/loss_prose": 3.379653215408325, "train/loss_code": 1.8668754895528157, "train/loss_math": 2.1505892276763916} +{"step": 4726, "train/loss": 2.6054677069187164, "train/lm_loss": 2.6054677069187164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.5268384720437707e-07, "perf/tokens_per_sec": 25218.748893918975, "train/loss_prose": 3.59316356976827, "train/loss_code": 1.7874034643173218, "train/loss_math": 2.163147727648417} +{"step": 4727, "train/loss": 2.3700434267520905, "train/lm_loss": 2.3700434267520905, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.493835091907067e-07, "perf/tokens_per_sec": 25528.963534020055, "train/loss_code": 1.7131290197372437, "train/loss_prose": 3.4649008909861245} +{"step": 4728, "train/loss": 2.6871670484542847, "train/lm_loss": 2.6871670484542847, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4609513684926995e-07, "perf/tokens_per_sec": 26046.9564157417, "train/loss_prose": 3.484579861164093, "train/loss_math": 1.8897542357444763} +{"step": 4729, "train/loss": 2.048152595758438, "train/lm_loss": 2.048152595758438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.4281873178278475e-07, "perf/tokens_per_sec": 26046.443047608653, "train/loss_prose": 3.354886054992676, "train/loss_math": 1.9623871644337971, "train/loss_code": 1.2627622485160828} +{"step": 4730, "train/loss": 2.4677318036556244, "train/lm_loss": 2.4677318036556244, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.395542955881238e-07, "perf/tokens_per_sec": 26832.22575475971, "train/loss_math": 2.394255797068278, "train/loss_prose": 3.361034870147705, "train/loss_code": 1.2379913330078125} +{"step": 4731, "train/loss": 1.7869396209716797, "train/lm_loss": 1.7869396209716797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3630182985633096e-07, "perf/tokens_per_sec": 25179.532583413333, "train/loss_math": 2.084012985229492, "train/loss_code": 1.2915185242891312, "train/loss_prose": 2.8774032592773438} +{"step": 4732, "train/loss": 1.8967758417129517, "train/lm_loss": 1.8967758417129517, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.3306133617262713e-07, "perf/tokens_per_sec": 26826.234295681068, "train/loss_code": 1.3337147235870361, "train/loss_math": 2.234612536430359} +{"step": 4733, "train/loss": 2.2329885363578796, "train/lm_loss": 2.2329885363578796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2983281611638225e-07, "perf/tokens_per_sec": 26712.985881504148, "train/loss_code": 1.5102156400680542, "train/loss_math": 2.139197587966919, "train/loss_prose": 3.4578338861465454} +{"step": 4734, "train/loss": 1.8963377475738525, "train/lm_loss": 1.8963377475738525, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.26616271261146e-07, "perf/tokens_per_sec": 25754.45412134368, "train/loss_code": 1.265314022699992, "train/loss_math": 2.040348529815674, "train/loss_prose": 3.213365077972412} +{"step": 4735, "train/loss": 1.9898770451545715, "train/lm_loss": 1.9898770451545715, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.234117031746143e-07, "perf/tokens_per_sec": 25738.32580354227, "train/loss_math": 2.349642594655355, "train/loss_code": 1.4226678311824799, "train/loss_prose": 3.179417848587036} +{"step": 4736, "train/loss": 2.2917342483997345, "train/lm_loss": 2.2917342483997345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2021911341865726e-07, "perf/tokens_per_sec": 26385.67174008533, "train/loss_prose": 3.478877147038778, "train/loss_code": 1.2526697715123494, "train/loss_math": 2.069616436958313} +{"step": 4737, "train/loss": 2.1143911480903625, "train/lm_loss": 2.1143911480903625, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.170385035493107e-07, "perf/tokens_per_sec": 25962.60205888931, "train/loss_code": 1.4053154587745667, "train/loss_prose": 3.21599543094635, "train/loss_math": 2.4309380054473877} +{"step": 4738, "train/loss": 2.2965813279151917, "train/lm_loss": 2.2965813279151917, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.138698751167597e-07, "perf/tokens_per_sec": 26665.01499181263, "train/loss_prose": 3.5679968198140464, "train/loss_code": 1.1741109291712444, "train/loss_math": 2.073163390159607} +{"step": 4739, "train/loss": 2.3913316130638123, "train/lm_loss": 2.3913316130638123, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.107132296653549e-07, "perf/tokens_per_sec": 26203.844252193332, "train/loss_prose": 3.59867262840271, "train/loss_code": 0.9594829082489014, "train/loss_math": 2.138556639353434} +{"step": 4740, "train/loss": 2.374259799718857, "train/lm_loss": 2.374259799718857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.075685687336073e-07, "perf/tokens_per_sec": 26606.457123476273, "train/loss_prose": 3.1365859508514404, "train/loss_code": 1.2301531434059143, "train/loss_math": 1.9937142729759216} +{"step": 4741, "train/loss": 2.348614275455475, "train/lm_loss": 2.348614275455475, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.0443589385418536e-07, "perf/tokens_per_sec": 25642.590882631266, "train/loss_code": 1.588545838991801, "train/loss_math": 2.3738139470418296, "train/loss_prose": 3.4509177207946777} +{"step": 4742, "train/loss": 2.3398976922035217, "train/lm_loss": 2.3398976922035217, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.013152065539205e-07, "perf/tokens_per_sec": 26229.648865078118, "train/loss_prose": 2.9542264540990195, "train/loss_code": 1.917382836341858, "train/loss_math": 2.007245580355326} +{"step": 4743, "train/loss": 2.2208667397499084, "train/lm_loss": 2.2208667397499084, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.9820650835379614e-07, "perf/tokens_per_sec": 25263.584697621412, "train/loss_code": 1.3145324289798737, "train/loss_math": 2.098244458436966, "train/loss_prose": 3.3724453449249268} +{"step": 4744, "train/loss": 2.1241301894187927, "train/lm_loss": 2.1241301894187927, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.95109800768953e-07, "perf/tokens_per_sec": 25967.899900390126, "train/loss_math": 2.12156879901886, "train/loss_code": 1.4466423590977986, "train/loss_prose": 3.144203782081604} +{"step": 4745, "train/loss": 2.43496435880661, "train/lm_loss": 2.43496435880661, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.920250853086893e-07, "perf/tokens_per_sec": 25936.45849002767, "train/loss_code": 1.6894125938415527, "train/loss_prose": 3.448447863260905, "train/loss_math": 2.0330666303634644} +{"step": 4746, "train/loss": 2.337972968816757, "train/lm_loss": 2.337972968816757, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8895236347645523e-07, "perf/tokens_per_sec": 25892.13114961636, "train/loss_prose": 3.385510047276815, "train/loss_code": 1.51308074593544, "train/loss_math": 2.4949305057525635} +{"step": 4747, "train/loss": 2.051767885684967, "train/lm_loss": 2.051767885684967, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8589163676986674e-07, "perf/tokens_per_sec": 24160.010215346363, "train/loss_code": 1.5298550426959991, "train/loss_math": 2.2228986422220864, "train/loss_prose": 3.626026153564453} +{"step": 4748, "train/loss": 2.2030873000621796, "train/lm_loss": 2.2030873000621796, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.828429066806749e-07, "perf/tokens_per_sec": 25475.510676663187, "train/loss_math": 2.218016654253006, "train/loss_prose": 3.178609848022461, "train/loss_code": 1.197706550359726} +{"step": 4749, "train/loss": 2.3230473399162292, "train/lm_loss": 2.3230473399162292, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7980617469479953e-07, "perf/tokens_per_sec": 25279.458126716087, "train/loss_code": 1.6784482598304749, "train/loss_prose": 3.797058582305908, "train/loss_math": 2.138234317302704} +{"step": 4750, "train/loss": 2.0082426369190216, "train/lm_loss": 2.0082426369190216, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.767814422923038e-07, "perf/tokens_per_sec": 26083.378148096413, "train/loss_code": 1.1722548604011536, "train/loss_prose": 3.1108274459838867, "train/loss_math": 2.1091737747192383} +{"step": 4751, "train/loss": 1.7948530614376068, "train/lm_loss": 1.7948530614376068, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.7376871094740585e-07, "perf/tokens_per_sec": 25876.453579427613, "train/loss_code": 0.9819373860955238, "train/loss_prose": 3.735398054122925, "train/loss_math": 2.2318920294443765} +{"step": 4752, "train/loss": 2.410021126270294, "train/lm_loss": 2.410021126270294, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.707679821284782e-07, "perf/tokens_per_sec": 25809.472935902482, "train/loss_prose": 3.2900725603103638, "train/loss_math": 2.16278076171875, "train/loss_code": 1.8861188888549805} +{"step": 4753, "train/loss": 2.245835542678833, "train/lm_loss": 2.245835542678833, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.677792572980371e-07, "perf/tokens_per_sec": 25467.428991582936, "train/loss_prose": 3.3587961991628013, "train/loss_code": 1.36791064341863, "train/loss_math": 1.8932817578315735} +{"step": 4754, "train/loss": 2.3983419835567474, "train/lm_loss": 2.3983419835567474, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6480253791274786e-07, "perf/tokens_per_sec": 25590.605560562162, "train/loss_math": 2.3344727754592896, "train/loss_code": 1.990250051021576, "train/loss_prose": 3.2783948183059692} +{"step": 4755, "train/loss": 2.559612959623337, "train/lm_loss": 2.559612959623337, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6183782542343057e-07, "perf/tokens_per_sec": 25642.743979581177, "train/loss_prose": 3.4653360843658447, "train/loss_math": 2.0232322216033936, "train/loss_code": 1.2845471501350403} +{"step": 4756, "train/loss": 1.630852848291397, "train/lm_loss": 1.630852848291397, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.588851212750488e-07, "perf/tokens_per_sec": 26112.239687258712, "train/loss_code": 1.3958796858787537, "train/loss_math": 2.202968120574951, "train/loss_prose": 2.4685771465301514} +{"step": 4757, "train/loss": 2.281307637691498, "train/lm_loss": 2.281307637691498, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.55944426906718e-07, "perf/tokens_per_sec": 24401.80214504553, "train/loss_prose": 3.2435969511667886, "train/loss_code": 1.18840891122818, "train/loss_math": 2.0476176341374717} +{"step": 4758, "train/loss": 2.4202654659748077, "train/lm_loss": 2.4202654659748077, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5301574375169175e-07, "perf/tokens_per_sec": 25919.241160913247, "train/loss_code": 1.2917538285255432, "train/loss_math": 2.1817009449005127, "train/loss_prose": 3.4111711978912354} +{"step": 4759, "train/loss": 2.5731140077114105, "train/lm_loss": 2.5731140077114105, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5009907323737825e-07, "perf/tokens_per_sec": 25454.86347762311, "train/loss_prose": 3.420236885547638, "train/loss_code": 1.5524994532267253, "train/loss_math": 2.2464659214019775} +{"step": 4760, "train/loss": 2.5672041177749634, "train/lm_loss": 2.5672041177749634, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.471944167853236e-07, "perf/tokens_per_sec": 26067.42864252604, "train/loss_prose": 3.25240159034729, "train/loss_math": 2.1555423736572266, "train/loss_code": 2.156900405883789} +{"step": 4761, "train/loss": 2.2297243177890778, "train/lm_loss": 2.2297243177890778, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.44301775811226e-07, "perf/tokens_per_sec": 26655.333952916826, "train/loss_code": 1.121001958847046, "train/loss_math": 2.201263815164566, "train/loss_prose": 3.3953676223754883} +{"step": 4762, "train/loss": 2.202508807182312, "train/lm_loss": 2.202508807182312, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.414211517249244e-07, "perf/tokens_per_sec": 25686.336027080004, "train/loss_prose": 3.107151826222738, "train/loss_code": 1.535264492034912, "train/loss_math": 2.1575567722320557} +{"step": 4763, "train/loss": 2.4431680142879486, "train/lm_loss": 2.4431680142879486, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3855254593039564e-07, "perf/tokens_per_sec": 24594.07187307186, "train/loss_math": 2.141164779663086, "train/loss_code": 1.7903198599815369, "train/loss_prose": 4.050867676734924} +{"step": 4764, "train/loss": 2.161422312259674, "train/lm_loss": 2.161422312259674, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.3569595982576583e-07, "perf/tokens_per_sec": 24633.038323449455, "train/loss_math": 2.1464023113250734, "train/loss_prose": 3.388347864151001, "train/loss_code": 1.5855092406272888} +{"step": 4765, "train/loss": 2.419156402349472, "train/lm_loss": 2.419156402349472, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.328513948032991e-07, "perf/tokens_per_sec": 25894.58980426676, "train/loss_math": 2.2024890184402466, "train/loss_prose": 3.069158911705017} +{"step": 4766, "train/loss": 2.6871185898780823, "train/lm_loss": 2.6871185898780823, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.30018852249403e-07, "perf/tokens_per_sec": 25999.575020241384, "train/loss_prose": 3.304642677307129, "train/loss_code": 1.983487327893575, "train/loss_math": 2.327915906906128} +{"step": 4767, "train/loss": 2.928195834159851, "train/lm_loss": 2.928195834159851, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2719833354462324e-07, "perf/tokens_per_sec": 25946.133700629478, "train/loss_code": 1.517451524734497, "train/loss_prose": 3.639009952545166, "train/loss_math": 2.1956140995025635} +{"step": 4768, "train/loss": 2.351607620716095, "train/lm_loss": 2.351607620716095, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2438984006364914e-07, "perf/tokens_per_sec": 25889.946070741276, "train/loss_code": 0.9156313389539719, "train/loss_prose": 3.174922227859497, "train/loss_math": 2.1409546732902527} +{"step": 4769, "train/loss": 2.6389718055725098, "train/lm_loss": 2.6389718055725098, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.215933731753024e-07, "perf/tokens_per_sec": 25176.986452978177, "train/loss_prose": 3.0912376642227173, "train/loss_code": 1.9668958187103271, "train/loss_math": 2.4065158367156982} +{"step": 4770, "train/loss": 2.1518827080726624, "train/lm_loss": 2.1518827080726624, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1880893424254823e-07, "perf/tokens_per_sec": 25887.95640905091, "train/loss_math": 2.0647412737210593, "train/loss_code": 1.0784417390823364, "train/loss_prose": 3.7481720447540283} +{"step": 4771, "train/loss": 2.1004420816898346, "train/lm_loss": 2.1004420816898346, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1603652462249e-07, "perf/tokens_per_sec": 26391.508542741358, "train/loss_code": 1.5888157486915588, "train/loss_math": 2.1512553691864014, "train/loss_prose": 3.9945075511932373} +{"step": 4772, "train/loss": 2.307413160800934, "train/lm_loss": 2.307413160800934, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.1327614566636633e-07, "perf/tokens_per_sec": 26417.52178385751, "train/loss_math": 2.0505709886550902, "train/loss_prose": 3.1242884397506714, "train/loss_code": 1.957873821258545} +{"step": 4773, "train/loss": 2.524849057197571, "train/lm_loss": 2.524849057197571, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.105277987195537e-07, "perf/tokens_per_sec": 26892.45625481344, "train/loss_prose": 3.976581891377767, "train/loss_math": 2.230207681655884, "train/loss_code": 1.5097097754478455} +{"step": 4774, "train/loss": 2.095047652721405, "train/lm_loss": 2.095047652721405, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.077914851215585e-07, "perf/tokens_per_sec": 25823.089721640605, "train/loss_code": 0.9374565184116364, "train/loss_math": 2.1341007351875305, "train/loss_prose": 3.1745325326919556} +{"step": 4775, "train/loss": 2.0422702729701996, "train/lm_loss": 2.0422702729701996, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.050672062060278e-07, "perf/tokens_per_sec": 25789.3650093596, "train/loss_math": 1.8421985507011414, "train/loss_code": 1.6651801466941833, "train/loss_prose": 2.9965219497680664} +{"step": 4776, "train/loss": 2.3790913224220276, "train/lm_loss": 2.3790913224220276, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0235496330074387e-07, "perf/tokens_per_sec": 25608.76306943784, "train/loss_code": 1.4549628694852192, "train/loss_math": 2.0917136669158936, "train/loss_prose": 3.494804541269938} +{"step": 4777, "train/loss": 2.7688754200935364, "train/lm_loss": 2.7688754200935364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9965475772762154e-07, "perf/tokens_per_sec": 26215.720066470632, "train/loss_prose": 3.5441142916679382, "train/loss_code": 1.128492832183838, "train/loss_math": 2.2820173104604087} +{"step": 4778, "train/loss": 2.4098251312971115, "train/lm_loss": 2.4098251312971115, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9696659080270804e-07, "perf/tokens_per_sec": 26107.95399615824, "train/loss_prose": 3.356225609779358, "train/loss_math": 1.9508193731307983, "train/loss_code": 0.9760302901268005} +{"step": 4779, "train/loss": 2.4318938851356506, "train/lm_loss": 2.4318938851356506, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.942904638361804e-07, "perf/tokens_per_sec": 25689.94657706519, "train/loss_code": 1.516747236251831, "train/loss_math": 1.9289807081222534, "train/loss_prose": 3.6823157469431558} +{"step": 4780, "train/loss": 2.5354144275188446, "train/lm_loss": 2.5354144275188446, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.916263781323508e-07, "perf/tokens_per_sec": 27079.262114436762, "train/loss_math": 2.2815884749094644, "train/loss_code": 1.8647134900093079, "train/loss_prose": 3.2363741397857666} +{"step": 4781, "train/loss": 2.229593336582184, "train/lm_loss": 2.229593336582184, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8897433498966676e-07, "perf/tokens_per_sec": 26512.26654444388, "train/loss_code": 1.5482746809720993, "train/loss_prose": 3.1997389793395996, "train/loss_math": 2.044430732727051} +{"step": 4782, "train/loss": 2.8686184883117676, "train/lm_loss": 2.8686184883117676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.863343357006998e-07, "perf/tokens_per_sec": 26711.32454350963, "train/loss_prose": 3.5093981623649597, "train/loss_math": 2.22783887386322} +{"step": 4783, "train/loss": 2.148684084415436, "train/lm_loss": 2.148684084415436, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.8370638155215123e-07, "perf/tokens_per_sec": 26230.810266432552, "train/loss_code": 1.470072603225708, "train/loss_math": 2.5188920497894287, "train/loss_prose": 3.660108804702759} +{"step": 4784, "train/loss": 2.976423680782318, "train/lm_loss": 2.976423680782318, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.810904738248549e-07, "perf/tokens_per_sec": 25813.777546049696, "train/loss_prose": 3.3722151279449464, "train/loss_math": 2.3167709509531655} +{"step": 4785, "train/loss": 2.4692540764808655, "train/lm_loss": 2.4692540764808655, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.784866137937714e-07, "perf/tokens_per_sec": 25738.595728678978, "train/loss_prose": 3.2711284160614014, "train/loss_math": 2.168185830116272, "train/loss_code": 1.2679041624069214} +{"step": 4786, "train/loss": 2.431557685136795, "train/lm_loss": 2.431557685136795, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.75894802727994e-07, "perf/tokens_per_sec": 26416.546885114825, "train/loss_math": 2.129103273153305, "train/loss_code": 0.863885760307312, "train/loss_prose": 3.3573877016703286} +{"step": 4787, "train/loss": 2.099230170249939, "train/lm_loss": 2.099230170249939, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7331504189073987e-07, "perf/tokens_per_sec": 26132.099199299082, "train/loss_code": 1.4057606061299641, "train/loss_math": 1.9457262357076008, "train/loss_prose": 3.3696902990341187} +{"step": 4788, "train/loss": 2.1089634597301483, "train/lm_loss": 2.1089634597301483, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7074733253934783e-07, "perf/tokens_per_sec": 26017.293278309924, "train/loss_math": 1.9819132328033446, "train/loss_code": 0.6699316501617432, "train/loss_prose": 3.14610493183136} +{"step": 4789, "train/loss": 1.9440182745456696, "train/lm_loss": 1.9440182745456696, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.681916759252917e-07, "perf/tokens_per_sec": 26723.581771205198, "train/loss_code": 1.6023741761843364, "train/loss_prose": 3.2426254749298096, "train/loss_math": 2.6952757835388184} +{"step": 4790, "train/loss": 2.08945095539093, "train/lm_loss": 2.08945095539093, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.656480732941696e-07, "perf/tokens_per_sec": 25844.02782391027, "train/loss_code": 1.924817979335785, "train/loss_math": 2.1004793643951416, "train/loss_prose": 2.7148971557617188} +{"step": 4791, "train/loss": 2.1748076379299164, "train/lm_loss": 2.1748076379299164, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6311652588569824e-07, "perf/tokens_per_sec": 25966.015975720533, "train/loss_prose": 3.0241851806640625, "train/loss_math": 2.146878103415171, "train/loss_code": 1.4930075407028198} +{"step": 4792, "train/loss": 1.6485752761363983, "train/lm_loss": 1.6485752761363983, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.605970349337267e-07, "perf/tokens_per_sec": 25944.6055559331, "train/loss_code": 1.2473820000886917, "train/loss_math": 2.049768477678299} +{"step": 4793, "train/loss": 2.119377911090851, "train/lm_loss": 2.119377911090851, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.580896016662199e-07, "perf/tokens_per_sec": 26158.559203604367, "train/loss_math": 2.0435033241907754, "train/loss_code": 1.5216742753982544, "train/loss_prose": 3.1297450065612793} +{"step": 4794, "train/loss": 2.451063632965088, "train/lm_loss": 2.451063632965088, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.555942273052753e-07, "perf/tokens_per_sec": 24887.179939447495, "train/loss_code": 1.2062626282374065, "train/loss_prose": 3.5246431827545166, "train/loss_math": 1.8911491632461548} +{"step": 4795, "train/loss": 2.045602411031723, "train/lm_loss": 2.045602411031723, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.531109130671061e-07, "perf/tokens_per_sec": 25645.270343213357, "train/loss_code": 1.1936672806739808, "train/loss_prose": 3.465494473775228} +{"step": 4796, "train/loss": 2.159080892801285, "train/lm_loss": 2.159080892801285, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5063966016204975e-07, "perf/tokens_per_sec": 25312.644755395184, "train/loss_prose": 2.955238461494446, "train/loss_math": 1.9775283734003704, "train/loss_code": 1.8098617394765217} +{"step": 4797, "train/loss": 2.760391443967819, "train/lm_loss": 2.760391443967819, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4818046979456497e-07, "perf/tokens_per_sec": 26249.085834247526, "train/loss_math": 2.0366519689559937, "train/loss_prose": 3.538239896297455, "train/loss_code": 1.8202153444290161} +{"step": 4798, "train/loss": 1.938315898180008, "train/lm_loss": 1.938315898180008, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.457333431632264e-07, "perf/tokens_per_sec": 26752.50348657226, "train/loss_math": 2.1372445821762085, "train/loss_code": 1.001746505498886, "train/loss_prose": 2.8168108463287354} +{"step": 4799, "train/loss": 2.7384590208530426, "train/lm_loss": 2.7384590208530426, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4329828146074095e-07, "perf/tokens_per_sec": 26282.05415245849, "train/loss_math": 2.1854027211666107, "train/loss_prose": 3.743460734685262, "train/loss_code": 1.93567955493927} +{"step": 4800, "train/loss": 2.121221274137497, "train/lm_loss": 2.121221274137497, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.408752858739288e-07, "perf/tokens_per_sec": 25125.216350527226, "train/loss_code": 1.3891905546188354, "train/loss_math": 2.1611336867014566, "train/loss_prose": 3.1593987941741943} +{"step": 4800, "eval/loss": 2.1566788039028273, "eval/lm_loss": 2.1566788039028273, "eval/lb_loss": 0.0, "eval/z_loss": 0.0, "eval/perplexity": 8.64238687982859, "eval/loss_code": 1.553814276372461, "eval/ppl_code": 4.729475348497888, "eval/loss_prose": 3.484818563126681, "eval/ppl_prose": 32.61650873560643, "eval/loss_math": 2.023372275825219, "eval/ppl_math": 7.563789156658054} +{"step": 4801, "train/loss": 2.5615840554237366, "train/lm_loss": 2.5615840554237366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.384643575837203e-07, "perf/tokens_per_sec": 25675.932566536743, "train/loss_prose": 3.1599207719167075, "train/loss_math": 2.3450063467025757, "train/loss_code": 1.6328849792480469} +{"step": 4802, "train/loss": 2.2287992238998413, "train/lm_loss": 2.2287992238998413, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3606549776517827e-07, "perf/tokens_per_sec": 26364.858635401, "train/loss_math": 2.1971155802408853, "train/loss_prose": 3.7116355895996094, "train/loss_code": 1.2719250520070393} +{"step": 4803, "train/loss": 3.0662647485733032, "train/lm_loss": 3.0662647485733032, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3367870758747855e-07, "perf/tokens_per_sec": 26254.14015355274, "train/loss_prose": 3.645384645462036, "train/loss_math": 2.101064920425415} +{"step": 4804, "train/loss": 2.609261780977249, "train/lm_loss": 2.609261780977249, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3130398821391007e-07, "perf/tokens_per_sec": 25834.117812293705, "train/loss_prose": 3.616844415664673, "train/loss_math": 2.4177911281585693, "train/loss_code": 1.7293258905410767} +{"step": 4805, "train/loss": 2.332393705844879, "train/lm_loss": 2.332393705844879, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.289413408018859e-07, "perf/tokens_per_sec": 25247.879608728366, "train/loss_prose": 3.8946763277053833, "train/loss_math": 2.0252708196640015, "train/loss_code": 1.5979948441187541} +{"step": 4806, "train/loss": 1.9999074339866638, "train/lm_loss": 1.9999074339866638, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2659076650292933e-07, "perf/tokens_per_sec": 26729.527459430865, "train/loss_math": 2.2762388388315835, "train/loss_prose": 3.2371609210968018, "train/loss_code": 0.8987405101458231} +{"step": 4807, "train/loss": 3.14387583732605, "train/lm_loss": 3.14387583732605, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2425226646268227e-07, "perf/tokens_per_sec": 25037.62840845165, "train/loss_prose": 3.2992056097303117, "train/loss_math": 2.056567430496216} +{"step": 4808, "train/loss": 2.400247424840927, "train/lm_loss": 2.400247424840927, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.219258418208997e-07, "perf/tokens_per_sec": 25892.24821782208, "train/loss_prose": 3.52884840965271, "train/loss_code": 1.4017441670099895, "train/loss_math": 2.2051010131835938} +{"step": 4809, "train/loss": 2.290368467569351, "train/lm_loss": 2.290368467569351, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1961149371145795e-07, "perf/tokens_per_sec": 26675.6971120776, "train/loss_math": 2.2506373822689056, "train/loss_prose": 3.2043263912200928, "train/loss_code": 1.4558724761009216} +{"step": 4810, "train/loss": 2.100181519985199, "train/lm_loss": 2.100181519985199, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1730922326233803e-07, "perf/tokens_per_sec": 25825.37999374654, "train/loss_prose": 3.135047674179077, "train/loss_math": 1.9720495144526164, "train/loss_code": 1.5384026765823364} +{"step": 4811, "train/loss": 2.4222222566604614, "train/lm_loss": 2.4222222566604614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1501903159563686e-07, "perf/tokens_per_sec": 26093.242564204596, "train/loss_prose": 3.5784424543380737, "train/loss_math": 2.1402495861053468, "train/loss_code": 1.5196447372436523} +{"step": 4812, "train/loss": 2.4024146795272827, "train/lm_loss": 2.4024146795272827, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1274091982756705e-07, "perf/tokens_per_sec": 25456.900286132997, "train/loss_code": 1.2278388142585754, "train/loss_math": 2.293621778488159, "train/loss_prose": 3.2942583560943604} +{"step": 4813, "train/loss": 1.9543877840042114, "train/lm_loss": 1.9543877840042114, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1047488906845713e-07, "perf/tokens_per_sec": 26770.428866159302, "train/loss_prose": 3.4162685871124268, "train/loss_math": 1.8699707090854645, "train/loss_code": 1.5796505610148113} +{"step": 4814, "train/loss": 2.240693747997284, "train/lm_loss": 2.240693747997284, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0822094042274032e-07, "perf/tokens_per_sec": 26028.724513094003, "train/loss_math": 1.8691000938415527, "train/loss_prose": 3.1538097262382507, "train/loss_code": 1.147070328394572} +{"step": 4815, "train/loss": 2.317542552947998, "train/lm_loss": 2.317542552947998, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0597907498896007e-07, "perf/tokens_per_sec": 26225.965099966263, "train/loss_prose": 3.125946819782257, "train/loss_code": 1.509138286113739} +{"step": 4816, "train/loss": 2.5095217525959015, "train/lm_loss": 2.5095217525959015, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0374929385978125e-07, "perf/tokens_per_sec": 25462.522059782543, "train/loss_math": 2.33127498626709, "train/loss_code": 1.5420315265655518, "train/loss_prose": 3.5958427588144937} +{"step": 4817, "train/loss": 2.0731338262557983, "train/lm_loss": 2.0731338262557983, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.015315981219651e-07, "perf/tokens_per_sec": 26044.113400389604, "train/loss_math": 2.0503535866737366, "train/loss_code": 1.5160349756479263, "train/loss_prose": 3.210111975669861} +{"step": 4818, "train/loss": 2.604025721549988, "train/lm_loss": 2.604025721549988, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.993259888563942e-07, "perf/tokens_per_sec": 25494.715792597868, "train/loss_code": 1.106248527765274, "train/loss_prose": 3.307198333740234, "train/loss_math": 2.0837161540985107} +{"step": 4819, "train/loss": 1.7388393580913544, "train/lm_loss": 1.7388393580913544, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9713246713805588e-07, "perf/tokens_per_sec": 25982.470295367584, "train/loss_code": 1.3483410716056823, "train/loss_math": 2.3896697362264} +{"step": 4820, "train/loss": 2.482149988412857, "train/lm_loss": 2.482149988412857, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.949510340360422e-07, "perf/tokens_per_sec": 26009.454893387676, "train/loss_math": 1.9668088754018147, "train/loss_code": 1.5209102630615234, "train/loss_prose": 3.638317823410034} +{"step": 4821, "train/loss": 2.6109314262866974, "train/lm_loss": 2.6109314262866974, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9278169061355821e-07, "perf/tokens_per_sec": 25740.061135358093, "train/loss_math": 1.9818793137868245, "train/loss_prose": 3.6215951442718506, "train/loss_code": 2.038514196872711} +{"step": 4822, "train/loss": 2.470258742570877, "train/lm_loss": 2.470258742570877, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.906244379279193e-07, "perf/tokens_per_sec": 26644.998726675756, "train/loss_prose": 3.4569900035858154, "train/loss_code": 1.406126856803894, "train/loss_math": 2.1929489771525064} +{"step": 4823, "train/loss": 2.256461203098297, "train/lm_loss": 2.256461203098297, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.884792770305399e-07, "perf/tokens_per_sec": 26700.489384221102, "train/loss_prose": 3.0150888760884604, "train/loss_math": 2.567664623260498, "train/loss_code": 1.6096895039081573} +{"step": 4824, "train/loss": 2.6934736371040344, "train/lm_loss": 2.6934736371040344, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8634620896695043e-07, "perf/tokens_per_sec": 26221.76206698658, "train/loss_code": 1.9231962362925212, "train/loss_prose": 3.524549961090088, "train/loss_math": 1.6800001859664917} +{"step": 4825, "train/loss": 2.232243150472641, "train/lm_loss": 2.232243150472641, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.842252347767748e-07, "perf/tokens_per_sec": 25975.634705405635, "train/loss_prose": 3.1559311548868814, "train/loss_math": 2.0179941654205322, "train/loss_code": 1.4513877232869465} +{"step": 4826, "train/loss": 2.309105545282364, "train/lm_loss": 2.309105545282364, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8211635549375827e-07, "perf/tokens_per_sec": 26243.472205843264, "train/loss_code": 1.9418166875839233, "train/loss_math": 2.287350614865621, "train/loss_prose": 2.8926711082458496} +{"step": 4827, "train/loss": 2.7911866307258606, "train/lm_loss": 2.7911866307258606, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.8001957214573705e-07, "perf/tokens_per_sec": 26542.290601671033, "train/loss_math": 2.4152592420578003, "train/loss_prose": 3.398263931274414, "train/loss_code": 1.952959418296814} +{"step": 4828, "train/loss": 2.19527605175972, "train/lm_loss": 2.19527605175972, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7793488575466034e-07, "perf/tokens_per_sec": 25814.010268584952, "train/loss_code": 1.6312339901924133, "train/loss_prose": 3.5550936460494995, "train/loss_math": 1.9635424613952637} +{"step": 4829, "train/loss": 2.2460196912288666, "train/lm_loss": 2.2460196912288666, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7586229733657644e-07, "perf/tokens_per_sec": 25842.74492354671, "train/loss_prose": 3.2431979974110923, "train/loss_code": 1.2694088617960613, "train/loss_math": 2.215168833732605} +{"step": 4830, "train/loss": 2.0351662933826447, "train/lm_loss": 2.0351662933826447, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7380180790164402e-07, "perf/tokens_per_sec": 26215.7600705603, "train/loss_code": 1.823799204826355, "train/loss_prose": 3.196885585784912, "train/loss_math": 1.9827243089675903} +{"step": 4831, "train/loss": 1.9383438229560852, "train/lm_loss": 1.9383438229560852, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.717534184541153e-07, "perf/tokens_per_sec": 26152.227803165693, "train/loss_math": 2.077016258239746, "train/loss_code": 1.011677473783493, "train/loss_prose": 3.0983145236968994} +{"step": 4832, "train/loss": 1.977394163608551, "train/lm_loss": 1.977394163608551, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6971712999235556e-07, "perf/tokens_per_sec": 26395.766172907413, "train/loss_math": 2.2622159719467163, "train/loss_code": 1.6925724148750305} +{"step": 4833, "train/loss": 2.4101706445217133, "train/lm_loss": 2.4101706445217133, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6769294350882648e-07, "perf/tokens_per_sec": 25396.98868066222, "train/loss_math": 2.1957836151123047, "train/loss_code": 0.803039938211441, "train/loss_prose": 3.32092946767807} +{"step": 4834, "train/loss": 2.1379392445087433, "train/lm_loss": 2.1379392445087433, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6568085999008888e-07, "perf/tokens_per_sec": 26542.823702087753, "train/loss_prose": 2.845250129699707, "train/loss_math": 2.1835816303888955, "train/loss_code": 1.6207563082377117} +{"step": 4835, "train/loss": 1.870867908000946, "train/lm_loss": 1.870867908000946, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.6368088041681108e-07, "perf/tokens_per_sec": 25544.41265097413, "train/loss_code": 1.4942501187324524, "train/loss_math": 2.0137537121772766, "train/loss_prose": 3.4681849479675293} +{"step": 4836, "train/loss": 2.081443816423416, "train/lm_loss": 2.081443816423416, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.61693005763755e-07, "perf/tokens_per_sec": 26853.36386116174, "train/loss_code": 1.229160726070404, "train/loss_math": 2.2116146802902223, "train/loss_prose": 3.135154962539673} +{"step": 4837, "train/loss": 2.319332003593445, "train/lm_loss": 2.319332003593445, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5971723699979013e-07, "perf/tokens_per_sec": 24761.740922187295, "train/loss_prose": 3.386338949203491, "train/loss_math": 2.1237975358963013, "train/loss_code": 1.8035283486048381} +{"step": 4838, "train/loss": 2.403913676738739, "train/lm_loss": 2.403913676738739, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5775357508787947e-07, "perf/tokens_per_sec": 24780.31346902509, "train/loss_prose": 3.4081188837687173, "train/loss_code": 1.258751630783081, "train/loss_math": 2.16314967473348} +{"step": 4839, "train/loss": 2.3004029989242554, "train/lm_loss": 2.3004029989242554, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5580202098509077e-07, "perf/tokens_per_sec": 25946.25125767784, "train/loss_code": 1.7889230251312256, "train/loss_prose": 3.4229482412338257, "train/loss_math": 2.200817823410034} +{"step": 4840, "train/loss": 2.2996610403060913, "train/lm_loss": 2.2996610403060913, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5386257564258542e-07, "perf/tokens_per_sec": 26679.839770657232, "train/loss_code": 1.5743086338043213, "train/loss_prose": 3.8178744316101074, "train/loss_math": 2.0128710667292276} +{"step": 4841, "train/loss": 2.3233038783073425, "train/lm_loss": 2.3233038783073425, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5193524000562675e-07, "perf/tokens_per_sec": 25781.70203148753, "train/loss_prose": 3.3926875591278076, "train/loss_code": 0.8440124690532684, "train/loss_math": 2.2401144901911416} +{"step": 4842, "train/loss": 2.0810168385505676, "train/lm_loss": 2.0810168385505676, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5002001501357444e-07, "perf/tokens_per_sec": 25913.963153020253, "train/loss_math": 2.0587730010350547, "train/loss_code": 1.2928731242815654, "train/loss_prose": 3.296597957611084} +{"step": 4843, "train/loss": 2.2680572271347046, "train/lm_loss": 2.2680572271347046, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4811690159988455e-07, "perf/tokens_per_sec": 25954.875012275028, "train/loss_math": 2.2766422430674234, "train/loss_code": 1.6309171915054321, "train/loss_prose": 3.2108899354934692} +{"step": 4844, "train/loss": 2.077307015657425, "train/lm_loss": 2.077307015657425, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4622590069211516e-07, "perf/tokens_per_sec": 25791.842905762973, "train/loss_math": 1.9505012035369873, "train/loss_code": 1.5077842275301616, "train/loss_prose": 3.1217997074127197} +{"step": 4845, "train/loss": 1.9793443083763123, "train/lm_loss": 1.9793443083763123, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4434701321191235e-07, "perf/tokens_per_sec": 25697.401347702846, "train/loss_code": 1.5125315189361572, "train/loss_prose": 3.441594123840332, "train/loss_math": 1.873619270324707} +{"step": 4846, "train/loss": 2.3601429164409637, "train/lm_loss": 2.3601429164409637, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.424802400750269e-07, "perf/tokens_per_sec": 26657.898329911848, "train/loss_code": 1.0585801402727764, "train/loss_prose": 3.141080617904663} +{"step": 4847, "train/loss": 2.238015115261078, "train/lm_loss": 2.238015115261078, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.406255821913005e-07, "perf/tokens_per_sec": 25922.91748814372, "train/loss_prose": 3.561854600906372, "train/loss_math": 2.151434858640035, "train/loss_code": 1.4420358339945476} +{"step": 4848, "train/loss": 2.22366800904274, "train/lm_loss": 2.22366800904274, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.387830404646684e-07, "perf/tokens_per_sec": 26159.993153886797, "train/loss_prose": 3.309109926223755, "train/loss_math": 2.022586166858673, "train/loss_code": 1.2722808122634888} +{"step": 4849, "train/loss": 1.9199785590171814, "train/lm_loss": 1.9199785590171814, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3695261579316777e-07, "perf/tokens_per_sec": 26070.27675069008, "train/loss_code": 1.607580876350403, "train/loss_prose": 3.262320041656494, "train/loss_math": 2.0298019647598267} +{"step": 4850, "train/loss": 2.3332135379314423, "train/lm_loss": 2.3332135379314423, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3513430906891834e-07, "perf/tokens_per_sec": 26266.945112675043, "train/loss_math": 2.089168816804886, "train/loss_prose": 3.58730149269104, "train/loss_code": 1.5672153234481812} +{"step": 4851, "train/loss": 2.248500734567642, "train/lm_loss": 2.248500734567642, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.333281211781473e-07, "perf/tokens_per_sec": 26091.41966044447, "train/loss_math": 2.2261199355125427, "train/loss_prose": 3.1310179233551025, "train/loss_code": 1.4107451438903809} +{"step": 4852, "train/loss": 2.332820236682892, "train/lm_loss": 2.332820236682892, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3153405300116716e-07, "perf/tokens_per_sec": 25009.271819955164, "train/loss_code": 1.5706761280695598, "train/loss_prose": 3.338904778162638, "train/loss_math": 1.9669097065925598} +{"step": 4853, "train/loss": 2.248961865901947, "train/lm_loss": 2.248961865901947, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2975210541238402e-07, "perf/tokens_per_sec": 26528.10979447507, "train/loss_math": 2.0708024501800537, "train/loss_prose": 3.731045365333557, "train/loss_code": 1.4390655358632405} +{"step": 4854, "train/loss": 2.2951853573322296, "train/lm_loss": 2.2951853573322296, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2798227928029482e-07, "perf/tokens_per_sec": 26458.532609080674, "train/loss_prose": 3.107419967651367, "train/loss_code": 0.9863404631614685, "train/loss_math": 1.979560911655426} +{"step": 4855, "train/loss": 1.646009385585785, "train/lm_loss": 1.646009385585785, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2622457546749567e-07, "perf/tokens_per_sec": 26297.381850688515, "train/loss_code": 1.428515863418579, "train/loss_prose": 2.423577308654785, "train/loss_math": 1.8009589910507202} +{"step": 4856, "train/loss": 2.328708291053772, "train/lm_loss": 2.328708291053772, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2447899483066794e-07, "perf/tokens_per_sec": 27033.75339930794, "train/loss_math": 2.132530371348063, "train/loss_prose": 3.374270439147949, "train/loss_code": 1.827844460805257} +{"step": 4857, "train/loss": 2.067051976919174, "train/lm_loss": 2.067051976919174, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2274553822058943e-07, "perf/tokens_per_sec": 26280.043969693586, "train/loss_math": 2.1820772886276245, "train/loss_code": 1.4138223528862, "train/loss_prose": 3.2584853172302246} +{"step": 4858, "train/loss": 2.3948414623737335, "train/lm_loss": 2.3948414623737335, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2102420648212043e-07, "perf/tokens_per_sec": 26272.207194174192, "train/loss_math": 1.9661179184913635, "train/loss_prose": 3.277576287587484, "train/loss_code": 1.7979224522908528} +{"step": 4859, "train/loss": 2.298024445772171, "train/lm_loss": 2.298024445772171, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.193150004542204e-07, "perf/tokens_per_sec": 25542.70359830745, "train/loss_math": 1.92971533536911, "train/loss_code": 1.6687783002853394, "train/loss_prose": 3.6638888120651245} +{"step": 4860, "train/loss": 2.358134150505066, "train/lm_loss": 2.358134150505066, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1761792096993407e-07, "perf/tokens_per_sec": 26110.61254626006, "train/loss_prose": 3.303051312764486, "train/loss_code": 1.5214173793792725, "train/loss_math": 1.9710280895233154} +{"step": 4861, "train/loss": 2.593385934829712, "train/lm_loss": 2.593385934829712, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1593296885640259e-07, "perf/tokens_per_sec": 25552.58128251732, "train/loss_math": 2.268330478668213, "train/loss_prose": 3.1351449489593506} +{"step": 4862, "train/loss": 2.406036913394928, "train/lm_loss": 2.406036913394928, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1426014493484404e-07, "perf/tokens_per_sec": 25632.41403676295, "train/loss_code": 1.1053760051727295, "train/loss_prose": 3.5395756562550864, "train/loss_math": 2.1396056016286216} +{"step": 4863, "train/loss": 2.090067893266678, "train/lm_loss": 2.090067893266678, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.125994500205757e-07, "perf/tokens_per_sec": 26029.94706698747, "train/loss_code": 1.2068734765052795, "train/loss_prose": 3.3619370460510254, "train/loss_math": 1.8072386980056763} +{"step": 4864, "train/loss": 2.7071571946144104, "train/lm_loss": 2.7071571946144104, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.109508849230001e-07, "perf/tokens_per_sec": 25827.04943106395, "train/loss_prose": 3.233239936828613, "train/loss_code": 1.7020909786224365, "train/loss_math": 2.0868759155273438} +{"step": 4865, "train/loss": 2.1006402671337128, "train/lm_loss": 2.1006402671337128, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0931445044560785e-07, "perf/tokens_per_sec": 25876.29767909833, "train/loss_code": 0.9776769876480103, "train/loss_prose": 3.7463475465774536, "train/loss_math": 2.1264652808507285} +{"step": 4866, "train/loss": 2.3230772018432617, "train/lm_loss": 2.3230772018432617, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.076901473859776e-07, "perf/tokens_per_sec": 25775.706412768523, "train/loss_code": 1.6927457749843597, "train/loss_prose": 3.653247117996216, "train/loss_math": 2.2535699605941772} +{"step": 4867, "train/loss": 2.1099452078342438, "train/lm_loss": 2.1099452078342438, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0607797653577334e-07, "perf/tokens_per_sec": 26469.45771710744, "train/loss_math": 2.0887707471847534, "train/loss_code": 1.3543767929077148, "train/loss_prose": 3.726954698562622} +{"step": 4868, "train/loss": 2.8310141265392303, "train/lm_loss": 2.8310141265392303, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0447793868074707e-07, "perf/tokens_per_sec": 25593.884212862886, "train/loss_prose": 3.4251596927642822, "train/loss_math": 2.228501558303833, "train/loss_code": 1.0653114318847656} +{"step": 4869, "train/loss": 2.4807780981063843, "train/lm_loss": 2.4807780981063843, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0289003460074165e-07, "perf/tokens_per_sec": 25617.813289751903, "train/loss_code": 1.3216446240743, "train/loss_prose": 3.4565370082855225, "train/loss_math": 2.055143117904663} +{"step": 4870, "train/loss": 2.0774877965450287, "train/lm_loss": 2.0774877965450287, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0131426506967689e-07, "perf/tokens_per_sec": 25660.247349939433, "train/loss_math": 1.7645426988601685, "train/loss_prose": 3.392758011817932, "train/loss_code": 1.6139686822891235} +{"step": 4871, "train/loss": 2.184946119785309, "train/lm_loss": 2.184946119785309, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.975063085557179e-08, "perf/tokens_per_sec": 26287.64407666625, "train/loss_code": 1.4297235310077667, "train/loss_prose": 3.4760899543762207, "train/loss_math": 2.4042474031448364} +{"step": 4872, "train/loss": 2.2382243275642395, "train/lm_loss": 2.2382243275642395, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.819913272051396e-08, "perf/tokens_per_sec": 25926.907874264103, "train/loss_code": 1.649265170097351, "train/loss_prose": 3.596060872077942, "train/loss_math": 2.058306038379669} +{"step": 4873, "train/loss": 2.103050470352173, "train/lm_loss": 2.103050470352173, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.665977142068738e-08, "perf/tokens_per_sec": 25720.754297554562, "train/loss_code": 1.158182978630066, "train/loss_prose": 3.117444157600403, "train/loss_math": 2.0682871341705322} +{"step": 4874, "train/loss": 1.789359301328659, "train/lm_loss": 1.789359301328659, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.513254770636137e-08, "perf/tokens_per_sec": 26655.623454644756, "train/loss_code": 1.6765095165797643, "train/loss_math": 2.579308032989502} +{"step": 4875, "train/loss": 2.0949082374572754, "train/lm_loss": 2.0949082374572754, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.361746232188495e-08, "perf/tokens_per_sec": 25626.908136232367, "train/loss_math": 2.1722405433654783, "train/loss_prose": 3.601940870285034, "train/loss_code": 1.1480612456798553} +{"step": 4876, "train/loss": 2.421840026974678, "train/lm_loss": 2.421840026974678, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.211451600568966e-08, "perf/tokens_per_sec": 26052.920867062367, "train/loss_code": 1.053542137145996, "train/loss_prose": 3.6004854440689087, "train/loss_math": 1.8121529817581177} +{"step": 4877, "train/loss": 2.726331412792206, "train/lm_loss": 2.726331412792206, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.062370949029231e-08, "perf/tokens_per_sec": 26615.855275572252, "train/loss_prose": 3.5681965947151184, "train/loss_code": 1.677372137705485, "train/loss_math": 2.505748987197876} +{"step": 4878, "train/loss": 2.6336943209171295, "train/lm_loss": 2.6336943209171295, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.914504350230058e-08, "perf/tokens_per_sec": 26492.723981649255, "train/loss_code": 0.8716689646244049, "train/loss_prose": 3.5091265201568604, "train/loss_math": 1.7805832624435425} +{"step": 4879, "train/loss": 2.2331475019454956, "train/lm_loss": 2.2331475019454956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.767851876239074e-08, "perf/tokens_per_sec": 25672.057417383312, "train/loss_math": 2.1686365604400635, "train/loss_code": 1.3783273249864578, "train/loss_prose": 3.394411245981852} +{"step": 4880, "train/loss": 2.3525602519512177, "train/lm_loss": 2.3525602519512177, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.622413598533551e-08, "perf/tokens_per_sec": 25845.03868360798, "train/loss_math": 2.1968370974063873, "train/loss_code": 1.4628580212593079, "train/loss_prose": 3.553708553314209} +{"step": 4881, "train/loss": 2.3485855162143707, "train/lm_loss": 2.3485855162143707, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.478189587997898e-08, "perf/tokens_per_sec": 26116.68559406031, "train/loss_prose": 3.5938186645507812, "train/loss_code": 1.388391653696696, "train/loss_math": 1.921026349067688} +{"step": 4882, "train/loss": 2.226871222257614, "train/lm_loss": 2.226871222257614, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.335179914925328e-08, "perf/tokens_per_sec": 25965.23108778219, "train/loss_code": 1.7403859049081802, "train/loss_math": 1.7627719640731812, "train/loss_prose": 3.663941502571106} +{"step": 4883, "train/loss": 2.6438978910446167, "train/lm_loss": 2.6438978910446167, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.193384649017032e-08, "perf/tokens_per_sec": 25856.824920269166, "train/loss_prose": 3.636550505956014, "train/loss_math": 1.9487484991550446, "train/loss_code": 2.446537733078003} +{"step": 4884, "train/loss": 1.573338359594345, "train/lm_loss": 1.573338359594345, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.052803859382174e-08, "perf/tokens_per_sec": 26931.535977702148, "train/loss_code": 1.3947725296020508, "train/loss_math": 2.1090363264083862} +{"step": 4885, "train/loss": 2.1833966076374054, "train/lm_loss": 2.1833966076374054, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.913437614538166e-08, "perf/tokens_per_sec": 25389.069292883913, "train/loss_prose": 3.0664645036061606, "train/loss_code": 1.5289571285247803, "train/loss_math": 2.1519505977630615} +{"step": 4886, "train/loss": 1.939975380897522, "train/lm_loss": 1.939975380897522, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.775285982410396e-08, "perf/tokens_per_sec": 24871.11104934304, "train/loss_code": 1.4991174042224884, "train/loss_math": 2.124025821685791, "train/loss_prose": 3.151254653930664} +{"step": 4887, "train/loss": 2.9907054007053375, "train/lm_loss": 2.9907054007053375, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.638349030332503e-08, "perf/tokens_per_sec": 24872.55135830066, "train/loss_math": 2.0079360604286194, "train/loss_prose": 3.7013096809387207, "train/loss_code": 1.403221845626831} +{"step": 4888, "train/loss": 2.2569274604320526, "train/lm_loss": 2.2569274604320526, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.502626825045545e-08, "perf/tokens_per_sec": 24758.886077992527, "train/loss_math": 2.038951486349106, "train/loss_code": 1.6811919808387756, "train/loss_prose": 3.2686150074005127} +{"step": 4889, "train/loss": 2.3872834146022797, "train/lm_loss": 2.3872834146022797, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.368119432699383e-08, "perf/tokens_per_sec": 26145.8198846714, "train/loss_math": 2.0878567695617676, "train/loss_prose": 3.244545857111613, "train/loss_code": 1.0132026672363281} +{"step": 4890, "train/loss": 2.0498011708259583, "train/lm_loss": 2.0498011708259583, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.234826918850745e-08, "perf/tokens_per_sec": 26127.01228800156, "train/loss_code": 1.4571938514709473, "train/loss_math": 2.062761163711548, "train/loss_prose": 3.1702158451080322} +{"step": 4891, "train/loss": 1.653421938419342, "train/lm_loss": 1.653421938419342, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.102749348465165e-08, "perf/tokens_per_sec": 25198.774350444575, "train/loss_math": 1.9537169933319092, "train/loss_code": 1.3567938327789306, "train/loss_prose": 2.5359725952148438} +{"step": 4892, "train/loss": 2.2094867825508118, "train/lm_loss": 2.2094867825508118, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.971886785915593e-08, "perf/tokens_per_sec": 25284.108690117195, "train/loss_math": 2.154430478811264, "train/loss_code": 1.2614838480949402, "train/loss_prose": 3.2676024436950684} +{"step": 4893, "train/loss": 2.3220541179180145, "train/lm_loss": 2.3220541179180145, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.8422392949824e-08, "perf/tokens_per_sec": 25865.506148750377, "train/loss_prose": 3.25008761882782, "train/loss_math": 2.2118894159793854, "train/loss_code": 1.6143497824668884} +{"step": 4894, "train/loss": 2.8527868390083313, "train/lm_loss": 2.8527868390083313, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.71380693885476e-08, "perf/tokens_per_sec": 26058.374212023955, "train/loss_prose": 3.529392957687378, "train/loss_math": 2.072188973426819, "train/loss_code": 1.0309526920318604} +{"step": 4895, "train/loss": 2.0881125032901764, "train/lm_loss": 2.0881125032901764, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.586589780128716e-08, "perf/tokens_per_sec": 26159.276159094683, "train/loss_code": 1.8147695461908977, "train/loss_math": 2.2521183013916017} +{"step": 4896, "train/loss": 2.212824046611786, "train/lm_loss": 2.212824046611786, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.460587880808555e-08, "perf/tokens_per_sec": 25700.707424090335, "train/loss_math": 2.167162239551544, "train/loss_code": 1.3595672249794006, "train/loss_prose": 3.157404661178589} +{"step": 4897, "train/loss": 1.755634993314743, "train/lm_loss": 1.755634993314743, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.335801302306265e-08, "perf/tokens_per_sec": 25143.197577570576, "train/loss_math": 2.008726477622986, "train/loss_code": 1.3338158925374348} +{"step": 4898, "train/loss": 1.733655333518982, "train/lm_loss": 1.733655333518982, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.212230105440698e-08, "perf/tokens_per_sec": 24904.06380591498, "train/loss_math": 2.0603296160697937, "train/loss_code": 1.4069810211658478} +{"step": 4899, "train/loss": 2.2785219848155975, "train/lm_loss": 2.2785219848155975, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.089874350439506e-08, "perf/tokens_per_sec": 25734.971492085464, "train/loss_prose": 3.4731178283691406, "train/loss_code": 1.5951076745986938, "train/loss_math": 2.165538946787516} +{"step": 4900, "train/loss": 2.378246545791626, "train/lm_loss": 2.378246545791626, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.968734096936935e-08, "perf/tokens_per_sec": 24848.73569017238, "train/loss_code": 1.3383138179779053, "train/loss_math": 2.274543857574463, "train/loss_prose": 3.1574695110321045} +{"step": 4901, "train/loss": 2.3232466280460358, "train/lm_loss": 2.3232466280460358, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.848809403975753e-08, "perf/tokens_per_sec": 25198.84827185628, "train/loss_math": 2.4108128547668457, "train/loss_code": 1.3852413098017375, "train/loss_prose": 3.5989049673080444} +{"step": 4902, "train/loss": 2.9491477012634277, "train/lm_loss": 2.9491477012634277, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.7301003300055945e-08, "perf/tokens_per_sec": 24831.99851989755, "train/loss_math": 2.2100096543629966, "train/loss_prose": 3.3926304817199706} +{"step": 4903, "train/loss": 2.5436545312404633, "train/lm_loss": 2.5436545312404633, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.612606932883513e-08, "perf/tokens_per_sec": 25899.469469901058, "train/loss_prose": 3.033689558506012, "train/loss_math": 2.1162229776382446, "train/loss_code": 1.9910152554512024} +{"step": 4904, "train/loss": 2.0626815855503082, "train/lm_loss": 2.0626815855503082, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.496329269875089e-08, "perf/tokens_per_sec": 24988.827936500547, "train/loss_code": 1.3599528074264526, "train/loss_math": 2.0776809453964233, "train/loss_prose": 2.735411286354065} +{"step": 4905, "train/loss": 2.1876686811447144, "train/lm_loss": 2.1876686811447144, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.381267397651934e-08, "perf/tokens_per_sec": 24870.282947223353, "train/loss_code": 1.554891308148702, "train/loss_math": 2.2738139033317566, "train/loss_prose": 3.7414190769195557} +{"step": 4906, "train/loss": 1.8771946132183075, "train/lm_loss": 1.8771946132183075, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.26742137229419e-08, "perf/tokens_per_sec": 25132.567526368915, "train/loss_code": 1.4114975094795228, "train/loss_prose": 3.1304043531417847, "train/loss_math": 1.6992605924606323} +{"step": 4907, "train/loss": 2.1938659250736237, "train/lm_loss": 2.1938659250736237, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.1547912492888596e-08, "perf/tokens_per_sec": 25351.97782939376, "train/loss_prose": 3.5442135334014893, "train/loss_math": 2.2849316199620566, "train/loss_code": 1.2025684316953023} +{"step": 4908, "train/loss": 2.034851759672165, "train/lm_loss": 2.034851759672165, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.043377083530365e-08, "perf/tokens_per_sec": 25550.035148668725, "train/loss_code": 1.3490121960639954, "train/loss_prose": 3.1779178778330484} +{"step": 4909, "train/loss": 2.6411821246147156, "train/lm_loss": 2.6411821246147156, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.9331789293211026e-08, "perf/tokens_per_sec": 25897.90777681803, "train/loss_prose": 3.482303428649902, "train/loss_code": 1.2393134037653606} +{"step": 4910, "train/loss": 2.146477222442627, "train/lm_loss": 2.146477222442627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.824196840370054e-08, "perf/tokens_per_sec": 25230.155969958556, "train/loss_code": 1.2944228649139404, "train/loss_math": 2.0178606510162354, "train/loss_prose": 2.894045829772949} +{"step": 4911, "train/loss": 2.1428710520267487, "train/lm_loss": 2.1428710520267487, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.716430869793342e-08, "perf/tokens_per_sec": 24449.90035522361, "train/loss_math": 2.1983250776926675, "train/loss_code": 1.6104104220867157, "train/loss_prose": 4.106351852416992} +{"step": 4912, "train/loss": 2.719026505947113, "train/lm_loss": 2.719026505947113, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.609881070115618e-08, "perf/tokens_per_sec": 25489.268882677254, "train/loss_math": 2.441305994987488, "train/loss_prose": 3.2008638858795164, "train/loss_code": 0.8652806282043457} +{"step": 4913, "train/loss": 2.3614537715911865, "train/lm_loss": 2.3614537715911865, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.504547493267286e-08, "perf/tokens_per_sec": 26198.249955776773, "train/loss_prose": 3.063788414001465, "train/loss_math": 1.8328409790992737, "train/loss_code": 1.485397219657898} +{"step": 4914, "train/loss": 2.047721654176712, "train/lm_loss": 2.047721654176712, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.400430190586724e-08, "perf/tokens_per_sec": 25706.24508502727, "train/loss_prose": 3.8047953844070435, "train/loss_math": 2.071107268333435, "train/loss_code": 1.1574919819831848} +{"step": 4915, "train/loss": 2.5091720521450043, "train/lm_loss": 2.5091720521450043, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.2975292128200064e-08, "perf/tokens_per_sec": 25017.793908892596, "train/loss_prose": 3.285068988800049, "train/loss_code": 1.3000475764274597, "train/loss_math": 2.1665027141571045} +{"step": 4916, "train/loss": 3.1369770765304565, "train/lm_loss": 3.1369770765304565, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.1958446101192373e-08, "perf/tokens_per_sec": 25012.29394764849, "train/loss_code": 2.097407102584839, "train/loss_prose": 3.3755467335383096, "train/loss_math": 2.745129108428955} +{"step": 4917, "train/loss": 2.1975481510162354, "train/lm_loss": 2.1975481510162354, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.095376432044218e-08, "perf/tokens_per_sec": 24968.381161663507, "train/loss_code": 1.2822200655937195, "train/loss_math": 2.0810643434524536, "train/loss_prose": 3.3458434343338013} +{"step": 4918, "train/loss": 2.089579790830612, "train/lm_loss": 2.089579790830612, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.996124727562445e-08, "perf/tokens_per_sec": 25915.68315648787, "train/loss_math": 2.0519214073816934, "train/loss_code": 1.392384926478068, "train/loss_prose": 3.191859483718872} +{"step": 4919, "train/loss": 2.226248174905777, "train/lm_loss": 2.226248174905777, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.8980895450474455e-08, "perf/tokens_per_sec": 25466.409703991496, "train/loss_code": 1.2760465343793232, "train/loss_math": 2.143063227335612, "train/loss_prose": 3.776327967643738} +{"step": 4920, "train/loss": 2.0870296359062195, "train/lm_loss": 2.0870296359062195, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.80127093228072e-08, "perf/tokens_per_sec": 25497.667174249236, "train/loss_code": 1.017980545759201, "train/loss_math": 2.1134966214497886, "train/loss_prose": 2.773261864980062} +{"step": 4921, "train/loss": 1.7843969613313675, "train/lm_loss": 1.7843969613313675, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.705668936450357e-08, "perf/tokens_per_sec": 26015.914325519414, "train/loss_code": 1.2537900408109028, "train/loss_math": 1.9767855405807495, "train/loss_prose": 4.775649547576904} +{"step": 4922, "train/loss": 1.9455182403326035, "train/lm_loss": 1.9455182403326035, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.6112836041518605e-08, "perf/tokens_per_sec": 25524.63961252173, "train/loss_math": 2.03006104628245, "train/loss_code": 1.5417229533195496, "train/loss_prose": 3.30707049369812} +{"step": 4923, "train/loss": 2.86661359667778, "train/lm_loss": 2.86661359667778, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.518114981387044e-08, "perf/tokens_per_sec": 25771.414489405586, "train/loss_math": 2.0409220457077026, "train/loss_prose": 3.6923053860664368} +{"step": 4924, "train/loss": 2.0716504752635956, "train/lm_loss": 2.0716504752635956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.426163113565417e-08, "perf/tokens_per_sec": 25431.08712656133, "train/loss_prose": 3.206386089324951, "train/loss_math": 1.9434754550457, "train/loss_code": 1.1932652592658997} +{"step": 4925, "train/loss": 2.64459490776062, "train/lm_loss": 2.64459490776062, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.335428045503075e-08, "perf/tokens_per_sec": 26882.314938575477, "train/loss_math": 2.285906950632731, "train/loss_prose": 3.797849973042806, "train/loss_code": 1.452743649482727} +{"step": 4926, "train/loss": 2.4686334431171417, "train/lm_loss": 2.4686334431171417, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2459098214232544e-08, "perf/tokens_per_sec": 25236.08579746932, "train/loss_math": 2.022989571094513, "train/loss_code": 1.455555001894633, "train/loss_prose": 3.7788072427113852} +{"step": 4927, "train/loss": 2.8837843537330627, "train/lm_loss": 2.8837843537330627, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.157608484956332e-08, "perf/tokens_per_sec": 25433.19543442538, "train/loss_code": 1.3525066375732422, "train/loss_math": 1.928216576576233, "train/loss_prose": 3.572267007827759} +{"step": 4928, "train/loss": 2.420454978942871, "train/lm_loss": 2.420454978942871, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.0705240791387144e-08, "perf/tokens_per_sec": 25338.965348184815, "train/loss_code": 1.1301332116127014, "train/loss_prose": 3.4540221095085144, "train/loss_math": 2.1571505069732666} +{"step": 4929, "train/loss": 2.6701612174510956, "train/lm_loss": 2.6701612174510956, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9846566464150626e-08, "perf/tokens_per_sec": 25930.58637682953, "train/loss_prose": 3.6378156542778015, "train/loss_math": 2.2884109020233154, "train/loss_code": 1.5072055260340373} +{"step": 4930, "train/loss": 2.177090048789978, "train/lm_loss": 2.177090048789978, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9000062286352324e-08, "perf/tokens_per_sec": 25315.591434482514, "train/loss_prose": 3.518023729324341, "train/loss_math": 2.2075050671895347, "train/loss_code": 1.252719243367513} +{"step": 4931, "train/loss": 2.0462082028388977, "train/lm_loss": 2.0462082028388977, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.816572867057332e-08, "perf/tokens_per_sec": 25358.189641735353, "train/loss_math": 2.165483832359314, "train/loss_prose": 3.266187906265259, "train/loss_code": 1.1380294561386108} +{"step": 4932, "train/loss": 2.8075662553310394, "train/lm_loss": 2.8075662553310394, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.7343566023460553e-08, "perf/tokens_per_sec": 25461.9182603953, "train/loss_prose": 3.84311306476593, "train/loss_math": 2.0913009643554688, "train/loss_code": 1.4527370929718018} +{"step": 4933, "train/loss": 2.2727775275707245, "train/lm_loss": 2.2727775275707245, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6533574745718494e-08, "perf/tokens_per_sec": 25648.256687557663, "train/loss_code": 1.308543086051941, "train/loss_math": 1.9901258945465088, "train/loss_prose": 3.4615241289138794} +{"step": 4934, "train/loss": 2.275659143924713, "train/lm_loss": 2.275659143924713, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.5735755232134118e-08, "perf/tokens_per_sec": 25448.830402547865, "train/loss_prose": 3.054824193318685, "train/loss_math": 2.102371424436569, "train/loss_code": 0.6313146948814392} +{"step": 4935, "train/loss": 2.8249391317367554, "train/lm_loss": 2.8249391317367554, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4950107871549167e-08, "perf/tokens_per_sec": 25768.554001130946, "train/loss_prose": 3.50972318649292, "train/loss_code": 1.6836326917012532} +{"step": 4936, "train/loss": 2.7502973079681396, "train/lm_loss": 2.7502973079681396, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4176633046882335e-08, "perf/tokens_per_sec": 25086.21690835576, "train/loss_prose": 3.3260863423347473, "train/loss_math": 2.1745080947875977} +{"step": 4937, "train/loss": 2.7648820877075195, "train/lm_loss": 2.7648820877075195, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.3415331135115405e-08, "perf/tokens_per_sec": 25612.046376344904, "train/loss_prose": 3.0979702949523924, "train/loss_math": 2.4089508056640625, "train/loss_code": 2.1101269721984863} +{"step": 4938, "train/loss": 2.0636702179908752, "train/lm_loss": 2.0636702179908752, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.2666202507296007e-08, "perf/tokens_per_sec": 23415.418562653078, "train/loss_prose": 3.315667152404785, "train/loss_code": 1.2114590406417847, "train/loss_math": 2.389829456806183} +{"step": 4939, "train/loss": 1.6907947063446045, "train/lm_loss": 1.6907947063446045, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.192924752854042e-08, "perf/tokens_per_sec": 26950.25175303467, "train/loss_math": 2.054013729095459, "train/loss_prose": 3.21516752243042, "train/loss_code": 1.376196026802063} +{"step": 4940, "train/loss": 2.4472659528255463, "train/lm_loss": 2.4472659528255463, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.120446655803632e-08, "perf/tokens_per_sec": 26059.876258634864, "train/loss_math": 2.0191519260406494, "train/loss_code": 1.5540913939476013, "train/loss_prose": 3.10791015625} +{"step": 4941, "train/loss": 2.2190683484077454, "train/lm_loss": 2.2190683484077454, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.0491859949026137e-08, "perf/tokens_per_sec": 25839.790849515914, "train/loss_math": 2.3380703926086426, "train/loss_prose": 3.2746598720550537, "train/loss_code": 1.3963385820388794} +{"step": 4942, "train/loss": 1.8738969564437866, "train/lm_loss": 1.8738969564437866, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9791428048829275e-08, "perf/tokens_per_sec": 25853.71199829045, "train/loss_code": 1.2569437980651856, "train/loss_prose": 3.187469244003296, "train/loss_math": 2.3315179347991943} +{"step": 4943, "train/loss": 2.754004865884781, "train/lm_loss": 2.754004865884781, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.9103171198828207e-08, "perf/tokens_per_sec": 26913.689403006585, "train/loss_prose": 3.3112178325653074, "train/loss_math": 2.2619255781173706, "train/loss_code": 0.9520978331565857} +{"step": 4944, "train/loss": 2.1554539501667023, "train/lm_loss": 2.1554539501667023, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.842708973447127e-08, "perf/tokens_per_sec": 25762.29406727939, "train/loss_code": 1.4992163976033528, "train/loss_math": 2.018478433291117, "train/loss_prose": 3.3452733755111694} +{"step": 4945, "train/loss": 2.069308876991272, "train/lm_loss": 2.069308876991272, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7763183985269883e-08, "perf/tokens_per_sec": 26820.6642208366, "train/loss_prose": 3.1574230194091797, "train/loss_math": 2.1882088780403137, "train/loss_code": 1.4658018350601196} +{"step": 4946, "train/loss": 2.050673723220825, "train/lm_loss": 2.050673723220825, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7111454274804096e-08, "perf/tokens_per_sec": 25618.080693095144, "train/loss_code": 1.1114387810230255, "train/loss_prose": 3.421159267425537, "train/loss_math": 1.696157455444336} +{"step": 4947, "train/loss": 2.026360362768173, "train/lm_loss": 2.026360362768173, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.647190092071982e-08, "perf/tokens_per_sec": 25794.631108441874, "train/loss_math": 2.208500623703003, "train/loss_prose": 3.183990240097046, "train/loss_code": 1.356475368142128} +{"step": 4948, "train/loss": 2.1142925918102264, "train/lm_loss": 2.1142925918102264, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.584452423472882e-08, "perf/tokens_per_sec": 24038.783029441438, "train/loss_math": 2.003860354423523, "train/loss_code": 1.3247493505477905, "train/loss_prose": 3.4642558097839355} +{"step": 4949, "train/loss": 2.4444072246551514, "train/lm_loss": 2.4444072246551514, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.522932452260595e-08, "perf/tokens_per_sec": 24642.64901027024, "train/loss_math": 1.9405919710795085, "train/loss_prose": 3.5060695012410483, "train/loss_code": 1.6076366305351257} +{"step": 4950, "train/loss": 2.607759416103363, "train/lm_loss": 2.607759416103363, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4626302084191912e-08, "perf/tokens_per_sec": 26356.2433690838, "train/loss_math": 2.0611830949783325, "train/loss_prose": 3.1543357968330383} +{"step": 4951, "train/loss": 2.8345507979393005, "train/lm_loss": 2.8345507979393005, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.4035457213393276e-08, "perf/tokens_per_sec": 26061.259875821248, "train/loss_math": 2.151206851005554, "train/loss_prose": 3.5178945660591125} +{"step": 4952, "train/loss": 2.1728715300559998, "train/lm_loss": 2.1728715300559998, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3456790198179692e-08, "perf/tokens_per_sec": 24223.23500241811, "train/loss_code": 1.1822336316108704, "train/loss_math": 2.039530078570048, "train/loss_prose": 3.8588404655456543} +{"step": 4953, "train/loss": 2.5443730652332306, "train/lm_loss": 2.5443730652332306, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.2890301320583887e-08, "perf/tokens_per_sec": 23086.31928165793, "train/loss_prose": 3.1986260414123535, "train/loss_math": 2.0713096857070923, "train/loss_code": 1.3465511798858643} +{"step": 4954, "train/loss": 1.946065992116928, "train/lm_loss": 1.946065992116928, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.233599085671e-08, "perf/tokens_per_sec": 24729.377059657556, "train/loss_code": 1.632114863395691, "train/loss_prose": 3.4402003288269043, "train/loss_math": 1.9838764071464539} +{"step": 4955, "train/loss": 2.44182550907135, "train/lm_loss": 2.44182550907135, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.179385907672248e-08, "perf/tokens_per_sec": 25124.26101351865, "train/loss_code": 1.8978213866551716, "train/loss_prose": 3.3099350929260254, "train/loss_math": 1.95566725730896} +{"step": 4956, "train/loss": 2.095588207244873, "train/lm_loss": 2.095588207244873, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1263906244846079e-08, "perf/tokens_per_sec": 24710.4542471956, "train/loss_prose": 3.3390166759490967, "train/loss_math": 2.1523070335388184, "train/loss_code": 1.4455143809318542} +{"step": 4957, "train/loss": 1.8067865073680878, "train/lm_loss": 1.8067865073680878, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0746132619374183e-08, "perf/tokens_per_sec": 25911.38357585849, "train/loss_math": 1.9318675994873047, "train/loss_code": 1.5825741291046143, "train/loss_prose": 3.026979684829712} +{"step": 4958, "train/loss": 1.9206449091434479, "train/lm_loss": 1.9206449091434479, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0240538452666038e-08, "perf/tokens_per_sec": 24382.338968184922, "train/loss_code": 1.5419398844242096, "train/loss_math": 1.98334538936615, "train/loss_prose": 3.247363567352295} +{"step": 4959, "train/loss": 2.4433970749378204, "train/lm_loss": 2.4433970749378204, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.747123991141194e-09, "perf/tokens_per_sec": 25381.22983779823, "train/loss_prose": 3.2590502897898355, "train/loss_math": 2.08944563070933, "train/loss_code": 1.7508440613746643} +{"step": 4960, "train/loss": 2.5760878324508667, "train/lm_loss": 2.5760878324508667, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.265889475282286e-09, "perf/tokens_per_sec": 25834.38975037594, "train/loss_math": 2.174450635910034, "train/loss_prose": 3.9693132638931274, "train/loss_code": 1.9861364960670471} +{"step": 4961, "train/loss": 2.571596086025238, "train/lm_loss": 2.571596086025238, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.796835139637805e-09, "perf/tokens_per_sec": 24668.939994285032, "train/loss_prose": 3.4004282355308533, "train/loss_code": 1.5622248252232869, "train/loss_math": 2.284381628036499} +{"step": 4962, "train/loss": 2.1479720771312714, "train/lm_loss": 2.1479720771312714, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.339961212819326e-09, "perf/tokens_per_sec": 24835.73189926851, "train/loss_prose": 3.2858262062072754, "train/loss_code": 1.3085511028766632, "train/loss_math": 2.092092990875244} +{"step": 4963, "train/loss": 2.1172720193862915, "train/lm_loss": 2.1172720193862915, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.895267917501504e-09, "perf/tokens_per_sec": 25594.303635968165, "train/loss_prose": 3.1675474643707275, "train/loss_code": 1.602065771818161, "train/loss_math": 2.4541216691335044} +{"step": 4964, "train/loss": 2.2197399735450745, "train/lm_loss": 2.2197399735450745, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.462755470422078e-09, "perf/tokens_per_sec": 25832.68052038662, "train/loss_code": 1.4372802674770355, "train/loss_prose": 3.3644231160481772, "train/loss_math": 1.9155296087265015} +{"step": 4965, "train/loss": 2.0638472139835358, "train/lm_loss": 2.0638472139835358, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.042424082381871e-09, "perf/tokens_per_sec": 24981.560594559855, "train/loss_code": 1.4973586797714233, "train/loss_prose": 2.968393921852112, "train/loss_math": 2.292277455329895} +{"step": 4966, "train/loss": 1.9858489036560059, "train/lm_loss": 1.9858489036560059, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.634273958247561e-09, "perf/tokens_per_sec": 25182.2268762954, "train/loss_code": 1.3241792519887288, "train/loss_prose": 3.0692086219787598, "train/loss_math": 1.9252785841623943} +{"step": 4967, "train/loss": 2.0465313494205475, "train/lm_loss": 2.0465313494205475, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.238305296946134e-09, "perf/tokens_per_sec": 25814.902410511164, "train/loss_code": 1.8190225958824158, "train/loss_math": 1.9963173866271973, "train/loss_prose": 3.1072092056274414} +{"step": 4968, "train/loss": 2.490622341632843, "train/lm_loss": 2.490622341632843, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.854518291467659e-09, "perf/tokens_per_sec": 26243.39202854716, "train/loss_prose": 3.38878071308136, "train/loss_code": 2.026265859603882, "train/loss_math": 2.224230146408081} +{"step": 4969, "train/loss": 2.2416878640651703, "train/lm_loss": 2.2416878640651703, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.48291312886251e-09, "perf/tokens_per_sec": 25338.741112199266, "train/loss_math": 2.0513323545455933, "train/loss_prose": 3.2190728982289634, "train/loss_code": 1.3912067413330078} +{"step": 4970, "train/loss": 1.9314570426940918, "train/lm_loss": 1.9314570426940918, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.123489990249697e-09, "perf/tokens_per_sec": 27073.373156814017, "train/loss_code": 1.256437361240387, "train/loss_math": 2.3761003017425537, "train/loss_prose": 3.2976062297821045} +{"step": 4971, "train/loss": 2.021718055009842, "train/lm_loss": 2.021718055009842, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.776249050805759e-09, "perf/tokens_per_sec": 26485.37232272934, "train/loss_math": 2.033367156982422, "train/loss_prose": 3.1121689081192017, "train/loss_code": 1.583207869529724} +{"step": 4972, "train/loss": 2.3830263316631317, "train/lm_loss": 2.3830263316631317, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.441190479775869e-09, "perf/tokens_per_sec": 26216.920242271826, "train/loss_code": 1.752793550491333, "train/loss_math": 2.2544648249944053, "train/loss_prose": 3.521217465400696} +{"step": 4973, "train/loss": 2.2240573465824127, "train/lm_loss": 2.2240573465824127, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.118314440457183e-09, "perf/tokens_per_sec": 26384.537106033473, "train/loss_prose": 3.7912923097610474, "train/loss_code": 1.1426639358202617, "train/loss_math": 2.260627269744873} +{"step": 4974, "train/loss": 2.17435085773468, "train/lm_loss": 2.17435085773468, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.807621090218261e-09, "perf/tokens_per_sec": 27075.93325841953, "train/loss_code": 1.2127909859021504, "train/loss_prose": 3.1837677160898843, "train/loss_math": 2.102565288543701} +{"step": 4975, "train/loss": 2.6096595227718353, "train/lm_loss": 2.6096595227718353, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.5091105804907487e-09, "perf/tokens_per_sec": 25988.83770013675, "train/loss_math": 2.1301631927490234, "train/loss_prose": 3.3524723649024963, "train/loss_code": 1.7790743907292683} +{"step": 4976, "train/loss": 2.5757809579372406, "train/lm_loss": 2.5757809579372406, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.2227830567582716e-09, "perf/tokens_per_sec": 26325.91234844863, "train/loss_prose": 3.4064244429270425, "train/loss_math": 2.0325706799825034, "train/loss_code": 2.1446308493614197} +{"step": 4977, "train/loss": 2.6318052411079407, "train/lm_loss": 2.6318052411079407, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.9486386585786395e-09, "perf/tokens_per_sec": 26572.17723993714, "train/loss_math": 1.9689965546131134, "train/loss_prose": 3.294614017009735} +{"step": 4978, "train/loss": 2.041984438896179, "train/lm_loss": 2.041984438896179, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.6866775195644176e-09, "perf/tokens_per_sec": 26004.4156136665, "train/loss_code": 1.3134597837924957, "train/loss_math": 2.1615198850631714, "train/loss_prose": 3.379498243331909} +{"step": 4979, "train/loss": 2.4251743257045746, "train/lm_loss": 2.4251743257045746, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4368997673940297e-09, "perf/tokens_per_sec": 26252.17435366135, "train/loss_math": 2.12997297445933, "train/loss_prose": 3.3107781410217285} +{"step": 4980, "train/loss": 2.303121715784073, "train/lm_loss": 2.303121715784073, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1993055238062056e-09, "perf/tokens_per_sec": 26168.95890619621, "train/loss_prose": 2.805672566095988, "train/loss_code": 2.0488717555999756, "train/loss_math": 1.9897710978984833} +{"step": 4981, "train/loss": 2.7602092921733856, "train/lm_loss": 2.7602092921733856, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.973894904597207e-09, "perf/tokens_per_sec": 26469.376153036683, "train/loss_prose": 3.467513084411621, "train/loss_math": 2.0529054701328278} +{"step": 4982, "train/loss": 2.3407374024391174, "train/lm_loss": 2.3407374024391174, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.7606680196319282e-09, "perf/tokens_per_sec": 26486.96483589674, "train/loss_prose": 3.477571725845337, "train/loss_code": 1.5739479959011078, "train/loss_math": 1.9973924160003662} +{"step": 4983, "train/loss": 2.1800026297569275, "train/lm_loss": 2.1800026297569275, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5596249728383473e-09, "perf/tokens_per_sec": 26379.35144995409, "train/loss_math": 1.983775556087494, "train/loss_code": 1.4345403760671616, "train/loss_prose": 3.867154121398926} +{"step": 4984, "train/loss": 2.200680524110794, "train/lm_loss": 2.200680524110794, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.3707658621964215e-09, "perf/tokens_per_sec": 26402.66239221008, "train/loss_prose": 3.274472236633301, "train/loss_code": 1.55640549659729} +{"step": 4985, "train/loss": 2.4258507788181305, "train/lm_loss": 2.4258507788181305, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.1940907797575173e-09, "perf/tokens_per_sec": 26135.080731848684, "train/loss_code": 1.2234111626942952, "train/loss_prose": 3.420280396938324, "train/loss_math": 2.0554513931274414} +{"step": 4986, "train/loss": 2.476970672607422, "train/lm_loss": 2.476970672607422, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.0295998116305329e-09, "perf/tokens_per_sec": 26022.14047215852, "train/loss_math": 2.027575969696045, "train/loss_code": 2.257026433944702, "train/loss_prose": 3.595704197883606} +{"step": 4987, "train/loss": 2.7968153953552246, "train/lm_loss": 2.7968153953552246, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 8.772930379846722e-10, "perf/tokens_per_sec": 26064.857968200024, "train/loss_code": 1.4609102010726929, "train/loss_prose": 3.5747682094573974, "train/loss_math": 1.5788612365722656} +{"step": 4988, "train/loss": 2.1687423288822174, "train/lm_loss": 2.1687423288822174, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 7.371705330522228e-10, "perf/tokens_per_sec": 26784.70295569421, "train/loss_math": 2.0247284173965454, "train/loss_code": 1.5494810342788696, "train/loss_prose": 3.3136552572250366} +{"step": 4989, "train/loss": 2.506620466709137, "train/lm_loss": 2.506620466709137, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.092323651313292e-10, "perf/tokens_per_sec": 25849.666320095937, "train/loss_code": 1.1975356936454773, "train/loss_prose": 3.6444880962371826, "train/loss_math": 2.241476058959961} +{"step": 4990, "train/loss": 2.439573347568512, "train/lm_loss": 2.439573347568512, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 4.934785965721167e-10, "perf/tokens_per_sec": 25686.643268418495, "train/loss_prose": 3.8759621381759644, "train/loss_math": 2.12176251411438, "train/loss_code": 1.1558505296707153} +{"step": 4991, "train/loss": 2.0812396705150604, "train/lm_loss": 2.0812396705150604, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 3.899092837933438e-10, "perf/tokens_per_sec": 26441.836265608574, "train/loss_code": 1.401096060872078, "train/loss_math": 2.345147728919983, "train/loss_prose": 3.1776185035705566} +{"step": 4992, "train/loss": 2.6626123189926147, "train/lm_loss": 2.6626123189926147, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.985244772768514e-10, "perf/tokens_per_sec": 26387.211756682867, "train/loss_code": 1.2740432024002075, "train/loss_prose": 3.5047956705093384, "train/loss_math": 2.3668147325515747} +{"step": 4993, "train/loss": 2.1867487132549286, "train/lm_loss": 2.1867487132549286, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.1932422155923616e-10, "perf/tokens_per_sec": 26285.55260363594, "train/loss_math": 2.065781021118164, "train/loss_code": 1.9306012392044067, "train/loss_prose": 3.3038814067840576} +{"step": 4994, "train/loss": 2.452942818403244, "train/lm_loss": 2.452942818403244, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 1.5230855524017708e-10, "perf/tokens_per_sec": 26058.888050589212, "train/loss_prose": 3.266568660736084, "train/loss_math": 2.20454732577006, "train/loss_code": 1.6050975620746613} +{"step": 4995, "train/loss": 2.1174862682819366, "train/lm_loss": 2.1174862682819366, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 9.747751098521107e-11, "perf/tokens_per_sec": 25934.26592335294, "train/loss_math": 2.189477252960205, "train/loss_code": 1.1856018900871277, "train/loss_prose": 3.621300458908081} +{"step": 4996, "train/loss": 2.033608317375183, "train/lm_loss": 2.033608317375183, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 5.483111551740638e-11, "perf/tokens_per_sec": 25715.633793414174, "train/loss_prose": 3.1267184019088745, "train/loss_code": 1.4371269643306732, "train/loss_math": 2.1334614753723145} +{"step": 4997, "train/loss": 2.532929867506027, "train/lm_loss": 2.532929867506027, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 2.4369389622913574e-11, "perf/tokens_per_sec": 25770.87326724059, "train/loss_code": 0.5465366840362549, "train/loss_math": 2.053694933652878, "train/loss_prose": 3.834040721257528} +{"step": 4998, "train/loss": 2.063606470823288, "train/lm_loss": 2.063606470823288, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 6.092348148190041e-12, "perf/tokens_per_sec": 26047.667266566095, "train/loss_prose": 3.2372894287109375, "train/loss_code": 1.5740194439888, "train/loss_math": 2.164175510406494} +{"step": 4999, "train/loss": 2.1143151819705963, "train/lm_loss": 2.1143151819705963, "train/lb_loss": 0.0, "train/z_loss": 0.0, "train/lr": 0.0, "perf/tokens_per_sec": 25697.824170498436, "train/loss_math": 2.1222645044326782, "train/loss_prose": 3.086040735244751, "train/loss_code": 1.6244775354862213}