| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 4523, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001105460977227504, |
| "grad_norm": 2.395341396331787, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 4.6826, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002210921954455008, |
| "grad_norm": 2.2102696895599365, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 4.4984, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0033163829316825116, |
| "grad_norm": 2.5083913803100586, |
| "learning_rate": 1.5e-06, |
| "loss": 4.5731, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.004421843908910016, |
| "grad_norm": 2.1317508220672607, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 4.5149, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0055273048861375195, |
| "grad_norm": 2.2241172790527344, |
| "learning_rate": 2.5e-06, |
| "loss": 4.4769, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.006632765863365023, |
| "grad_norm": 2.1349635124206543, |
| "learning_rate": 3e-06, |
| "loss": 4.5924, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.007738226840592527, |
| "grad_norm": 2.366008758544922, |
| "learning_rate": 3.5000000000000004e-06, |
| "loss": 4.5941, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.008843687817820032, |
| "grad_norm": 2.4122307300567627, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 4.4631, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.009949148795047534, |
| "grad_norm": 2.023873805999756, |
| "learning_rate": 4.5e-06, |
| "loss": 4.5361, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.011054609772275039, |
| "grad_norm": 2.2571287155151367, |
| "learning_rate": 5e-06, |
| "loss": 4.5539, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.012160070749502542, |
| "grad_norm": 2.1223011016845703, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 4.4691, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.013265531726730046, |
| "grad_norm": 2.125227451324463, |
| "learning_rate": 6e-06, |
| "loss": 4.4713, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.014370992703957551, |
| "grad_norm": 2.272958278656006, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 4.5511, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.015476453681185054, |
| "grad_norm": 2.1949267387390137, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 4.4926, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.016581914658412557, |
| "grad_norm": 2.155870199203491, |
| "learning_rate": 7.5e-06, |
| "loss": 4.6169, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.017687375635640063, |
| "grad_norm": 2.253253698348999, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 4.5112, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.018792836612867566, |
| "grad_norm": 2.2717690467834473, |
| "learning_rate": 8.500000000000002e-06, |
| "loss": 4.5192, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.01989829759009507, |
| "grad_norm": 2.4532206058502197, |
| "learning_rate": 9e-06, |
| "loss": 4.5964, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.021003758567322575, |
| "grad_norm": 2.420793056488037, |
| "learning_rate": 9.5e-06, |
| "loss": 4.5466, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.022109219544550078, |
| "grad_norm": 2.122037649154663, |
| "learning_rate": 1e-05, |
| "loss": 4.3679, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02321468052177758, |
| "grad_norm": 2.5729713439941406, |
| "learning_rate": 1.05e-05, |
| "loss": 4.4623, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.024320141499005084, |
| "grad_norm": 2.364610195159912, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 4.3146, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.02542560247623259, |
| "grad_norm": 2.4596588611602783, |
| "learning_rate": 1.1500000000000002e-05, |
| "loss": 4.4393, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.026531063453460093, |
| "grad_norm": 2.401916265487671, |
| "learning_rate": 1.2e-05, |
| "loss": 4.3398, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.027636524430687596, |
| "grad_norm": 2.1921896934509277, |
| "learning_rate": 1.25e-05, |
| "loss": 4.3521, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.028741985407915102, |
| "grad_norm": 2.193477153778076, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 4.5969, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.029847446385142605, |
| "grad_norm": 2.442413568496704, |
| "learning_rate": 1.3500000000000001e-05, |
| "loss": 4.5048, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.030952907362370108, |
| "grad_norm": 2.314326047897339, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 4.5778, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03205836833959761, |
| "grad_norm": 2.1947810649871826, |
| "learning_rate": 1.45e-05, |
| "loss": 4.648, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.033163829316825114, |
| "grad_norm": 2.268144369125366, |
| "learning_rate": 1.5e-05, |
| "loss": 4.5153, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03426929029405262, |
| "grad_norm": 2.311060905456543, |
| "learning_rate": 1.55e-05, |
| "loss": 4.6327, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.035374751271280126, |
| "grad_norm": 2.2926158905029297, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 4.3874, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03648021224850763, |
| "grad_norm": 2.1994051933288574, |
| "learning_rate": 1.65e-05, |
| "loss": 4.4664, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.03758567322573513, |
| "grad_norm": 2.3250937461853027, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 4.4617, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.038691134202962635, |
| "grad_norm": 2.3668570518493652, |
| "learning_rate": 1.75e-05, |
| "loss": 4.2885, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.03979659518019014, |
| "grad_norm": 2.1874375343322754, |
| "learning_rate": 1.8e-05, |
| "loss": 4.4582, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04090205615741764, |
| "grad_norm": 2.365499973297119, |
| "learning_rate": 1.85e-05, |
| "loss": 4.4125, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.04200751713464515, |
| "grad_norm": 2.272512435913086, |
| "learning_rate": 1.9e-05, |
| "loss": 4.3934, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04311297811187265, |
| "grad_norm": 2.343959093093872, |
| "learning_rate": 1.9500000000000003e-05, |
| "loss": 4.5141, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.044218439089100156, |
| "grad_norm": 2.5027010440826416, |
| "learning_rate": 2e-05, |
| "loss": 4.3837, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04532390006632766, |
| "grad_norm": 2.2487406730651855, |
| "learning_rate": 2.05e-05, |
| "loss": 4.5956, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.04642936104355516, |
| "grad_norm": 2.242449998855591, |
| "learning_rate": 2.1e-05, |
| "loss": 4.6371, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.047534822020782665, |
| "grad_norm": 2.183947801589966, |
| "learning_rate": 2.15e-05, |
| "loss": 4.314, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.04864028299801017, |
| "grad_norm": 2.2349512577056885, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 4.4111, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.04974574397523768, |
| "grad_norm": 2.2303688526153564, |
| "learning_rate": 2.25e-05, |
| "loss": 4.3312, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.05085120495246518, |
| "grad_norm": 2.2685301303863525, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 4.4618, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.05195666592969268, |
| "grad_norm": 2.287493944168091, |
| "learning_rate": 2.35e-05, |
| "loss": 4.4438, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.053062126906920186, |
| "grad_norm": 2.350281238555908, |
| "learning_rate": 2.4e-05, |
| "loss": 4.3578, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05416758788414769, |
| "grad_norm": 2.4053986072540283, |
| "learning_rate": 2.45e-05, |
| "loss": 4.4378, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.05527304886137519, |
| "grad_norm": 2.5036556720733643, |
| "learning_rate": 2.5e-05, |
| "loss": 4.4291, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.056378509838602694, |
| "grad_norm": 2.125025987625122, |
| "learning_rate": 2.5500000000000003e-05, |
| "loss": 4.3374, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.057483970815830204, |
| "grad_norm": 2.461651563644409, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 4.5828, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.05858943179305771, |
| "grad_norm": 2.3358347415924072, |
| "learning_rate": 2.6500000000000004e-05, |
| "loss": 4.4477, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.05969489277028521, |
| "grad_norm": 2.2937681674957275, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 4.4706, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.06080035374751271, |
| "grad_norm": 2.173781633377075, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 4.3496, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.061905814724740216, |
| "grad_norm": 2.373222827911377, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 4.5112, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.06301127570196773, |
| "grad_norm": 2.2600908279418945, |
| "learning_rate": 2.8499999999999998e-05, |
| "loss": 4.4922, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.06411673667919522, |
| "grad_norm": 2.241600275039673, |
| "learning_rate": 2.9e-05, |
| "loss": 4.4719, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06522219765642273, |
| "grad_norm": 2.2558817863464355, |
| "learning_rate": 2.95e-05, |
| "loss": 4.5555, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.06632765863365023, |
| "grad_norm": 2.5431759357452393, |
| "learning_rate": 3e-05, |
| "loss": 4.3901, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06743311961087774, |
| "grad_norm": 2.1968157291412354, |
| "learning_rate": 3.05e-05, |
| "loss": 4.3923, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.06853858058810525, |
| "grad_norm": 2.1212503910064697, |
| "learning_rate": 3.1e-05, |
| "loss": 4.3401, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.06964404156533274, |
| "grad_norm": 2.430278778076172, |
| "learning_rate": 3.15e-05, |
| "loss": 4.609, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.07074950254256025, |
| "grad_norm": 2.741177797317505, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 4.6077, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.07185496351978775, |
| "grad_norm": 2.3611228466033936, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 4.3933, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.07296042449701526, |
| "grad_norm": 2.265152931213379, |
| "learning_rate": 3.3e-05, |
| "loss": 4.23, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.07406588547424275, |
| "grad_norm": 2.3802292346954346, |
| "learning_rate": 3.35e-05, |
| "loss": 4.3844, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.07517134645147026, |
| "grad_norm": 2.5243539810180664, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 4.4695, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07627680742869777, |
| "grad_norm": 2.522508144378662, |
| "learning_rate": 3.45e-05, |
| "loss": 4.3575, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.07738226840592527, |
| "grad_norm": 2.1260106563568115, |
| "learning_rate": 3.5e-05, |
| "loss": 4.4234, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.07848772938315278, |
| "grad_norm": 2.445948839187622, |
| "learning_rate": 3.55e-05, |
| "loss": 4.3261, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.07959319036038028, |
| "grad_norm": 2.191976308822632, |
| "learning_rate": 3.6e-05, |
| "loss": 4.3506, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.08069865133760779, |
| "grad_norm": 2.582002878189087, |
| "learning_rate": 3.65e-05, |
| "loss": 4.379, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.08180411231483528, |
| "grad_norm": 2.395965099334717, |
| "learning_rate": 3.7e-05, |
| "loss": 4.4162, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.08290957329206279, |
| "grad_norm": 2.313727617263794, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 4.4531, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.0840150342692903, |
| "grad_norm": 2.2551207542419434, |
| "learning_rate": 3.8e-05, |
| "loss": 4.3614, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0851204952465178, |
| "grad_norm": 2.3821234703063965, |
| "learning_rate": 3.85e-05, |
| "loss": 4.2572, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.0862259562237453, |
| "grad_norm": 2.212198495864868, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 4.6126, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0873314172009728, |
| "grad_norm": 2.08597993850708, |
| "learning_rate": 3.9500000000000005e-05, |
| "loss": 4.4071, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.08843687817820031, |
| "grad_norm": 2.2900874614715576, |
| "learning_rate": 4e-05, |
| "loss": 4.4119, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08954233915542781, |
| "grad_norm": 2.6229662895202637, |
| "learning_rate": 4.05e-05, |
| "loss": 4.5127, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.09064780013265532, |
| "grad_norm": 2.313673496246338, |
| "learning_rate": 4.1e-05, |
| "loss": 4.5682, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.09175326110988283, |
| "grad_norm": 2.5078179836273193, |
| "learning_rate": 4.15e-05, |
| "loss": 4.3089, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.09285872208711032, |
| "grad_norm": 2.276742696762085, |
| "learning_rate": 4.2e-05, |
| "loss": 4.4486, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.09396418306433783, |
| "grad_norm": 2.0925698280334473, |
| "learning_rate": 4.25e-05, |
| "loss": 4.2959, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.09506964404156533, |
| "grad_norm": 2.5252251625061035, |
| "learning_rate": 4.3e-05, |
| "loss": 4.4345, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.09617510501879284, |
| "grad_norm": 2.374155282974243, |
| "learning_rate": 4.35e-05, |
| "loss": 4.3959, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.09728056599602034, |
| "grad_norm": 2.4412851333618164, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 4.3762, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09838602697324784, |
| "grad_norm": 2.302851676940918, |
| "learning_rate": 4.4500000000000004e-05, |
| "loss": 4.5145, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.09949148795047535, |
| "grad_norm": 2.3877639770507812, |
| "learning_rate": 4.5e-05, |
| "loss": 4.3736, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.10059694892770285, |
| "grad_norm": 2.413830280303955, |
| "learning_rate": 4.55e-05, |
| "loss": 4.377, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.10170240990493036, |
| "grad_norm": 2.5087687969207764, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 4.4129, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.10280787088215786, |
| "grad_norm": 2.449108600616455, |
| "learning_rate": 4.6500000000000005e-05, |
| "loss": 4.4224, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.10391333185938537, |
| "grad_norm": 2.255720376968384, |
| "learning_rate": 4.7e-05, |
| "loss": 4.3521, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.10501879283661286, |
| "grad_norm": 2.461012601852417, |
| "learning_rate": 4.75e-05, |
| "loss": 4.3307, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.10612425381384037, |
| "grad_norm": 2.6323764324188232, |
| "learning_rate": 4.8e-05, |
| "loss": 4.4938, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.10722971479106788, |
| "grad_norm": 2.4425625801086426, |
| "learning_rate": 4.85e-05, |
| "loss": 4.5443, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.10833517576829538, |
| "grad_norm": 2.523211717605591, |
| "learning_rate": 4.9e-05, |
| "loss": 4.3785, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.10944063674552289, |
| "grad_norm": 2.4563889503479004, |
| "learning_rate": 4.9500000000000004e-05, |
| "loss": 4.4033, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.11054609772275038, |
| "grad_norm": 2.4348998069763184, |
| "learning_rate": 5e-05, |
| "loss": 4.3472, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1116515586999779, |
| "grad_norm": 2.430751323699951, |
| "learning_rate": 4.993785732040766e-05, |
| "loss": 4.5632, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.11275701967720539, |
| "grad_norm": 2.5139589309692383, |
| "learning_rate": 4.9875714640815315e-05, |
| "loss": 4.3744, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1138624806544329, |
| "grad_norm": 2.3713083267211914, |
| "learning_rate": 4.981357196122297e-05, |
| "loss": 4.4622, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.11496794163166041, |
| "grad_norm": 2.348144769668579, |
| "learning_rate": 4.975142928163063e-05, |
| "loss": 4.4778, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.1160734026088879, |
| "grad_norm": 2.4068586826324463, |
| "learning_rate": 4.968928660203828e-05, |
| "loss": 4.4139, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.11717886358611541, |
| "grad_norm": 2.3237857818603516, |
| "learning_rate": 4.962714392244594e-05, |
| "loss": 4.3972, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.11828432456334291, |
| "grad_norm": 2.39794659614563, |
| "learning_rate": 4.9565001242853596e-05, |
| "loss": 4.2682, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.11938978554057042, |
| "grad_norm": 2.433943748474121, |
| "learning_rate": 4.950285856326125e-05, |
| "loss": 4.6123, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.12049524651779792, |
| "grad_norm": 2.3196094036102295, |
| "learning_rate": 4.944071588366891e-05, |
| "loss": 4.4894, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.12160070749502543, |
| "grad_norm": 2.388373851776123, |
| "learning_rate": 4.9378573204076564e-05, |
| "loss": 4.4735, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.12270616847225294, |
| "grad_norm": 2.471214532852173, |
| "learning_rate": 4.931643052448422e-05, |
| "loss": 4.3991, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.12381162944948043, |
| "grad_norm": 2.5611140727996826, |
| "learning_rate": 4.925428784489187e-05, |
| "loss": 4.3706, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.12491709042670794, |
| "grad_norm": 2.365116834640503, |
| "learning_rate": 4.919214516529953e-05, |
| "loss": 4.3329, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.12602255140393545, |
| "grad_norm": 2.5337095260620117, |
| "learning_rate": 4.913000248570719e-05, |
| "loss": 4.3502, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.12712801238116295, |
| "grad_norm": 2.3166821002960205, |
| "learning_rate": 4.906785980611484e-05, |
| "loss": 4.4643, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.12823347335839044, |
| "grad_norm": 2.7000489234924316, |
| "learning_rate": 4.90057171265225e-05, |
| "loss": 4.2057, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.12933893433561794, |
| "grad_norm": 2.541940450668335, |
| "learning_rate": 4.894357444693015e-05, |
| "loss": 4.3045, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.13044439531284546, |
| "grad_norm": 2.4047327041625977, |
| "learning_rate": 4.888143176733781e-05, |
| "loss": 4.2141, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.13154985629007296, |
| "grad_norm": 2.3533935546875, |
| "learning_rate": 4.881928908774547e-05, |
| "loss": 4.3564, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.13265531726730045, |
| "grad_norm": 2.469710350036621, |
| "learning_rate": 4.875714640815312e-05, |
| "loss": 4.3387, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.13376077824452798, |
| "grad_norm": 2.4111387729644775, |
| "learning_rate": 4.8695003728560775e-05, |
| "loss": 4.3647, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.13486623922175547, |
| "grad_norm": 2.5026888847351074, |
| "learning_rate": 4.863286104896843e-05, |
| "loss": 4.4231, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.13597170019898297, |
| "grad_norm": 2.4435007572174072, |
| "learning_rate": 4.857071836937609e-05, |
| "loss": 4.45, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.1370771611762105, |
| "grad_norm": 2.518418550491333, |
| "learning_rate": 4.850857568978375e-05, |
| "loss": 4.4376, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.138182622153438, |
| "grad_norm": 2.4196436405181885, |
| "learning_rate": 4.84464330101914e-05, |
| "loss": 4.585, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.13928808313066549, |
| "grad_norm": 2.3251471519470215, |
| "learning_rate": 4.8384290330599056e-05, |
| "loss": 4.387, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.14039354410789298, |
| "grad_norm": 2.599461078643799, |
| "learning_rate": 4.832214765100672e-05, |
| "loss": 4.4816, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.1414990050851205, |
| "grad_norm": 2.5266942977905273, |
| "learning_rate": 4.826000497141437e-05, |
| "loss": 4.4737, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.142604466062348, |
| "grad_norm": 2.3561177253723145, |
| "learning_rate": 4.8197862291822025e-05, |
| "loss": 4.355, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.1437099270395755, |
| "grad_norm": 2.291571855545044, |
| "learning_rate": 4.813571961222968e-05, |
| "loss": 4.4803, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.14481538801680302, |
| "grad_norm": 2.5574657917022705, |
| "learning_rate": 4.807357693263734e-05, |
| "loss": 4.3132, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.14592084899403052, |
| "grad_norm": 2.816318988800049, |
| "learning_rate": 4.801143425304499e-05, |
| "loss": 4.4246, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.147026309971258, |
| "grad_norm": 2.3737952709198, |
| "learning_rate": 4.794929157345265e-05, |
| "loss": 4.5105, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.1481317709484855, |
| "grad_norm": 2.4100232124328613, |
| "learning_rate": 4.7887148893860305e-05, |
| "loss": 4.5111, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.14923723192571303, |
| "grad_norm": 2.36722731590271, |
| "learning_rate": 4.782500621426796e-05, |
| "loss": 4.3462, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.15034269290294053, |
| "grad_norm": 2.747675657272339, |
| "learning_rate": 4.776286353467562e-05, |
| "loss": 4.518, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.15144815388016802, |
| "grad_norm": 2.8760783672332764, |
| "learning_rate": 4.7700720855083274e-05, |
| "loss": 4.544, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.15255361485739555, |
| "grad_norm": 2.1986746788024902, |
| "learning_rate": 4.763857817549093e-05, |
| "loss": 4.4212, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.15365907583462304, |
| "grad_norm": 2.2483763694763184, |
| "learning_rate": 4.7576435495898586e-05, |
| "loss": 4.4373, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.15476453681185054, |
| "grad_norm": 2.5549709796905518, |
| "learning_rate": 4.751429281630624e-05, |
| "loss": 4.4253, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.15586999778907804, |
| "grad_norm": 2.2713725566864014, |
| "learning_rate": 4.74521501367139e-05, |
| "loss": 4.2794, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.15697545876630556, |
| "grad_norm": 2.340376615524292, |
| "learning_rate": 4.7390007457121555e-05, |
| "loss": 4.5125, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.15808091974353305, |
| "grad_norm": 2.421940803527832, |
| "learning_rate": 4.7327864777529204e-05, |
| "loss": 4.2371, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.15918638072076055, |
| "grad_norm": 2.4546539783477783, |
| "learning_rate": 4.726572209793687e-05, |
| "loss": 4.4549, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.16029184169798807, |
| "grad_norm": 2.427361011505127, |
| "learning_rate": 4.720357941834452e-05, |
| "loss": 4.4328, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.16139730267521557, |
| "grad_norm": 2.4004828929901123, |
| "learning_rate": 4.714143673875217e-05, |
| "loss": 4.2623, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.16250276365244307, |
| "grad_norm": 2.3959038257598877, |
| "learning_rate": 4.7079294059159836e-05, |
| "loss": 4.2597, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.16360822462967056, |
| "grad_norm": 2.257460594177246, |
| "learning_rate": 4.7017151379567485e-05, |
| "loss": 4.382, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.16471368560689809, |
| "grad_norm": 2.546736478805542, |
| "learning_rate": 4.695500869997515e-05, |
| "loss": 4.4304, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.16581914658412558, |
| "grad_norm": 2.665574789047241, |
| "learning_rate": 4.6892866020382804e-05, |
| "loss": 4.443, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.16692460756135308, |
| "grad_norm": 2.587796926498413, |
| "learning_rate": 4.6830723340790454e-05, |
| "loss": 4.3819, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.1680300685385806, |
| "grad_norm": 2.6442179679870605, |
| "learning_rate": 4.6768580661198117e-05, |
| "loss": 4.3984, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.1691355295158081, |
| "grad_norm": 2.596620798110962, |
| "learning_rate": 4.670643798160577e-05, |
| "loss": 4.2336, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.1702409904930356, |
| "grad_norm": 2.4057729244232178, |
| "learning_rate": 4.664429530201342e-05, |
| "loss": 4.3909, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.1713464514702631, |
| "grad_norm": 2.406342029571533, |
| "learning_rate": 4.6582152622421085e-05, |
| "loss": 4.3091, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.1724519124474906, |
| "grad_norm": 2.4423723220825195, |
| "learning_rate": 4.6520009942828734e-05, |
| "loss": 4.3409, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.1735573734247181, |
| "grad_norm": 2.342496633529663, |
| "learning_rate": 4.645786726323639e-05, |
| "loss": 4.3805, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.1746628344019456, |
| "grad_norm": 2.482818365097046, |
| "learning_rate": 4.6395724583644054e-05, |
| "loss": 4.323, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.17576829537917313, |
| "grad_norm": 2.6542818546295166, |
| "learning_rate": 4.63335819040517e-05, |
| "loss": 4.3603, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.17687375635640062, |
| "grad_norm": 2.499776840209961, |
| "learning_rate": 4.627143922445936e-05, |
| "loss": 4.4008, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.17797921733362812, |
| "grad_norm": 2.341139316558838, |
| "learning_rate": 4.6209296544867015e-05, |
| "loss": 4.3715, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.17908467831085562, |
| "grad_norm": 2.29777455329895, |
| "learning_rate": 4.614715386527467e-05, |
| "loss": 4.4741, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.18019013928808314, |
| "grad_norm": 2.515763282775879, |
| "learning_rate": 4.608501118568233e-05, |
| "loss": 4.3415, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.18129560026531064, |
| "grad_norm": 2.4565176963806152, |
| "learning_rate": 4.6022868506089984e-05, |
| "loss": 4.2374, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.18240106124253813, |
| "grad_norm": 2.6354682445526123, |
| "learning_rate": 4.596072582649764e-05, |
| "loss": 4.4921, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.18350652221976566, |
| "grad_norm": 2.610104560852051, |
| "learning_rate": 4.5898583146905296e-05, |
| "loss": 4.4667, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.18461198319699315, |
| "grad_norm": 2.362448215484619, |
| "learning_rate": 4.583644046731295e-05, |
| "loss": 4.3195, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.18571744417422065, |
| "grad_norm": 2.380387306213379, |
| "learning_rate": 4.577429778772061e-05, |
| "loss": 4.4707, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.18682290515144814, |
| "grad_norm": 2.4917492866516113, |
| "learning_rate": 4.5712155108128265e-05, |
| "loss": 4.5139, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.18792836612867567, |
| "grad_norm": 2.3864855766296387, |
| "learning_rate": 4.565001242853592e-05, |
| "loss": 4.5081, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.18903382710590316, |
| "grad_norm": 2.3583791255950928, |
| "learning_rate": 4.558786974894358e-05, |
| "loss": 4.3965, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.19013928808313066, |
| "grad_norm": 2.506446599960327, |
| "learning_rate": 4.552572706935123e-05, |
| "loss": 4.4016, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.19124474906035818, |
| "grad_norm": 2.2975127696990967, |
| "learning_rate": 4.546358438975889e-05, |
| "loss": 4.4601, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.19235021003758568, |
| "grad_norm": 2.433366537094116, |
| "learning_rate": 4.5401441710166546e-05, |
| "loss": 4.2386, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.19345567101481317, |
| "grad_norm": 2.3259806632995605, |
| "learning_rate": 4.53392990305742e-05, |
| "loss": 4.3704, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.19456113199204067, |
| "grad_norm": 2.514643907546997, |
| "learning_rate": 4.527715635098186e-05, |
| "loss": 4.4008, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.1956665929692682, |
| "grad_norm": 2.3121140003204346, |
| "learning_rate": 4.5215013671389514e-05, |
| "loss": 4.2446, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.1967720539464957, |
| "grad_norm": 2.412771224975586, |
| "learning_rate": 4.515287099179717e-05, |
| "loss": 4.4833, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.19787751492372319, |
| "grad_norm": 2.4728493690490723, |
| "learning_rate": 4.509072831220482e-05, |
| "loss": 4.2572, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.1989829759009507, |
| "grad_norm": 2.3301310539245605, |
| "learning_rate": 4.502858563261248e-05, |
| "loss": 4.3495, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2000884368781782, |
| "grad_norm": 2.5001354217529297, |
| "learning_rate": 4.496644295302014e-05, |
| "loss": 4.3298, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.2011938978554057, |
| "grad_norm": 2.338364601135254, |
| "learning_rate": 4.490430027342779e-05, |
| "loss": 4.3985, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.2022993588326332, |
| "grad_norm": 2.30706524848938, |
| "learning_rate": 4.484215759383545e-05, |
| "loss": 4.3349, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.20340481980986072, |
| "grad_norm": 2.396179437637329, |
| "learning_rate": 4.478001491424311e-05, |
| "loss": 4.3986, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.20451028078708822, |
| "grad_norm": 2.477341890335083, |
| "learning_rate": 4.471787223465076e-05, |
| "loss": 4.3669, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.2056157417643157, |
| "grad_norm": 2.5613510608673096, |
| "learning_rate": 4.465572955505842e-05, |
| "loss": 4.3262, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.20672120274154324, |
| "grad_norm": 2.5783421993255615, |
| "learning_rate": 4.459358687546607e-05, |
| "loss": 4.3136, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.20782666371877073, |
| "grad_norm": 2.4187774658203125, |
| "learning_rate": 4.4531444195873725e-05, |
| "loss": 4.3181, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.20893212469599823, |
| "grad_norm": 2.5953481197357178, |
| "learning_rate": 4.446930151628139e-05, |
| "loss": 4.5064, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.21003758567322572, |
| "grad_norm": 2.513113260269165, |
| "learning_rate": 4.440715883668904e-05, |
| "loss": 4.3423, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.21114304665045325, |
| "grad_norm": 2.44311261177063, |
| "learning_rate": 4.4345016157096694e-05, |
| "loss": 4.376, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.21224850762768074, |
| "grad_norm": 2.427305221557617, |
| "learning_rate": 4.428287347750435e-05, |
| "loss": 4.3677, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.21335396860490824, |
| "grad_norm": 2.3907687664031982, |
| "learning_rate": 4.4220730797912006e-05, |
| "loss": 4.3134, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.21445942958213576, |
| "grad_norm": 2.348848819732666, |
| "learning_rate": 4.415858811831967e-05, |
| "loss": 4.5477, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.21556489055936326, |
| "grad_norm": 2.830244302749634, |
| "learning_rate": 4.409644543872732e-05, |
| "loss": 4.4073, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.21667035153659076, |
| "grad_norm": 2.5423595905303955, |
| "learning_rate": 4.4034302759134975e-05, |
| "loss": 4.4871, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.21777581251381825, |
| "grad_norm": 2.4786319732666016, |
| "learning_rate": 4.397216007954264e-05, |
| "loss": 4.4376, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.21888127349104577, |
| "grad_norm": 2.5218095779418945, |
| "learning_rate": 4.391001739995029e-05, |
| "loss": 4.5045, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.21998673446827327, |
| "grad_norm": 2.492645740509033, |
| "learning_rate": 4.384787472035794e-05, |
| "loss": 4.3694, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.22109219544550077, |
| "grad_norm": 2.3848962783813477, |
| "learning_rate": 4.37857320407656e-05, |
| "loss": 4.2347, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2221976564227283, |
| "grad_norm": 2.4425323009490967, |
| "learning_rate": 4.3723589361173255e-05, |
| "loss": 4.4254, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.2233031173999558, |
| "grad_norm": 2.4466652870178223, |
| "learning_rate": 4.366144668158091e-05, |
| "loss": 4.3855, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.22440857837718328, |
| "grad_norm": 2.5668978691101074, |
| "learning_rate": 4.359930400198857e-05, |
| "loss": 4.1885, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.22551403935441078, |
| "grad_norm": 2.1038079261779785, |
| "learning_rate": 4.3537161322396224e-05, |
| "loss": 4.3574, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.2266195003316383, |
| "grad_norm": 2.3468997478485107, |
| "learning_rate": 4.347501864280388e-05, |
| "loss": 4.3984, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.2277249613088658, |
| "grad_norm": 2.2503867149353027, |
| "learning_rate": 4.3412875963211536e-05, |
| "loss": 4.3913, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.2288304222860933, |
| "grad_norm": 2.508117437362671, |
| "learning_rate": 4.335073328361919e-05, |
| "loss": 4.4638, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.22993588326332082, |
| "grad_norm": 2.503089666366577, |
| "learning_rate": 4.328859060402685e-05, |
| "loss": 4.2682, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.2310413442405483, |
| "grad_norm": 2.4912095069885254, |
| "learning_rate": 4.3226447924434505e-05, |
| "loss": 4.4836, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.2321468052177758, |
| "grad_norm": 2.383793354034424, |
| "learning_rate": 4.3164305244842154e-05, |
| "loss": 4.4063, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2332522661950033, |
| "grad_norm": 2.299375534057617, |
| "learning_rate": 4.310216256524982e-05, |
| "loss": 4.3989, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.23435772717223083, |
| "grad_norm": 2.432926893234253, |
| "learning_rate": 4.304001988565747e-05, |
| "loss": 4.3972, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.23546318814945832, |
| "grad_norm": 2.6002376079559326, |
| "learning_rate": 4.297787720606512e-05, |
| "loss": 4.166, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.23656864912668582, |
| "grad_norm": 2.76485013961792, |
| "learning_rate": 4.2915734526472786e-05, |
| "loss": 4.4923, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.23767411010391334, |
| "grad_norm": 2.4608538150787354, |
| "learning_rate": 4.285359184688044e-05, |
| "loss": 4.4156, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.23877957108114084, |
| "grad_norm": 2.5879130363464355, |
| "learning_rate": 4.279144916728809e-05, |
| "loss": 4.2349, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.23988503205836834, |
| "grad_norm": 2.4327921867370605, |
| "learning_rate": 4.2729306487695754e-05, |
| "loss": 4.2487, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.24099049303559583, |
| "grad_norm": 2.4870424270629883, |
| "learning_rate": 4.2667163808103404e-05, |
| "loss": 4.499, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.24209595401282336, |
| "grad_norm": 2.573253631591797, |
| "learning_rate": 4.2605021128511067e-05, |
| "loss": 4.2689, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.24320141499005085, |
| "grad_norm": 2.4426496028900146, |
| "learning_rate": 4.254287844891872e-05, |
| "loss": 4.3502, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.24430687596727835, |
| "grad_norm": 2.2450709342956543, |
| "learning_rate": 4.248073576932637e-05, |
| "loss": 4.3314, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.24541233694450587, |
| "grad_norm": 2.6109743118286133, |
| "learning_rate": 4.2418593089734035e-05, |
| "loss": 4.305, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.24651779792173337, |
| "grad_norm": 2.626323938369751, |
| "learning_rate": 4.2356450410141684e-05, |
| "loss": 4.2716, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.24762325889896086, |
| "grad_norm": 2.320756673812866, |
| "learning_rate": 4.229430773054934e-05, |
| "loss": 4.4438, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.24872871987618836, |
| "grad_norm": 2.481062650680542, |
| "learning_rate": 4.2232165050957004e-05, |
| "loss": 4.4925, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.24983418085341588, |
| "grad_norm": 2.521596908569336, |
| "learning_rate": 4.217002237136465e-05, |
| "loss": 4.4221, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.2509396418306434, |
| "grad_norm": 2.361933469772339, |
| "learning_rate": 4.210787969177231e-05, |
| "loss": 4.3693, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.2520451028078709, |
| "grad_norm": 2.357417106628418, |
| "learning_rate": 4.204573701217997e-05, |
| "loss": 4.4775, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.25315056378509837, |
| "grad_norm": 2.688908576965332, |
| "learning_rate": 4.198359433258762e-05, |
| "loss": 4.29, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.2542560247623259, |
| "grad_norm": 2.2829039096832275, |
| "learning_rate": 4.192145165299528e-05, |
| "loss": 4.401, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2553614857395534, |
| "grad_norm": 2.6343767642974854, |
| "learning_rate": 4.1859308973402934e-05, |
| "loss": 4.4336, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.2564669467167809, |
| "grad_norm": 2.3044660091400146, |
| "learning_rate": 4.179716629381059e-05, |
| "loss": 4.3832, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.2575724076940084, |
| "grad_norm": 2.5719525814056396, |
| "learning_rate": 4.1735023614218246e-05, |
| "loss": 4.2833, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.2586778686712359, |
| "grad_norm": 2.6642727851867676, |
| "learning_rate": 4.16728809346259e-05, |
| "loss": 4.3051, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.2597833296484634, |
| "grad_norm": 2.5633628368377686, |
| "learning_rate": 4.161073825503356e-05, |
| "loss": 4.3245, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.2608887906256909, |
| "grad_norm": 2.3659725189208984, |
| "learning_rate": 4.1548595575441215e-05, |
| "loss": 4.5625, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2619942516029184, |
| "grad_norm": 2.5750010013580322, |
| "learning_rate": 4.148645289584887e-05, |
| "loss": 4.2276, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.2630997125801459, |
| "grad_norm": 2.650841474533081, |
| "learning_rate": 4.142431021625653e-05, |
| "loss": 4.4841, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.26420517355737344, |
| "grad_norm": 2.257554292678833, |
| "learning_rate": 4.136216753666418e-05, |
| "loss": 4.5292, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.2653106345346009, |
| "grad_norm": 2.3063228130340576, |
| "learning_rate": 4.130002485707184e-05, |
| "loss": 4.3253, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.26641609551182843, |
| "grad_norm": 2.4297571182250977, |
| "learning_rate": 4.123788217747949e-05, |
| "loss": 4.3772, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.26752155648905596, |
| "grad_norm": 2.431993007659912, |
| "learning_rate": 4.117573949788715e-05, |
| "loss": 4.3032, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.2686270174662834, |
| "grad_norm": 2.3991315364837646, |
| "learning_rate": 4.111359681829481e-05, |
| "loss": 4.3427, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.26973247844351095, |
| "grad_norm": 2.3820011615753174, |
| "learning_rate": 4.1051454138702464e-05, |
| "loss": 4.3706, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.27083793942073847, |
| "grad_norm": 2.670473337173462, |
| "learning_rate": 4.098931145911012e-05, |
| "loss": 4.3521, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.27194340039796594, |
| "grad_norm": 2.8199636936187744, |
| "learning_rate": 4.0927168779517776e-05, |
| "loss": 4.3276, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.27304886137519346, |
| "grad_norm": 2.347820520401001, |
| "learning_rate": 4.086502609992543e-05, |
| "loss": 4.3414, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.274154322352421, |
| "grad_norm": 2.271981716156006, |
| "learning_rate": 4.080288342033309e-05, |
| "loss": 4.3148, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.27525978332964846, |
| "grad_norm": 2.515171527862549, |
| "learning_rate": 4.074074074074074e-05, |
| "loss": 4.3787, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.276365244306876, |
| "grad_norm": 2.4658026695251465, |
| "learning_rate": 4.06785980611484e-05, |
| "loss": 4.4014, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.27747070528410345, |
| "grad_norm": 2.4536259174346924, |
| "learning_rate": 4.061645538155606e-05, |
| "loss": 4.2641, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.27857616626133097, |
| "grad_norm": 2.491704225540161, |
| "learning_rate": 4.055431270196371e-05, |
| "loss": 4.3729, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.2796816272385585, |
| "grad_norm": 2.5859057903289795, |
| "learning_rate": 4.049217002237137e-05, |
| "loss": 4.3815, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.28078708821578596, |
| "grad_norm": 2.5725574493408203, |
| "learning_rate": 4.043002734277902e-05, |
| "loss": 4.3624, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.2818925491930135, |
| "grad_norm": 2.484657049179077, |
| "learning_rate": 4.0367884663186675e-05, |
| "loss": 4.3583, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.282998010170241, |
| "grad_norm": 2.544689178466797, |
| "learning_rate": 4.030574198359434e-05, |
| "loss": 4.2289, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.2841034711474685, |
| "grad_norm": 2.5880086421966553, |
| "learning_rate": 4.024359930400199e-05, |
| "loss": 4.3604, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.285208932124696, |
| "grad_norm": 2.614906072616577, |
| "learning_rate": 4.0181456624409644e-05, |
| "loss": 4.2697, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.2863143931019235, |
| "grad_norm": 2.6999433040618896, |
| "learning_rate": 4.011931394481731e-05, |
| "loss": 4.4131, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.287419854079151, |
| "grad_norm": 2.3542439937591553, |
| "learning_rate": 4.0057171265224956e-05, |
| "loss": 4.3436, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.2885253150563785, |
| "grad_norm": 2.4977333545684814, |
| "learning_rate": 3.999502858563262e-05, |
| "loss": 4.2333, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.28963077603360604, |
| "grad_norm": 2.3839094638824463, |
| "learning_rate": 3.993288590604027e-05, |
| "loss": 4.2906, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.2907362370108335, |
| "grad_norm": 2.583096504211426, |
| "learning_rate": 3.9870743226447925e-05, |
| "loss": 4.2372, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.29184169798806103, |
| "grad_norm": 2.8082754611968994, |
| "learning_rate": 3.980860054685559e-05, |
| "loss": 4.3763, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.2929471589652885, |
| "grad_norm": 2.699869394302368, |
| "learning_rate": 3.974645786726324e-05, |
| "loss": 4.3501, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.294052619942516, |
| "grad_norm": 2.489060878753662, |
| "learning_rate": 3.968431518767089e-05, |
| "loss": 4.3261, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.29515808091974355, |
| "grad_norm": 2.6914567947387695, |
| "learning_rate": 3.962217250807855e-05, |
| "loss": 4.3582, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.296263541896971, |
| "grad_norm": 2.6697006225585938, |
| "learning_rate": 3.9560029828486205e-05, |
| "loss": 4.3114, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.29736900287419854, |
| "grad_norm": 2.5954415798187256, |
| "learning_rate": 3.949788714889386e-05, |
| "loss": 4.2934, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.29847446385142606, |
| "grad_norm": 2.985745906829834, |
| "learning_rate": 3.943574446930152e-05, |
| "loss": 4.3548, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.29957992482865353, |
| "grad_norm": 2.397188186645508, |
| "learning_rate": 3.9373601789709174e-05, |
| "loss": 4.378, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.30068538580588106, |
| "grad_norm": 2.328190565109253, |
| "learning_rate": 3.931145911011683e-05, |
| "loss": 4.3864, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.3017908467831086, |
| "grad_norm": 2.659130096435547, |
| "learning_rate": 3.9249316430524486e-05, |
| "loss": 4.2503, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.30289630776033605, |
| "grad_norm": 2.5458106994628906, |
| "learning_rate": 3.918717375093214e-05, |
| "loss": 4.4694, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.30400176873756357, |
| "grad_norm": 2.6253693103790283, |
| "learning_rate": 3.91250310713398e-05, |
| "loss": 4.3011, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.3051072297147911, |
| "grad_norm": 2.5949649810791016, |
| "learning_rate": 3.9062888391747455e-05, |
| "loss": 4.3781, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.30621269069201856, |
| "grad_norm": 2.6035447120666504, |
| "learning_rate": 3.900074571215511e-05, |
| "loss": 4.215, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.3073181516692461, |
| "grad_norm": 2.7866146564483643, |
| "learning_rate": 3.893860303256277e-05, |
| "loss": 4.3382, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.30842361264647356, |
| "grad_norm": 2.5743088722229004, |
| "learning_rate": 3.887646035297042e-05, |
| "loss": 4.3505, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.3095290736237011, |
| "grad_norm": 2.6363112926483154, |
| "learning_rate": 3.881431767337807e-05, |
| "loss": 4.37, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3106345346009286, |
| "grad_norm": 2.409414291381836, |
| "learning_rate": 3.8752174993785736e-05, |
| "loss": 4.3642, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.31173999557815607, |
| "grad_norm": 2.6767184734344482, |
| "learning_rate": 3.869003231419339e-05, |
| "loss": 4.4374, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.3128454565553836, |
| "grad_norm": 2.6071739196777344, |
| "learning_rate": 3.862788963460104e-05, |
| "loss": 4.4875, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.3139509175326111, |
| "grad_norm": 2.8153324127197266, |
| "learning_rate": 3.8565746955008704e-05, |
| "loss": 4.2156, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.3150563785098386, |
| "grad_norm": 2.5854175090789795, |
| "learning_rate": 3.8503604275416354e-05, |
| "loss": 4.4762, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.3161618394870661, |
| "grad_norm": 2.6283559799194336, |
| "learning_rate": 3.8441461595824017e-05, |
| "loss": 4.3707, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.31726730046429363, |
| "grad_norm": 2.392477512359619, |
| "learning_rate": 3.837931891623167e-05, |
| "loss": 4.4578, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.3183727614415211, |
| "grad_norm": 2.5749545097351074, |
| "learning_rate": 3.831717623663932e-05, |
| "loss": 4.3093, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.3194782224187486, |
| "grad_norm": 2.57065486907959, |
| "learning_rate": 3.8255033557046985e-05, |
| "loss": 4.4154, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.32058368339597615, |
| "grad_norm": 2.652879476547241, |
| "learning_rate": 3.819289087745464e-05, |
| "loss": 4.5573, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3216891443732036, |
| "grad_norm": 2.846167802810669, |
| "learning_rate": 3.813074819786229e-05, |
| "loss": 4.4113, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.32279460535043114, |
| "grad_norm": 2.641319513320923, |
| "learning_rate": 3.8068605518269954e-05, |
| "loss": 4.3614, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.3239000663276586, |
| "grad_norm": 2.5918776988983154, |
| "learning_rate": 3.80064628386776e-05, |
| "loss": 4.3636, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.32500552730488613, |
| "grad_norm": 2.6786410808563232, |
| "learning_rate": 3.794432015908526e-05, |
| "loss": 4.3731, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.32611098828211366, |
| "grad_norm": 2.548100233078003, |
| "learning_rate": 3.788217747949292e-05, |
| "loss": 4.2728, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.3272164492593411, |
| "grad_norm": 2.409332752227783, |
| "learning_rate": 3.782003479990057e-05, |
| "loss": 4.3442, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.32832191023656865, |
| "grad_norm": 2.8180229663848877, |
| "learning_rate": 3.775789212030823e-05, |
| "loss": 4.3566, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.32942737121379617, |
| "grad_norm": 2.634147882461548, |
| "learning_rate": 3.7695749440715884e-05, |
| "loss": 4.4708, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.33053283219102364, |
| "grad_norm": 2.3490123748779297, |
| "learning_rate": 3.763360676112354e-05, |
| "loss": 4.2733, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.33163829316825116, |
| "grad_norm": 2.638009548187256, |
| "learning_rate": 3.7571464081531196e-05, |
| "loss": 4.4472, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3327437541454787, |
| "grad_norm": 2.601348638534546, |
| "learning_rate": 3.750932140193885e-05, |
| "loss": 4.5207, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.33384921512270616, |
| "grad_norm": 2.6195290088653564, |
| "learning_rate": 3.744717872234651e-05, |
| "loss": 4.3151, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3349546760999337, |
| "grad_norm": 2.5007519721984863, |
| "learning_rate": 3.7385036042754165e-05, |
| "loss": 4.3751, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.3360601370771612, |
| "grad_norm": 2.4757566452026367, |
| "learning_rate": 3.732289336316182e-05, |
| "loss": 4.2864, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.33716559805438867, |
| "grad_norm": 2.612262487411499, |
| "learning_rate": 3.726075068356948e-05, |
| "loss": 4.4617, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.3382710590316162, |
| "grad_norm": 2.3229122161865234, |
| "learning_rate": 3.719860800397713e-05, |
| "loss": 4.2659, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.33937652000884366, |
| "grad_norm": 3.0333845615386963, |
| "learning_rate": 3.713646532438479e-05, |
| "loss": 4.2091, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.3404819809860712, |
| "grad_norm": 2.364445686340332, |
| "learning_rate": 3.7074322644792446e-05, |
| "loss": 4.1667, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.3415874419632987, |
| "grad_norm": 2.6092944145202637, |
| "learning_rate": 3.70121799652001e-05, |
| "loss": 4.4148, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.3426929029405262, |
| "grad_norm": 2.69758677482605, |
| "learning_rate": 3.695003728560776e-05, |
| "loss": 4.3029, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.3437983639177537, |
| "grad_norm": 2.665482997894287, |
| "learning_rate": 3.6887894606015414e-05, |
| "loss": 4.3617, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.3449038248949812, |
| "grad_norm": 2.6900408267974854, |
| "learning_rate": 3.682575192642307e-05, |
| "loss": 4.456, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.3460092858722087, |
| "grad_norm": 2.335728406906128, |
| "learning_rate": 3.6763609246830726e-05, |
| "loss": 4.3155, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.3471147468494362, |
| "grad_norm": 2.85036039352417, |
| "learning_rate": 3.670146656723838e-05, |
| "loss": 4.3152, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.34822020782666374, |
| "grad_norm": 2.652212381362915, |
| "learning_rate": 3.663932388764604e-05, |
| "loss": 4.4341, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.3493256688038912, |
| "grad_norm": 2.3771016597747803, |
| "learning_rate": 3.6577181208053695e-05, |
| "loss": 4.3358, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.35043112978111873, |
| "grad_norm": 2.7119994163513184, |
| "learning_rate": 3.651503852846135e-05, |
| "loss": 4.2583, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.35153659075834626, |
| "grad_norm": 2.4877076148986816, |
| "learning_rate": 3.645289584886901e-05, |
| "loss": 4.4398, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.3526420517355737, |
| "grad_norm": 2.5400094985961914, |
| "learning_rate": 3.639075316927666e-05, |
| "loss": 4.4864, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.35374751271280125, |
| "grad_norm": 2.929621458053589, |
| "learning_rate": 3.632861048968432e-05, |
| "loss": 4.2378, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3548529736900287, |
| "grad_norm": 2.555133581161499, |
| "learning_rate": 3.6266467810091976e-05, |
| "loss": 4.3108, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.35595843466725624, |
| "grad_norm": 2.410792350769043, |
| "learning_rate": 3.6204325130499625e-05, |
| "loss": 4.3592, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.35706389564448376, |
| "grad_norm": 2.459975004196167, |
| "learning_rate": 3.614218245090729e-05, |
| "loss": 4.5196, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.35816935662171123, |
| "grad_norm": 2.834867000579834, |
| "learning_rate": 3.608003977131494e-05, |
| "loss": 4.3758, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.35927481759893876, |
| "grad_norm": 2.6577582359313965, |
| "learning_rate": 3.6017897091722594e-05, |
| "loss": 4.3663, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.3603802785761663, |
| "grad_norm": 2.725658416748047, |
| "learning_rate": 3.595575441213026e-05, |
| "loss": 4.3878, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.36148573955339375, |
| "grad_norm": 2.368903160095215, |
| "learning_rate": 3.5893611732537906e-05, |
| "loss": 4.3393, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.36259120053062127, |
| "grad_norm": 2.2058262825012207, |
| "learning_rate": 3.583146905294556e-05, |
| "loss": 4.3152, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.3636966615078488, |
| "grad_norm": 2.60345458984375, |
| "learning_rate": 3.576932637335322e-05, |
| "loss": 4.4803, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.36480212248507626, |
| "grad_norm": 2.657458543777466, |
| "learning_rate": 3.5707183693760875e-05, |
| "loss": 4.3058, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.3659075834623038, |
| "grad_norm": 2.596036195755005, |
| "learning_rate": 3.564504101416854e-05, |
| "loss": 4.2178, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.3670130444395313, |
| "grad_norm": 2.7093770503997803, |
| "learning_rate": 3.558289833457619e-05, |
| "loss": 4.3902, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.3681185054167588, |
| "grad_norm": 2.2766308784484863, |
| "learning_rate": 3.552075565498384e-05, |
| "loss": 4.4526, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.3692239663939863, |
| "grad_norm": 2.696753740310669, |
| "learning_rate": 3.5458612975391506e-05, |
| "loss": 4.3636, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.37032942737121377, |
| "grad_norm": 2.463946580886841, |
| "learning_rate": 3.5396470295799155e-05, |
| "loss": 4.2369, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.3714348883484413, |
| "grad_norm": 2.948925018310547, |
| "learning_rate": 3.533432761620681e-05, |
| "loss": 4.4674, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.3725403493256688, |
| "grad_norm": 2.914759874343872, |
| "learning_rate": 3.527218493661447e-05, |
| "loss": 4.2563, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.3736458103028963, |
| "grad_norm": 2.562021255493164, |
| "learning_rate": 3.5210042257022124e-05, |
| "loss": 4.2267, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.3747512712801238, |
| "grad_norm": 2.4976344108581543, |
| "learning_rate": 3.514789957742978e-05, |
| "loss": 4.3459, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.37585673225735133, |
| "grad_norm": 2.656845808029175, |
| "learning_rate": 3.5085756897837436e-05, |
| "loss": 4.2767, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3769621932345788, |
| "grad_norm": 2.6122493743896484, |
| "learning_rate": 3.502361421824509e-05, |
| "loss": 4.2535, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.3780676542118063, |
| "grad_norm": 2.7145111560821533, |
| "learning_rate": 3.496147153865275e-05, |
| "loss": 4.373, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.37917311518903385, |
| "grad_norm": 2.6271467208862305, |
| "learning_rate": 3.4899328859060405e-05, |
| "loss": 4.2728, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.3802785761662613, |
| "grad_norm": 2.350149631500244, |
| "learning_rate": 3.483718617946806e-05, |
| "loss": 4.1621, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.38138403714348884, |
| "grad_norm": 2.773153305053711, |
| "learning_rate": 3.477504349987572e-05, |
| "loss": 4.3022, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.38248949812071636, |
| "grad_norm": 2.8574771881103516, |
| "learning_rate": 3.471290082028337e-05, |
| "loss": 4.2579, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.38359495909794383, |
| "grad_norm": 2.725560426712036, |
| "learning_rate": 3.465075814069103e-05, |
| "loss": 4.2797, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.38470042007517136, |
| "grad_norm": 2.513237476348877, |
| "learning_rate": 3.4588615461098686e-05, |
| "loss": 4.405, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.3858058810523988, |
| "grad_norm": 2.718583822250366, |
| "learning_rate": 3.452647278150634e-05, |
| "loss": 4.2946, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.38691134202962635, |
| "grad_norm": 2.4899282455444336, |
| "learning_rate": 3.446433010191399e-05, |
| "loss": 4.269, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.38801680300685387, |
| "grad_norm": 2.5338146686553955, |
| "learning_rate": 3.4402187422321654e-05, |
| "loss": 4.4835, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.38912226398408134, |
| "grad_norm": 2.3587207794189453, |
| "learning_rate": 3.434004474272931e-05, |
| "loss": 4.1855, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.39022772496130886, |
| "grad_norm": 2.939471960067749, |
| "learning_rate": 3.427790206313696e-05, |
| "loss": 4.31, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.3913331859385364, |
| "grad_norm": 2.79874324798584, |
| "learning_rate": 3.421575938354462e-05, |
| "loss": 4.2398, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.39243864691576386, |
| "grad_norm": 2.5179383754730225, |
| "learning_rate": 3.415361670395227e-05, |
| "loss": 4.2628, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.3935441078929914, |
| "grad_norm": 2.731872797012329, |
| "learning_rate": 3.4091474024359935e-05, |
| "loss": 4.3159, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.3946495688702189, |
| "grad_norm": 2.5067148208618164, |
| "learning_rate": 3.402933134476759e-05, |
| "loss": 4.4061, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.39575502984744637, |
| "grad_norm": 2.3916046619415283, |
| "learning_rate": 3.396718866517524e-05, |
| "loss": 4.2791, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.3968604908246739, |
| "grad_norm": 2.6597490310668945, |
| "learning_rate": 3.3905045985582904e-05, |
| "loss": 4.4391, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.3979659518019014, |
| "grad_norm": 2.5750606060028076, |
| "learning_rate": 3.384290330599056e-05, |
| "loss": 4.1806, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3990714127791289, |
| "grad_norm": 2.561917781829834, |
| "learning_rate": 3.378076062639821e-05, |
| "loss": 4.4584, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.4001768737563564, |
| "grad_norm": 2.576657772064209, |
| "learning_rate": 3.371861794680587e-05, |
| "loss": 4.1388, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.4012823347335839, |
| "grad_norm": 2.5817503929138184, |
| "learning_rate": 3.365647526721352e-05, |
| "loss": 4.3074, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.4023877957108114, |
| "grad_norm": 2.4846079349517822, |
| "learning_rate": 3.359433258762118e-05, |
| "loss": 4.3061, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.4034932566880389, |
| "grad_norm": 2.833554744720459, |
| "learning_rate": 3.353218990802884e-05, |
| "loss": 4.4506, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.4045987176652664, |
| "grad_norm": 2.6276683807373047, |
| "learning_rate": 3.347004722843649e-05, |
| "loss": 4.3484, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.4057041786424939, |
| "grad_norm": 2.6111786365509033, |
| "learning_rate": 3.3407904548844146e-05, |
| "loss": 4.4257, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.40680963961972144, |
| "grad_norm": 2.813497304916382, |
| "learning_rate": 3.33457618692518e-05, |
| "loss": 4.3713, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.4079151005969489, |
| "grad_norm": 2.7521538734436035, |
| "learning_rate": 3.328361918965946e-05, |
| "loss": 4.4385, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.40902056157417643, |
| "grad_norm": 2.503818988800049, |
| "learning_rate": 3.3221476510067115e-05, |
| "loss": 4.4288, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.41012602255140396, |
| "grad_norm": 2.3562381267547607, |
| "learning_rate": 3.315933383047477e-05, |
| "loss": 4.2368, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.4112314835286314, |
| "grad_norm": 2.526411294937134, |
| "learning_rate": 3.309719115088243e-05, |
| "loss": 4.3008, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.41233694450585895, |
| "grad_norm": 2.6222381591796875, |
| "learning_rate": 3.303504847129008e-05, |
| "loss": 4.1532, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.4134424054830865, |
| "grad_norm": 2.6735141277313232, |
| "learning_rate": 3.297290579169774e-05, |
| "loss": 4.2497, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.41454786646031394, |
| "grad_norm": 2.612273931503296, |
| "learning_rate": 3.2910763112105396e-05, |
| "loss": 4.365, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.41565332743754146, |
| "grad_norm": 2.7102086544036865, |
| "learning_rate": 3.284862043251305e-05, |
| "loss": 4.2006, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.41675878841476893, |
| "grad_norm": 2.8893067836761475, |
| "learning_rate": 3.278647775292071e-05, |
| "loss": 4.4635, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.41786424939199646, |
| "grad_norm": 2.6870336532592773, |
| "learning_rate": 3.2724335073328364e-05, |
| "loss": 4.3284, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.418969710369224, |
| "grad_norm": 2.454735279083252, |
| "learning_rate": 3.266219239373602e-05, |
| "loss": 4.2499, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.42007517134645145, |
| "grad_norm": 2.5673999786376953, |
| "learning_rate": 3.2600049714143676e-05, |
| "loss": 4.3258, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.42118063232367897, |
| "grad_norm": 2.435605049133301, |
| "learning_rate": 3.253790703455133e-05, |
| "loss": 4.2839, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.4222860933009065, |
| "grad_norm": 2.7508575916290283, |
| "learning_rate": 3.247576435495899e-05, |
| "loss": 4.4643, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.42339155427813396, |
| "grad_norm": 2.5757343769073486, |
| "learning_rate": 3.2413621675366645e-05, |
| "loss": 4.1323, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.4244970152553615, |
| "grad_norm": 2.409933567047119, |
| "learning_rate": 3.23514789957743e-05, |
| "loss": 4.2882, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.425602476232589, |
| "grad_norm": 2.4064886569976807, |
| "learning_rate": 3.228933631618196e-05, |
| "loss": 4.3503, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.4267079372098165, |
| "grad_norm": 2.539107322692871, |
| "learning_rate": 3.222719363658961e-05, |
| "loss": 4.3415, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.427813398187044, |
| "grad_norm": 2.70954966545105, |
| "learning_rate": 3.216505095699727e-05, |
| "loss": 4.3901, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.4289188591642715, |
| "grad_norm": 2.902268171310425, |
| "learning_rate": 3.2102908277404926e-05, |
| "loss": 4.3829, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.430024320141499, |
| "grad_norm": 2.919811487197876, |
| "learning_rate": 3.2040765597812575e-05, |
| "loss": 4.3388, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.4311297811187265, |
| "grad_norm": 2.765904188156128, |
| "learning_rate": 3.197862291822024e-05, |
| "loss": 4.2619, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.432235242095954, |
| "grad_norm": 2.6072490215301514, |
| "learning_rate": 3.1916480238627894e-05, |
| "loss": 4.272, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.4333407030731815, |
| "grad_norm": 2.694185256958008, |
| "learning_rate": 3.1854337559035544e-05, |
| "loss": 4.3295, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.43444616405040903, |
| "grad_norm": 2.6962716579437256, |
| "learning_rate": 3.179219487944321e-05, |
| "loss": 4.2222, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.4355516250276365, |
| "grad_norm": 2.681506395339966, |
| "learning_rate": 3.1730052199850856e-05, |
| "loss": 4.3914, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.436657086004864, |
| "grad_norm": 2.792881488800049, |
| "learning_rate": 3.166790952025851e-05, |
| "loss": 4.4958, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.43776254698209155, |
| "grad_norm": 2.6680564880371094, |
| "learning_rate": 3.1605766840666175e-05, |
| "loss": 4.3593, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.438868007959319, |
| "grad_norm": 2.7864387035369873, |
| "learning_rate": 3.1543624161073825e-05, |
| "loss": 4.3489, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.43997346893654654, |
| "grad_norm": 2.5795204639434814, |
| "learning_rate": 3.148148148148148e-05, |
| "loss": 4.269, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.44107892991377406, |
| "grad_norm": 2.851243019104004, |
| "learning_rate": 3.141933880188914e-05, |
| "loss": 4.3883, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.44218439089100153, |
| "grad_norm": 2.732250452041626, |
| "learning_rate": 3.135719612229679e-05, |
| "loss": 4.2467, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.44328985186822906, |
| "grad_norm": 2.4607598781585693, |
| "learning_rate": 3.1295053442704456e-05, |
| "loss": 4.3155, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.4443953128454566, |
| "grad_norm": 2.546980857849121, |
| "learning_rate": 3.1232910763112105e-05, |
| "loss": 4.3949, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.44550077382268405, |
| "grad_norm": 2.734762191772461, |
| "learning_rate": 3.117076808351976e-05, |
| "loss": 4.46, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.4466062347999116, |
| "grad_norm": 2.5129942893981934, |
| "learning_rate": 3.110862540392742e-05, |
| "loss": 4.3879, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.44771169577713904, |
| "grad_norm": 2.644542694091797, |
| "learning_rate": 3.1046482724335074e-05, |
| "loss": 4.2476, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.44881715675436656, |
| "grad_norm": 2.771726369857788, |
| "learning_rate": 3.098434004474273e-05, |
| "loss": 4.4844, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.4499226177315941, |
| "grad_norm": 2.642275333404541, |
| "learning_rate": 3.0922197365150386e-05, |
| "loss": 4.3922, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.45102807870882156, |
| "grad_norm": 2.6931073665618896, |
| "learning_rate": 3.086005468555804e-05, |
| "loss": 4.3635, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.4521335396860491, |
| "grad_norm": 2.4507226943969727, |
| "learning_rate": 3.07979120059657e-05, |
| "loss": 4.3413, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.4532390006632766, |
| "grad_norm": 2.632704496383667, |
| "learning_rate": 3.0735769326373355e-05, |
| "loss": 4.324, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.45434446164050407, |
| "grad_norm": 2.6872873306274414, |
| "learning_rate": 3.067362664678101e-05, |
| "loss": 4.3887, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.4554499226177316, |
| "grad_norm": 2.8722641468048096, |
| "learning_rate": 3.061148396718867e-05, |
| "loss": 4.3594, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.4565553835949591, |
| "grad_norm": 2.642021417617798, |
| "learning_rate": 3.054934128759632e-05, |
| "loss": 4.266, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.4576608445721866, |
| "grad_norm": 2.8870849609375, |
| "learning_rate": 3.048719860800398e-05, |
| "loss": 4.4626, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.4587663055494141, |
| "grad_norm": 2.623518943786621, |
| "learning_rate": 3.0425055928411632e-05, |
| "loss": 4.3157, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.45987176652664163, |
| "grad_norm": 2.5889763832092285, |
| "learning_rate": 3.0362913248819292e-05, |
| "loss": 4.2704, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.4609772275038691, |
| "grad_norm": 2.8086538314819336, |
| "learning_rate": 3.0300770569226945e-05, |
| "loss": 4.3561, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.4620826884810966, |
| "grad_norm": 2.896907091140747, |
| "learning_rate": 3.02386278896346e-05, |
| "loss": 4.4201, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.4631881494583241, |
| "grad_norm": 2.5891048908233643, |
| "learning_rate": 3.017648521004226e-05, |
| "loss": 4.2137, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.4642936104355516, |
| "grad_norm": 2.5606133937835693, |
| "learning_rate": 3.0114342530449913e-05, |
| "loss": 4.3985, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.46539907141277914, |
| "grad_norm": 2.7957265377044678, |
| "learning_rate": 3.005219985085757e-05, |
| "loss": 4.395, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.4665045323900066, |
| "grad_norm": 2.593770742416382, |
| "learning_rate": 2.999005717126523e-05, |
| "loss": 4.4711, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.46760999336723413, |
| "grad_norm": 2.482818603515625, |
| "learning_rate": 2.992791449167288e-05, |
| "loss": 4.2323, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.46871545434446166, |
| "grad_norm": 2.972776174545288, |
| "learning_rate": 2.986577181208054e-05, |
| "loss": 4.3602, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.4698209153216891, |
| "grad_norm": 2.5987308025360107, |
| "learning_rate": 2.980362913248819e-05, |
| "loss": 4.5967, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.47092637629891665, |
| "grad_norm": 2.6634702682495117, |
| "learning_rate": 2.974148645289585e-05, |
| "loss": 4.1932, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.4720318372761442, |
| "grad_norm": 2.720262050628662, |
| "learning_rate": 2.967934377330351e-05, |
| "loss": 4.1392, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.47313729825337164, |
| "grad_norm": 2.9388368129730225, |
| "learning_rate": 2.9617201093711163e-05, |
| "loss": 4.2334, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.47424275923059916, |
| "grad_norm": 2.426968812942505, |
| "learning_rate": 2.955505841411882e-05, |
| "loss": 4.1942, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.4753482202078267, |
| "grad_norm": 2.53849458694458, |
| "learning_rate": 2.949291573452647e-05, |
| "loss": 4.4471, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.47645368118505416, |
| "grad_norm": 2.7019786834716797, |
| "learning_rate": 2.943077305493413e-05, |
| "loss": 4.2433, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.4775591421622817, |
| "grad_norm": 2.578589677810669, |
| "learning_rate": 2.9368630375341787e-05, |
| "loss": 4.2682, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.47866460313950915, |
| "grad_norm": 2.7424092292785645, |
| "learning_rate": 2.930648769574944e-05, |
| "loss": 4.4, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.47977006411673667, |
| "grad_norm": 2.6316614151000977, |
| "learning_rate": 2.92443450161571e-05, |
| "loss": 4.249, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.4808755250939642, |
| "grad_norm": 2.757974624633789, |
| "learning_rate": 2.9182202336564756e-05, |
| "loss": 4.3832, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.48198098607119166, |
| "grad_norm": 2.591416597366333, |
| "learning_rate": 2.912005965697241e-05, |
| "loss": 4.4295, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.4830864470484192, |
| "grad_norm": 2.576218605041504, |
| "learning_rate": 2.9057916977380068e-05, |
| "loss": 4.3352, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.4841919080256467, |
| "grad_norm": 2.5569541454315186, |
| "learning_rate": 2.899577429778772e-05, |
| "loss": 4.1921, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.4852973690028742, |
| "grad_norm": 2.489694118499756, |
| "learning_rate": 2.8933631618195377e-05, |
| "loss": 4.3463, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.4864028299801017, |
| "grad_norm": 2.486515522003174, |
| "learning_rate": 2.8871488938603037e-05, |
| "loss": 4.217, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.4875082909573292, |
| "grad_norm": 2.6798512935638428, |
| "learning_rate": 2.880934625901069e-05, |
| "loss": 4.3241, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.4886137519345567, |
| "grad_norm": 2.582374095916748, |
| "learning_rate": 2.8747203579418346e-05, |
| "loss": 4.3155, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.4897192129117842, |
| "grad_norm": 2.598309278488159, |
| "learning_rate": 2.8685060899826e-05, |
| "loss": 4.3281, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.49082467388901174, |
| "grad_norm": 2.5720064640045166, |
| "learning_rate": 2.8622918220233658e-05, |
| "loss": 4.3937, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.4919301348662392, |
| "grad_norm": 2.4057793617248535, |
| "learning_rate": 2.8560775540641317e-05, |
| "loss": 4.2625, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.49303559584346673, |
| "grad_norm": 2.5601112842559814, |
| "learning_rate": 2.8498632861048967e-05, |
| "loss": 4.2416, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.4941410568206942, |
| "grad_norm": 2.621948003768921, |
| "learning_rate": 2.8436490181456626e-05, |
| "loss": 4.439, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.4952465177979217, |
| "grad_norm": 2.5221333503723145, |
| "learning_rate": 2.837434750186428e-05, |
| "loss": 4.3375, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.49635197877514925, |
| "grad_norm": 2.555539608001709, |
| "learning_rate": 2.831220482227194e-05, |
| "loss": 4.3071, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.4974574397523767, |
| "grad_norm": 2.71470308303833, |
| "learning_rate": 2.8250062142679595e-05, |
| "loss": 4.3431, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.49856290072960424, |
| "grad_norm": 2.731353759765625, |
| "learning_rate": 2.8187919463087248e-05, |
| "loss": 4.4328, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.49966836170683177, |
| "grad_norm": 2.527031183242798, |
| "learning_rate": 2.8125776783494907e-05, |
| "loss": 4.3326, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.5007738226840592, |
| "grad_norm": 2.539781332015991, |
| "learning_rate": 2.8063634103902563e-05, |
| "loss": 4.3398, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.5018792836612868, |
| "grad_norm": 2.465778350830078, |
| "learning_rate": 2.8001491424310216e-05, |
| "loss": 4.1966, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.5029847446385143, |
| "grad_norm": 2.610877513885498, |
| "learning_rate": 2.7939348744717876e-05, |
| "loss": 4.4339, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.5040902056157418, |
| "grad_norm": 2.833237409591675, |
| "learning_rate": 2.787720606512553e-05, |
| "loss": 4.258, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.5051956665929692, |
| "grad_norm": 2.681429386138916, |
| "learning_rate": 2.7815063385533185e-05, |
| "loss": 4.3174, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.5063011275701967, |
| "grad_norm": 2.621767044067383, |
| "learning_rate": 2.7752920705940844e-05, |
| "loss": 4.3556, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.5074065885474243, |
| "grad_norm": 2.3988664150238037, |
| "learning_rate": 2.7690778026348497e-05, |
| "loss": 4.4304, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.5085120495246518, |
| "grad_norm": 2.6011765003204346, |
| "learning_rate": 2.7628635346756153e-05, |
| "loss": 4.3996, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5096175105018793, |
| "grad_norm": 2.5418872833251953, |
| "learning_rate": 2.7566492667163806e-05, |
| "loss": 4.3227, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.5107229714791068, |
| "grad_norm": 2.7040741443634033, |
| "learning_rate": 2.7504349987571466e-05, |
| "loss": 4.3522, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.5118284324563342, |
| "grad_norm": 2.4782514572143555, |
| "learning_rate": 2.7442207307979122e-05, |
| "loss": 4.2093, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.5129338934335618, |
| "grad_norm": 2.709933042526245, |
| "learning_rate": 2.7380064628386775e-05, |
| "loss": 4.3424, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.5140393544107893, |
| "grad_norm": 3.0086729526519775, |
| "learning_rate": 2.7317921948794434e-05, |
| "loss": 4.5041, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.5151448153880168, |
| "grad_norm": 2.5372843742370605, |
| "learning_rate": 2.725577926920209e-05, |
| "loss": 4.3018, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.5162502763652443, |
| "grad_norm": 2.94974684715271, |
| "learning_rate": 2.7193636589609743e-05, |
| "loss": 4.2941, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.5173557373424718, |
| "grad_norm": 2.7399137020111084, |
| "learning_rate": 2.7131493910017403e-05, |
| "loss": 4.2627, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.5184611983196993, |
| "grad_norm": 2.6174683570861816, |
| "learning_rate": 2.7069351230425055e-05, |
| "loss": 4.2011, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.5195666592969268, |
| "grad_norm": 2.434396266937256, |
| "learning_rate": 2.7007208550832715e-05, |
| "loss": 4.2168, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5206721202741543, |
| "grad_norm": 2.5760498046875, |
| "learning_rate": 2.694506587124037e-05, |
| "loss": 4.3722, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.5217775812513819, |
| "grad_norm": 2.616143226623535, |
| "learning_rate": 2.6882923191648024e-05, |
| "loss": 4.1671, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.5228830422286094, |
| "grad_norm": 2.406928539276123, |
| "learning_rate": 2.6820780512055683e-05, |
| "loss": 4.2319, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.5239885032058368, |
| "grad_norm": 2.4793832302093506, |
| "learning_rate": 2.6758637832463336e-05, |
| "loss": 4.2182, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.5250939641830643, |
| "grad_norm": 2.757474660873413, |
| "learning_rate": 2.6696495152870992e-05, |
| "loss": 4.4572, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.5261994251602918, |
| "grad_norm": 2.7199547290802, |
| "learning_rate": 2.6634352473278652e-05, |
| "loss": 4.2871, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.5273048861375194, |
| "grad_norm": 2.6695070266723633, |
| "learning_rate": 2.6572209793686305e-05, |
| "loss": 4.3649, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.5284103471147469, |
| "grad_norm": 2.5903425216674805, |
| "learning_rate": 2.651006711409396e-05, |
| "loss": 4.3604, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.5295158080919744, |
| "grad_norm": 2.871863842010498, |
| "learning_rate": 2.644792443450162e-05, |
| "loss": 4.2315, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.5306212690692018, |
| "grad_norm": 2.49452543258667, |
| "learning_rate": 2.6385781754909273e-05, |
| "loss": 4.3564, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5317267300464293, |
| "grad_norm": 2.6567633152008057, |
| "learning_rate": 2.632363907531693e-05, |
| "loss": 4.2627, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.5328321910236569, |
| "grad_norm": 2.6986489295959473, |
| "learning_rate": 2.6261496395724582e-05, |
| "loss": 4.1613, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.5339376520008844, |
| "grad_norm": 2.942229986190796, |
| "learning_rate": 2.6199353716132242e-05, |
| "loss": 4.3428, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.5350431129781119, |
| "grad_norm": 2.7262582778930664, |
| "learning_rate": 2.6137211036539898e-05, |
| "loss": 4.273, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.5361485739553393, |
| "grad_norm": 2.6394593715667725, |
| "learning_rate": 2.607506835694755e-05, |
| "loss": 4.3921, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.5372540349325668, |
| "grad_norm": 2.6989800930023193, |
| "learning_rate": 2.601292567735521e-05, |
| "loss": 4.3518, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5383594959097944, |
| "grad_norm": 2.593045711517334, |
| "learning_rate": 2.5950782997762863e-05, |
| "loss": 4.301, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.5394649568870219, |
| "grad_norm": 2.5254459381103516, |
| "learning_rate": 2.588864031817052e-05, |
| "loss": 4.209, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.5405704178642494, |
| "grad_norm": 2.765732526779175, |
| "learning_rate": 2.582649763857818e-05, |
| "loss": 4.2955, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.5416758788414769, |
| "grad_norm": 2.780750274658203, |
| "learning_rate": 2.576435495898583e-05, |
| "loss": 4.3846, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5427813398187044, |
| "grad_norm": 2.811513662338257, |
| "learning_rate": 2.5702212279393488e-05, |
| "loss": 4.4617, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.5438868007959319, |
| "grad_norm": 2.5271966457366943, |
| "learning_rate": 2.564006959980114e-05, |
| "loss": 4.1798, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.5449922617731594, |
| "grad_norm": 2.721851348876953, |
| "learning_rate": 2.55779269202088e-05, |
| "loss": 4.2644, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.5460977227503869, |
| "grad_norm": 2.618861436843872, |
| "learning_rate": 2.551578424061646e-05, |
| "loss": 4.317, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.5472031837276145, |
| "grad_norm": 2.3622546195983887, |
| "learning_rate": 2.545364156102411e-05, |
| "loss": 4.4589, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.548308644704842, |
| "grad_norm": 2.5185422897338867, |
| "learning_rate": 2.539149888143177e-05, |
| "loss": 4.2975, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.5494141056820694, |
| "grad_norm": 2.54284930229187, |
| "learning_rate": 2.5329356201839428e-05, |
| "loss": 4.29, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.5505195666592969, |
| "grad_norm": 2.4982147216796875, |
| "learning_rate": 2.526721352224708e-05, |
| "loss": 4.3835, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5516250276365244, |
| "grad_norm": 2.5386240482330322, |
| "learning_rate": 2.5205070842654737e-05, |
| "loss": 4.4286, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.552730488613752, |
| "grad_norm": 2.5726940631866455, |
| "learning_rate": 2.514292816306239e-05, |
| "loss": 4.3666, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5538359495909795, |
| "grad_norm": 2.802129030227661, |
| "learning_rate": 2.508078548347005e-05, |
| "loss": 4.3205, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.5549414105682069, |
| "grad_norm": 2.713815212249756, |
| "learning_rate": 2.5018642803877706e-05, |
| "loss": 4.2775, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.5560468715454344, |
| "grad_norm": 2.597898244857788, |
| "learning_rate": 2.495650012428536e-05, |
| "loss": 4.2644, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.5571523325226619, |
| "grad_norm": 2.6316134929656982, |
| "learning_rate": 2.4894357444693018e-05, |
| "loss": 4.3634, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.5582577934998895, |
| "grad_norm": 2.663684129714966, |
| "learning_rate": 2.4832214765100674e-05, |
| "loss": 4.2632, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.559363254477117, |
| "grad_norm": 2.669243574142456, |
| "learning_rate": 2.4770072085508327e-05, |
| "loss": 4.3728, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.5604687154543445, |
| "grad_norm": 2.6854679584503174, |
| "learning_rate": 2.4707929405915983e-05, |
| "loss": 4.2938, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.5615741764315719, |
| "grad_norm": 2.625131130218506, |
| "learning_rate": 2.4645786726323643e-05, |
| "loss": 4.3859, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.5626796374087994, |
| "grad_norm": 2.6042797565460205, |
| "learning_rate": 2.4583644046731296e-05, |
| "loss": 4.2591, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.563785098386027, |
| "grad_norm": 2.763540267944336, |
| "learning_rate": 2.452150136713895e-05, |
| "loss": 4.2657, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.5648905593632545, |
| "grad_norm": 2.8229899406433105, |
| "learning_rate": 2.4459358687546608e-05, |
| "loss": 4.3078, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.565996020340482, |
| "grad_norm": 2.8097963333129883, |
| "learning_rate": 2.4397216007954264e-05, |
| "loss": 4.3871, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.5671014813177094, |
| "grad_norm": 2.6240086555480957, |
| "learning_rate": 2.433507332836192e-05, |
| "loss": 4.286, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.568206942294937, |
| "grad_norm": 2.685115098953247, |
| "learning_rate": 2.4272930648769576e-05, |
| "loss": 4.2783, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.5693124032721645, |
| "grad_norm": 2.697061538696289, |
| "learning_rate": 2.4210787969177233e-05, |
| "loss": 4.4211, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.570417864249392, |
| "grad_norm": 2.8929386138916016, |
| "learning_rate": 2.4148645289584885e-05, |
| "loss": 4.3608, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.5715233252266195, |
| "grad_norm": 2.6032614707946777, |
| "learning_rate": 2.4086502609992545e-05, |
| "loss": 4.2024, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.572628786203847, |
| "grad_norm": 2.629255533218384, |
| "learning_rate": 2.40243599304002e-05, |
| "loss": 4.4302, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.5737342471810745, |
| "grad_norm": 2.5833659172058105, |
| "learning_rate": 2.3962217250807857e-05, |
| "loss": 4.372, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.574839708158302, |
| "grad_norm": 2.425273895263672, |
| "learning_rate": 2.390007457121551e-05, |
| "loss": 4.2089, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.5759451691355295, |
| "grad_norm": 2.651646375656128, |
| "learning_rate": 2.383793189162317e-05, |
| "loss": 4.2374, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.577050630112757, |
| "grad_norm": 2.894827365875244, |
| "learning_rate": 2.3775789212030826e-05, |
| "loss": 4.105, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.5781560910899846, |
| "grad_norm": 2.646923780441284, |
| "learning_rate": 2.371364653243848e-05, |
| "loss": 4.3908, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.5792615520672121, |
| "grad_norm": 2.8050379753112793, |
| "learning_rate": 2.3651503852846135e-05, |
| "loss": 4.3573, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.5803670130444395, |
| "grad_norm": 2.8766565322875977, |
| "learning_rate": 2.358936117325379e-05, |
| "loss": 4.2688, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.581472474021667, |
| "grad_norm": 2.452597141265869, |
| "learning_rate": 2.3527218493661447e-05, |
| "loss": 4.3922, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.5825779349988945, |
| "grad_norm": 2.8422110080718994, |
| "learning_rate": 2.3465075814069103e-05, |
| "loss": 4.3008, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.5836833959761221, |
| "grad_norm": 2.661015033721924, |
| "learning_rate": 2.340293313447676e-05, |
| "loss": 4.2432, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.5847888569533496, |
| "grad_norm": 2.7962839603424072, |
| "learning_rate": 2.3340790454884416e-05, |
| "loss": 4.4387, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.585894317930577, |
| "grad_norm": 2.807640552520752, |
| "learning_rate": 2.3278647775292072e-05, |
| "loss": 4.3026, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.5869997789078045, |
| "grad_norm": 2.77174711227417, |
| "learning_rate": 2.3216505095699728e-05, |
| "loss": 4.3376, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.588105239885032, |
| "grad_norm": 2.6385319232940674, |
| "learning_rate": 2.3154362416107384e-05, |
| "loss": 4.211, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.5892107008622596, |
| "grad_norm": 2.464839458465576, |
| "learning_rate": 2.309221973651504e-05, |
| "loss": 4.1263, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.5903161618394871, |
| "grad_norm": 2.5542917251586914, |
| "learning_rate": 2.3030077056922693e-05, |
| "loss": 4.281, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.5914216228167146, |
| "grad_norm": 2.796891450881958, |
| "learning_rate": 2.2967934377330353e-05, |
| "loss": 4.2626, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.592527083793942, |
| "grad_norm": 2.6826398372650146, |
| "learning_rate": 2.290579169773801e-05, |
| "loss": 4.1999, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.5936325447711696, |
| "grad_norm": 2.77254581451416, |
| "learning_rate": 2.284364901814566e-05, |
| "loss": 4.3298, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.5947380057483971, |
| "grad_norm": 2.6188175678253174, |
| "learning_rate": 2.2781506338553318e-05, |
| "loss": 4.2272, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.5958434667256246, |
| "grad_norm": 2.374133825302124, |
| "learning_rate": 2.2719363658960977e-05, |
| "loss": 4.425, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.5969489277028521, |
| "grad_norm": 2.516446352005005, |
| "learning_rate": 2.2657220979368633e-05, |
| "loss": 4.3096, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.5980543886800795, |
| "grad_norm": 2.5473289489746094, |
| "learning_rate": 2.2595078299776286e-05, |
| "loss": 4.3916, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.5991598496573071, |
| "grad_norm": 2.9763638973236084, |
| "learning_rate": 2.2532935620183942e-05, |
| "loss": 4.2488, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.6002653106345346, |
| "grad_norm": 2.831369161605835, |
| "learning_rate": 2.2470792940591602e-05, |
| "loss": 4.4136, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.6013707716117621, |
| "grad_norm": 2.77677845954895, |
| "learning_rate": 2.2408650260999255e-05, |
| "loss": 4.3703, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.6024762325889896, |
| "grad_norm": 3.102226972579956, |
| "learning_rate": 2.234650758140691e-05, |
| "loss": 4.389, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.6035816935662172, |
| "grad_norm": 2.694725275039673, |
| "learning_rate": 2.2284364901814567e-05, |
| "loss": 4.3748, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.6046871545434446, |
| "grad_norm": 2.628998041152954, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 4.2702, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.6057926155206721, |
| "grad_norm": 2.5050158500671387, |
| "learning_rate": 2.216007954262988e-05, |
| "loss": 4.4498, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.6068980764978996, |
| "grad_norm": 3.0304501056671143, |
| "learning_rate": 2.2097936863037536e-05, |
| "loss": 4.2093, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.6080035374751271, |
| "grad_norm": 2.7480475902557373, |
| "learning_rate": 2.2035794183445192e-05, |
| "loss": 4.452, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.6091089984523547, |
| "grad_norm": 2.5752625465393066, |
| "learning_rate": 2.1973651503852845e-05, |
| "loss": 4.1986, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.6102144594295822, |
| "grad_norm": 2.9249074459075928, |
| "learning_rate": 2.1911508824260504e-05, |
| "loss": 4.2884, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.6113199204068096, |
| "grad_norm": 2.565080165863037, |
| "learning_rate": 2.184936614466816e-05, |
| "loss": 4.3698, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.6124253813840371, |
| "grad_norm": 2.9593536853790283, |
| "learning_rate": 2.1787223465075816e-05, |
| "loss": 4.4363, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.6135308423612647, |
| "grad_norm": 2.698092460632324, |
| "learning_rate": 2.172508078548347e-05, |
| "loss": 4.4131, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.6146363033384922, |
| "grad_norm": 2.6179697513580322, |
| "learning_rate": 2.1662938105891125e-05, |
| "loss": 4.2489, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.6157417643157197, |
| "grad_norm": 2.7725419998168945, |
| "learning_rate": 2.1600795426298785e-05, |
| "loss": 4.3455, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.6168472252929471, |
| "grad_norm": 2.5519633293151855, |
| "learning_rate": 2.1538652746706438e-05, |
| "loss": 4.3074, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.6179526862701746, |
| "grad_norm": 2.6183152198791504, |
| "learning_rate": 2.1476510067114094e-05, |
| "loss": 4.3562, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.6190581472474022, |
| "grad_norm": 2.5165317058563232, |
| "learning_rate": 2.141436738752175e-05, |
| "loss": 4.2388, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6201636082246297, |
| "grad_norm": 2.813973903656006, |
| "learning_rate": 2.1352224707929406e-05, |
| "loss": 4.2732, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.6212690692018572, |
| "grad_norm": 2.489633798599243, |
| "learning_rate": 2.1290082028337062e-05, |
| "loss": 4.183, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.6223745301790847, |
| "grad_norm": 2.606971502304077, |
| "learning_rate": 2.122793934874472e-05, |
| "loss": 4.3127, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.6234799911563121, |
| "grad_norm": 2.74040150642395, |
| "learning_rate": 2.1165796669152375e-05, |
| "loss": 4.3576, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.6245854521335397, |
| "grad_norm": 2.814483642578125, |
| "learning_rate": 2.110365398956003e-05, |
| "loss": 4.345, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.6256909131107672, |
| "grad_norm": 2.4296274185180664, |
| "learning_rate": 2.1041511309967687e-05, |
| "loss": 4.2154, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.6267963740879947, |
| "grad_norm": 3.018310785293579, |
| "learning_rate": 2.0979368630375343e-05, |
| "loss": 4.2779, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.6279018350652222, |
| "grad_norm": 2.85764741897583, |
| "learning_rate": 2.0917225950783e-05, |
| "loss": 4.2533, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.6290072960424496, |
| "grad_norm": 2.690497398376465, |
| "learning_rate": 2.0855083271190652e-05, |
| "loss": 4.3148, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.6301127570196772, |
| "grad_norm": 2.5241053104400635, |
| "learning_rate": 2.0792940591598312e-05, |
| "loss": 4.3019, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.6312182179969047, |
| "grad_norm": 2.63004732131958, |
| "learning_rate": 2.0730797912005968e-05, |
| "loss": 4.3274, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.6323236789741322, |
| "grad_norm": 2.6619880199432373, |
| "learning_rate": 2.066865523241362e-05, |
| "loss": 4.4063, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.6334291399513597, |
| "grad_norm": 2.918989419937134, |
| "learning_rate": 2.0606512552821277e-05, |
| "loss": 4.3446, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.6345346009285873, |
| "grad_norm": 2.6898226737976074, |
| "learning_rate": 2.0544369873228937e-05, |
| "loss": 4.3895, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.6356400619058147, |
| "grad_norm": 2.659388542175293, |
| "learning_rate": 2.0482227193636593e-05, |
| "loss": 4.2844, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.6367455228830422, |
| "grad_norm": 2.9145493507385254, |
| "learning_rate": 2.0420084514044246e-05, |
| "loss": 4.234, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.6378509838602697, |
| "grad_norm": 2.542527198791504, |
| "learning_rate": 2.03579418344519e-05, |
| "loss": 4.2848, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.6389564448374973, |
| "grad_norm": 2.690652847290039, |
| "learning_rate": 2.0295799154859558e-05, |
| "loss": 4.2601, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.6400619058147248, |
| "grad_norm": 2.74469256401062, |
| "learning_rate": 2.0233656475267214e-05, |
| "loss": 4.2875, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.6411673667919523, |
| "grad_norm": 2.5279908180236816, |
| "learning_rate": 2.017151379567487e-05, |
| "loss": 4.3336, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6422728277691797, |
| "grad_norm": 2.6275908946990967, |
| "learning_rate": 2.0109371116082526e-05, |
| "loss": 4.3125, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.6433782887464072, |
| "grad_norm": 2.629896879196167, |
| "learning_rate": 2.0047228436490183e-05, |
| "loss": 4.3233, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.6444837497236348, |
| "grad_norm": 2.8916358947753906, |
| "learning_rate": 1.998508575689784e-05, |
| "loss": 4.2835, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.6455892107008623, |
| "grad_norm": 2.6450507640838623, |
| "learning_rate": 1.9922943077305495e-05, |
| "loss": 4.3504, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.6466946716780898, |
| "grad_norm": 2.617589235305786, |
| "learning_rate": 1.986080039771315e-05, |
| "loss": 4.4431, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.6478001326553172, |
| "grad_norm": 2.4875051975250244, |
| "learning_rate": 1.9798657718120804e-05, |
| "loss": 4.3341, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.6489055936325447, |
| "grad_norm": 2.5593132972717285, |
| "learning_rate": 1.9736515038528463e-05, |
| "loss": 4.335, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.6500110546097723, |
| "grad_norm": 2.687657594680786, |
| "learning_rate": 1.967437235893612e-05, |
| "loss": 4.3632, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.6511165155869998, |
| "grad_norm": 2.605257987976074, |
| "learning_rate": 1.9612229679343776e-05, |
| "loss": 4.3999, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.6522219765642273, |
| "grad_norm": 2.3589608669281006, |
| "learning_rate": 1.955008699975143e-05, |
| "loss": 4.2815, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6533274375414548, |
| "grad_norm": 2.8207266330718994, |
| "learning_rate": 1.9487944320159085e-05, |
| "loss": 4.2614, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.6544328985186822, |
| "grad_norm": 2.7098288536071777, |
| "learning_rate": 1.9425801640566744e-05, |
| "loss": 4.2278, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.6555383594959098, |
| "grad_norm": 2.819708824157715, |
| "learning_rate": 1.9363658960974397e-05, |
| "loss": 4.22, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.6566438204731373, |
| "grad_norm": 2.7340097427368164, |
| "learning_rate": 1.9301516281382053e-05, |
| "loss": 4.2767, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.6577492814503648, |
| "grad_norm": 2.6747171878814697, |
| "learning_rate": 1.923937360178971e-05, |
| "loss": 4.3268, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.6588547424275923, |
| "grad_norm": 2.5896904468536377, |
| "learning_rate": 1.917723092219737e-05, |
| "loss": 4.309, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.6599602034048198, |
| "grad_norm": 2.6400575637817383, |
| "learning_rate": 1.9115088242605022e-05, |
| "loss": 4.2878, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.6610656643820473, |
| "grad_norm": 2.62795352935791, |
| "learning_rate": 1.9052945563012678e-05, |
| "loss": 4.3861, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6621711253592748, |
| "grad_norm": 2.7335047721862793, |
| "learning_rate": 1.8990802883420334e-05, |
| "loss": 4.2773, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.6632765863365023, |
| "grad_norm": 2.781811237335205, |
| "learning_rate": 1.892866020382799e-05, |
| "loss": 4.3049, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6643820473137299, |
| "grad_norm": 2.65694522857666, |
| "learning_rate": 1.8866517524235646e-05, |
| "loss": 4.2534, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.6654875082909574, |
| "grad_norm": 2.611654043197632, |
| "learning_rate": 1.8804374844643303e-05, |
| "loss": 4.2397, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.6665929692681848, |
| "grad_norm": 2.759890079498291, |
| "learning_rate": 1.874223216505096e-05, |
| "loss": 4.1524, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.6676984302454123, |
| "grad_norm": 2.7549400329589844, |
| "learning_rate": 1.868008948545861e-05, |
| "loss": 4.2703, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.6688038912226398, |
| "grad_norm": 2.606306552886963, |
| "learning_rate": 1.861794680586627e-05, |
| "loss": 4.2695, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.6699093521998674, |
| "grad_norm": 3.0413312911987305, |
| "learning_rate": 1.8555804126273927e-05, |
| "loss": 4.5286, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.6710148131770949, |
| "grad_norm": 2.6322450637817383, |
| "learning_rate": 1.849366144668158e-05, |
| "loss": 4.3509, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.6721202741543224, |
| "grad_norm": 2.7126147747039795, |
| "learning_rate": 1.8431518767089236e-05, |
| "loss": 4.502, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.6732257351315498, |
| "grad_norm": 2.5845155715942383, |
| "learning_rate": 1.8369376087496896e-05, |
| "loss": 4.4788, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.6743311961087773, |
| "grad_norm": 2.713156223297119, |
| "learning_rate": 1.8307233407904552e-05, |
| "loss": 4.4627, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.6754366570860049, |
| "grad_norm": 2.5280685424804688, |
| "learning_rate": 1.8245090728312205e-05, |
| "loss": 4.3126, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.6765421180632324, |
| "grad_norm": 2.6877503395080566, |
| "learning_rate": 1.818294804871986e-05, |
| "loss": 4.4045, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.6776475790404599, |
| "grad_norm": 2.5872035026550293, |
| "learning_rate": 1.8120805369127517e-05, |
| "loss": 4.4283, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.6787530400176873, |
| "grad_norm": 2.494570255279541, |
| "learning_rate": 1.8058662689535173e-05, |
| "loss": 4.3445, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.6798585009949148, |
| "grad_norm": 2.8552112579345703, |
| "learning_rate": 1.799652000994283e-05, |
| "loss": 4.2656, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.6809639619721424, |
| "grad_norm": 2.528190851211548, |
| "learning_rate": 1.7934377330350486e-05, |
| "loss": 4.2317, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.6820694229493699, |
| "grad_norm": 2.6249637603759766, |
| "learning_rate": 1.7872234650758142e-05, |
| "loss": 4.4084, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.6831748839265974, |
| "grad_norm": 2.8214519023895264, |
| "learning_rate": 1.7810091971165798e-05, |
| "loss": 4.4469, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.6842803449038249, |
| "grad_norm": 3.1400296688079834, |
| "learning_rate": 1.7747949291573454e-05, |
| "loss": 4.4882, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.6853858058810524, |
| "grad_norm": 2.7912092208862305, |
| "learning_rate": 1.768580661198111e-05, |
| "loss": 4.2987, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.6864912668582799, |
| "grad_norm": 2.444261312484741, |
| "learning_rate": 1.7623663932388766e-05, |
| "loss": 4.3692, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.6875967278355074, |
| "grad_norm": 2.8983335494995117, |
| "learning_rate": 1.756152125279642e-05, |
| "loss": 4.2532, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.6887021888127349, |
| "grad_norm": 2.8009955883026123, |
| "learning_rate": 1.749937857320408e-05, |
| "loss": 4.343, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.6898076497899625, |
| "grad_norm": 2.664306640625, |
| "learning_rate": 1.7437235893611735e-05, |
| "loss": 4.3392, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.6909131107671899, |
| "grad_norm": 2.744086742401123, |
| "learning_rate": 1.7375093214019388e-05, |
| "loss": 4.5081, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.6920185717444174, |
| "grad_norm": 2.5243453979492188, |
| "learning_rate": 1.7312950534427044e-05, |
| "loss": 4.138, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.6931240327216449, |
| "grad_norm": 2.879436492919922, |
| "learning_rate": 1.7250807854834704e-05, |
| "loss": 4.3065, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.6942294936988724, |
| "grad_norm": 2.766604423522949, |
| "learning_rate": 1.7188665175242356e-05, |
| "loss": 4.3584, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.6953349546761, |
| "grad_norm": 2.644548177719116, |
| "learning_rate": 1.7126522495650012e-05, |
| "loss": 4.2898, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.6964404156533275, |
| "grad_norm": 2.6209113597869873, |
| "learning_rate": 1.706437981605767e-05, |
| "loss": 4.273, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.6975458766305549, |
| "grad_norm": 2.7458090782165527, |
| "learning_rate": 1.7002237136465328e-05, |
| "loss": 4.3472, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.6986513376077824, |
| "grad_norm": 2.5772080421447754, |
| "learning_rate": 1.694009445687298e-05, |
| "loss": 4.4346, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.6997567985850099, |
| "grad_norm": 2.7952399253845215, |
| "learning_rate": 1.6877951777280637e-05, |
| "loss": 4.3793, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.7008622595622375, |
| "grad_norm": 2.724113702774048, |
| "learning_rate": 1.6815809097688293e-05, |
| "loss": 4.2947, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.701967720539465, |
| "grad_norm": 2.809077262878418, |
| "learning_rate": 1.675366641809595e-05, |
| "loss": 4.4637, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.7030731815166925, |
| "grad_norm": 2.6896934509277344, |
| "learning_rate": 1.6691523738503606e-05, |
| "loss": 4.2131, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.7041786424939199, |
| "grad_norm": 2.823146343231201, |
| "learning_rate": 1.6629381058911262e-05, |
| "loss": 4.2319, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.7052841034711474, |
| "grad_norm": 2.5893144607543945, |
| "learning_rate": 1.6567238379318918e-05, |
| "loss": 4.3153, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.706389564448375, |
| "grad_norm": 2.8390941619873047, |
| "learning_rate": 1.650509569972657e-05, |
| "loss": 4.2297, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.7074950254256025, |
| "grad_norm": 2.496361255645752, |
| "learning_rate": 1.644295302013423e-05, |
| "loss": 4.4646, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.70860048640283, |
| "grad_norm": 2.776575803756714, |
| "learning_rate": 1.6380810340541887e-05, |
| "loss": 4.5525, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.7097059473800574, |
| "grad_norm": 2.6303658485412598, |
| "learning_rate": 1.631866766094954e-05, |
| "loss": 4.3819, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.710811408357285, |
| "grad_norm": 2.4757165908813477, |
| "learning_rate": 1.6256524981357195e-05, |
| "loss": 4.2136, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.7119168693345125, |
| "grad_norm": 2.7062437534332275, |
| "learning_rate": 1.619438230176485e-05, |
| "loss": 4.3914, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.71302233031174, |
| "grad_norm": 2.7044432163238525, |
| "learning_rate": 1.613223962217251e-05, |
| "loss": 4.3731, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.7141277912889675, |
| "grad_norm": 2.7421531677246094, |
| "learning_rate": 1.6070096942580164e-05, |
| "loss": 4.4874, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.715233252266195, |
| "grad_norm": 2.770270347595215, |
| "learning_rate": 1.600795426298782e-05, |
| "loss": 4.2702, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.7163387132434225, |
| "grad_norm": 2.617872714996338, |
| "learning_rate": 1.5945811583395476e-05, |
| "loss": 4.3877, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.71744417422065, |
| "grad_norm": 2.5779149532318115, |
| "learning_rate": 1.5883668903803133e-05, |
| "loss": 4.2644, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.7185496351978775, |
| "grad_norm": 2.465280771255493, |
| "learning_rate": 1.582152622421079e-05, |
| "loss": 4.2764, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.719655096175105, |
| "grad_norm": 2.6684722900390625, |
| "learning_rate": 1.5759383544618445e-05, |
| "loss": 4.4445, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.7207605571523326, |
| "grad_norm": 2.7769546508789062, |
| "learning_rate": 1.56972408650261e-05, |
| "loss": 4.3571, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.72186601812956, |
| "grad_norm": 2.58829402923584, |
| "learning_rate": 1.5635098185433757e-05, |
| "loss": 4.2226, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.7229714791067875, |
| "grad_norm": 2.5519750118255615, |
| "learning_rate": 1.5572955505841413e-05, |
| "loss": 4.4029, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.724076940084015, |
| "grad_norm": 2.6074788570404053, |
| "learning_rate": 1.551081282624907e-05, |
| "loss": 4.0522, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.7251824010612425, |
| "grad_norm": 2.721590042114258, |
| "learning_rate": 1.5448670146656726e-05, |
| "loss": 4.1492, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.7262878620384701, |
| "grad_norm": 2.80806827545166, |
| "learning_rate": 1.538652746706438e-05, |
| "loss": 4.412, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.7273933230156976, |
| "grad_norm": 2.87967848777771, |
| "learning_rate": 1.5324384787472038e-05, |
| "loss": 4.3851, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.728498783992925, |
| "grad_norm": 2.5552468299865723, |
| "learning_rate": 1.5262242107879694e-05, |
| "loss": 4.2578, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.7296042449701525, |
| "grad_norm": 2.6064484119415283, |
| "learning_rate": 1.5200099428287349e-05, |
| "loss": 4.4176, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.73070970594738, |
| "grad_norm": 2.6501288414001465, |
| "learning_rate": 1.5137956748695003e-05, |
| "loss": 4.2782, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.7318151669246076, |
| "grad_norm": 2.7041335105895996, |
| "learning_rate": 1.5075814069102661e-05, |
| "loss": 4.4355, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.7329206279018351, |
| "grad_norm": 2.7473063468933105, |
| "learning_rate": 1.5013671389510317e-05, |
| "loss": 4.3692, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.7340260888790626, |
| "grad_norm": 2.753004312515259, |
| "learning_rate": 1.4951528709917972e-05, |
| "loss": 4.3074, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.73513154985629, |
| "grad_norm": 2.5943238735198975, |
| "learning_rate": 1.4889386030325628e-05, |
| "loss": 4.2984, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.7362370108335176, |
| "grad_norm": 3.0592753887176514, |
| "learning_rate": 1.4827243350733282e-05, |
| "loss": 4.3758, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.7373424718107451, |
| "grad_norm": 2.9579524993896484, |
| "learning_rate": 1.4765100671140942e-05, |
| "loss": 4.3336, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.7384479327879726, |
| "grad_norm": 2.8208494186401367, |
| "learning_rate": 1.4702957991548596e-05, |
| "loss": 4.3748, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.7395533937652001, |
| "grad_norm": 2.7068212032318115, |
| "learning_rate": 1.4640815311956253e-05, |
| "loss": 4.3802, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.7406588547424275, |
| "grad_norm": 2.6911303997039795, |
| "learning_rate": 1.4578672632363907e-05, |
| "loss": 4.2637, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.7417643157196551, |
| "grad_norm": 2.925656318664551, |
| "learning_rate": 1.4516529952771565e-05, |
| "loss": 4.1862, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.7428697766968826, |
| "grad_norm": 2.8226230144500732, |
| "learning_rate": 1.4454387273179221e-05, |
| "loss": 4.2084, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.7439752376741101, |
| "grad_norm": 2.73540997505188, |
| "learning_rate": 1.4392244593586876e-05, |
| "loss": 4.3171, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.7450806986513376, |
| "grad_norm": 2.88110613822937, |
| "learning_rate": 1.4330101913994532e-05, |
| "loss": 4.3005, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.7461861596285652, |
| "grad_norm": 2.618785858154297, |
| "learning_rate": 1.4267959234402186e-05, |
| "loss": 4.2863, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.7472916206057926, |
| "grad_norm": 2.434032440185547, |
| "learning_rate": 1.4205816554809844e-05, |
| "loss": 4.3868, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.7483970815830201, |
| "grad_norm": 2.4145843982696533, |
| "learning_rate": 1.41436738752175e-05, |
| "loss": 4.1055, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.7495025425602476, |
| "grad_norm": 2.813927412033081, |
| "learning_rate": 1.4081531195625155e-05, |
| "loss": 4.4497, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.7506080035374751, |
| "grad_norm": 2.5696094036102295, |
| "learning_rate": 1.4019388516032811e-05, |
| "loss": 4.2388, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.7517134645147027, |
| "grad_norm": 3.0586514472961426, |
| "learning_rate": 1.3957245836440469e-05, |
| "loss": 4.3375, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.7528189254919301, |
| "grad_norm": 2.7942728996276855, |
| "learning_rate": 1.3895103156848125e-05, |
| "loss": 4.2727, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.7539243864691576, |
| "grad_norm": 2.541633129119873, |
| "learning_rate": 1.383296047725578e-05, |
| "loss": 4.3377, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.7550298474463851, |
| "grad_norm": 2.821420192718506, |
| "learning_rate": 1.3770817797663436e-05, |
| "loss": 4.4895, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.7561353084236127, |
| "grad_norm": 2.650139570236206, |
| "learning_rate": 1.3708675118071093e-05, |
| "loss": 4.3168, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.7572407694008402, |
| "grad_norm": 2.784208059310913, |
| "learning_rate": 1.3646532438478748e-05, |
| "loss": 4.247, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.7583462303780677, |
| "grad_norm": 2.6416375637054443, |
| "learning_rate": 1.3584389758886404e-05, |
| "loss": 4.3903, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.7594516913552951, |
| "grad_norm": 2.7830934524536133, |
| "learning_rate": 1.3522247079294059e-05, |
| "loss": 4.4317, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.7605571523325226, |
| "grad_norm": 2.5094573497772217, |
| "learning_rate": 1.3460104399701715e-05, |
| "loss": 4.2657, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.7616626133097502, |
| "grad_norm": 2.6464684009552, |
| "learning_rate": 1.3397961720109373e-05, |
| "loss": 4.344, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.7627680742869777, |
| "grad_norm": 2.725152015686035, |
| "learning_rate": 1.3335819040517029e-05, |
| "loss": 4.3255, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.7638735352642052, |
| "grad_norm": 2.7001333236694336, |
| "learning_rate": 1.3273676360924683e-05, |
| "loss": 4.2375, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.7649789962414327, |
| "grad_norm": 2.7043142318725586, |
| "learning_rate": 1.321153368133234e-05, |
| "loss": 4.3848, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.7660844572186601, |
| "grad_norm": 2.5512447357177734, |
| "learning_rate": 1.3149391001739997e-05, |
| "loss": 4.3744, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.7671899181958877, |
| "grad_norm": 2.840555191040039, |
| "learning_rate": 1.3087248322147652e-05, |
| "loss": 4.3698, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.7682953791731152, |
| "grad_norm": 2.7197751998901367, |
| "learning_rate": 1.3025105642555308e-05, |
| "loss": 4.2368, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.7694008401503427, |
| "grad_norm": 2.49568247795105, |
| "learning_rate": 1.2962962962962962e-05, |
| "loss": 4.3001, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.7705063011275702, |
| "grad_norm": 2.975504159927368, |
| "learning_rate": 1.2900820283370619e-05, |
| "loss": 4.45, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.7716117621047976, |
| "grad_norm": 2.614933729171753, |
| "learning_rate": 1.2838677603778276e-05, |
| "loss": 4.3452, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7727172230820252, |
| "grad_norm": 2.6430065631866455, |
| "learning_rate": 1.2776534924185931e-05, |
| "loss": 4.2741, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.7738226840592527, |
| "grad_norm": 2.71543550491333, |
| "learning_rate": 1.2714392244593587e-05, |
| "loss": 4.4366, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.7749281450364802, |
| "grad_norm": 2.868475914001465, |
| "learning_rate": 1.2652249565001242e-05, |
| "loss": 4.4391, |
| "step": 3505 |
| }, |
| { |
| "epoch": 0.7760336060137077, |
| "grad_norm": 2.8595988750457764, |
| "learning_rate": 1.2590106885408901e-05, |
| "loss": 4.2634, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.7771390669909353, |
| "grad_norm": 2.577758312225342, |
| "learning_rate": 1.2527964205816556e-05, |
| "loss": 4.4947, |
| "step": 3515 |
| }, |
| { |
| "epoch": 0.7782445279681627, |
| "grad_norm": 2.552488088607788, |
| "learning_rate": 1.2465821526224212e-05, |
| "loss": 4.4396, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.7793499889453902, |
| "grad_norm": 2.7421538829803467, |
| "learning_rate": 1.2403678846631868e-05, |
| "loss": 4.344, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.7804554499226177, |
| "grad_norm": 2.6724436283111572, |
| "learning_rate": 1.2341536167039522e-05, |
| "loss": 4.507, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.7815609108998453, |
| "grad_norm": 2.5183072090148926, |
| "learning_rate": 1.227939348744718e-05, |
| "loss": 4.3875, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.7826663718770728, |
| "grad_norm": 2.7601890563964844, |
| "learning_rate": 1.2217250807854835e-05, |
| "loss": 4.2108, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.7837718328543002, |
| "grad_norm": 2.8598101139068604, |
| "learning_rate": 1.2155108128262491e-05, |
| "loss": 4.4034, |
| "step": 3545 |
| }, |
| { |
| "epoch": 0.7848772938315277, |
| "grad_norm": 2.6984620094299316, |
| "learning_rate": 1.2092965448670147e-05, |
| "loss": 4.3129, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.7859827548087552, |
| "grad_norm": 2.6067955493927, |
| "learning_rate": 1.2030822769077803e-05, |
| "loss": 4.1753, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.7870882157859828, |
| "grad_norm": 2.763763904571533, |
| "learning_rate": 1.196868008948546e-05, |
| "loss": 4.3784, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.7881936767632103, |
| "grad_norm": 2.5143606662750244, |
| "learning_rate": 1.1906537409893114e-05, |
| "loss": 4.3958, |
| "step": 3565 |
| }, |
| { |
| "epoch": 0.7892991377404378, |
| "grad_norm": 2.7460179328918457, |
| "learning_rate": 1.1844394730300772e-05, |
| "loss": 4.4161, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.7904045987176652, |
| "grad_norm": 2.9888150691986084, |
| "learning_rate": 1.1782252050708426e-05, |
| "loss": 4.3169, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.7915100596948927, |
| "grad_norm": 2.7542128562927246, |
| "learning_rate": 1.1720109371116084e-05, |
| "loss": 4.2701, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.7926155206721203, |
| "grad_norm": 2.622459650039673, |
| "learning_rate": 1.1657966691523739e-05, |
| "loss": 4.2324, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.7937209816493478, |
| "grad_norm": 2.7815279960632324, |
| "learning_rate": 1.1595824011931397e-05, |
| "loss": 4.4407, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.7948264426265753, |
| "grad_norm": 2.414452075958252, |
| "learning_rate": 1.1533681332339051e-05, |
| "loss": 4.2533, |
| "step": 3595 |
| }, |
| { |
| "epoch": 0.7959319036038028, |
| "grad_norm": 2.864292860031128, |
| "learning_rate": 1.1471538652746707e-05, |
| "loss": 4.3427, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.7970373645810302, |
| "grad_norm": 2.6127429008483887, |
| "learning_rate": 1.1409395973154363e-05, |
| "loss": 4.3717, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.7981428255582578, |
| "grad_norm": 2.8165504932403564, |
| "learning_rate": 1.1347253293562018e-05, |
| "loss": 4.4479, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.7992482865354853, |
| "grad_norm": 2.7605228424072266, |
| "learning_rate": 1.1285110613969676e-05, |
| "loss": 4.3603, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.8003537475127128, |
| "grad_norm": 2.749600648880005, |
| "learning_rate": 1.122296793437733e-05, |
| "loss": 4.5357, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.8014592084899403, |
| "grad_norm": 2.5620622634887695, |
| "learning_rate": 1.1160825254784988e-05, |
| "loss": 4.2939, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.8025646694671678, |
| "grad_norm": 2.840747356414795, |
| "learning_rate": 1.1098682575192643e-05, |
| "loss": 4.4695, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.8036701304443953, |
| "grad_norm": 2.9626359939575195, |
| "learning_rate": 1.1036539895600299e-05, |
| "loss": 4.3105, |
| "step": 3635 |
| }, |
| { |
| "epoch": 0.8047755914216228, |
| "grad_norm": 2.748305320739746, |
| "learning_rate": 1.0974397216007955e-05, |
| "loss": 4.3532, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.8058810523988503, |
| "grad_norm": 2.6843719482421875, |
| "learning_rate": 1.091225453641561e-05, |
| "loss": 4.2337, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.8069865133760779, |
| "grad_norm": 2.6707520484924316, |
| "learning_rate": 1.0850111856823267e-05, |
| "loss": 4.282, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.8080919743533054, |
| "grad_norm": 2.5987465381622314, |
| "learning_rate": 1.0787969177230922e-05, |
| "loss": 4.3666, |
| "step": 3655 |
| }, |
| { |
| "epoch": 0.8091974353305328, |
| "grad_norm": 2.6529898643493652, |
| "learning_rate": 1.072582649763858e-05, |
| "loss": 4.4617, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.8103028963077603, |
| "grad_norm": 2.5571646690368652, |
| "learning_rate": 1.0663683818046234e-05, |
| "loss": 4.2555, |
| "step": 3665 |
| }, |
| { |
| "epoch": 0.8114083572849878, |
| "grad_norm": 2.8901898860931396, |
| "learning_rate": 1.060154113845389e-05, |
| "loss": 4.282, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.8125138182622154, |
| "grad_norm": 2.535372018814087, |
| "learning_rate": 1.0539398458861546e-05, |
| "loss": 4.2765, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.8136192792394429, |
| "grad_norm": 2.7033450603485107, |
| "learning_rate": 1.0477255779269203e-05, |
| "loss": 4.4398, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.8147247402166704, |
| "grad_norm": 2.949090003967285, |
| "learning_rate": 1.0415113099676859e-05, |
| "loss": 4.3627, |
| "step": 3685 |
| }, |
| { |
| "epoch": 0.8158302011938978, |
| "grad_norm": 3.2762537002563477, |
| "learning_rate": 1.0352970420084515e-05, |
| "loss": 4.4777, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.8169356621711253, |
| "grad_norm": 2.536367893218994, |
| "learning_rate": 1.0290827740492171e-05, |
| "loss": 4.312, |
| "step": 3695 |
| }, |
| { |
| "epoch": 0.8180411231483529, |
| "grad_norm": 2.8747854232788086, |
| "learning_rate": 1.0228685060899826e-05, |
| "loss": 4.466, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.8191465841255804, |
| "grad_norm": 2.527646780014038, |
| "learning_rate": 1.0166542381307482e-05, |
| "loss": 4.2035, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.8202520451028079, |
| "grad_norm": 2.8456356525421143, |
| "learning_rate": 1.0104399701715138e-05, |
| "loss": 4.4013, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.8213575060800353, |
| "grad_norm": 2.6337332725524902, |
| "learning_rate": 1.0042257022122794e-05, |
| "loss": 4.4722, |
| "step": 3715 |
| }, |
| { |
| "epoch": 0.8224629670572629, |
| "grad_norm": 2.5773563385009766, |
| "learning_rate": 9.98011434253045e-06, |
| "loss": 4.3434, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.8235684280344904, |
| "grad_norm": 2.7738966941833496, |
| "learning_rate": 9.917971662938106e-06, |
| "loss": 4.3367, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.8246738890117179, |
| "grad_norm": 2.672043561935425, |
| "learning_rate": 9.855828983345763e-06, |
| "loss": 4.1075, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.8257793499889454, |
| "grad_norm": 2.633709669113159, |
| "learning_rate": 9.793686303753419e-06, |
| "loss": 4.3165, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.826884810966173, |
| "grad_norm": 2.5204927921295166, |
| "learning_rate": 9.731543624161075e-06, |
| "loss": 4.265, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.8279902719434004, |
| "grad_norm": 2.7711668014526367, |
| "learning_rate": 9.669400944568731e-06, |
| "loss": 4.3085, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.8290957329206279, |
| "grad_norm": 2.5938053131103516, |
| "learning_rate": 9.607258264976386e-06, |
| "loss": 4.4216, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.8302011938978554, |
| "grad_norm": 2.4221818447113037, |
| "learning_rate": 9.545115585384042e-06, |
| "loss": 4.2004, |
| "step": 3755 |
| }, |
| { |
| "epoch": 0.8313066548750829, |
| "grad_norm": 2.75688099861145, |
| "learning_rate": 9.482972905791698e-06, |
| "loss": 4.4424, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.8324121158523105, |
| "grad_norm": 2.8027572631835938, |
| "learning_rate": 9.420830226199354e-06, |
| "loss": 4.3706, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.8335175768295379, |
| "grad_norm": 2.787280797958374, |
| "learning_rate": 9.35868754660701e-06, |
| "loss": 4.298, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.8346230378067654, |
| "grad_norm": 2.797969341278076, |
| "learning_rate": 9.296544867014666e-06, |
| "loss": 4.4039, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.8357284987839929, |
| "grad_norm": 2.5721869468688965, |
| "learning_rate": 9.234402187422323e-06, |
| "loss": 4.3801, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.8368339597612204, |
| "grad_norm": 2.480556011199951, |
| "learning_rate": 9.172259507829977e-06, |
| "loss": 4.5008, |
| "step": 3785 |
| }, |
| { |
| "epoch": 0.837939420738448, |
| "grad_norm": 3.0445311069488525, |
| "learning_rate": 9.110116828237635e-06, |
| "loss": 4.376, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.8390448817156755, |
| "grad_norm": 2.906247615814209, |
| "learning_rate": 9.04797414864529e-06, |
| "loss": 4.1985, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.8401503426929029, |
| "grad_norm": 2.624952793121338, |
| "learning_rate": 8.985831469052947e-06, |
| "loss": 4.4116, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.8412558036701304, |
| "grad_norm": 2.826939821243286, |
| "learning_rate": 8.923688789460602e-06, |
| "loss": 4.3384, |
| "step": 3805 |
| }, |
| { |
| "epoch": 0.8423612646473579, |
| "grad_norm": 2.7362842559814453, |
| "learning_rate": 8.861546109868258e-06, |
| "loss": 4.3327, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.8434667256245855, |
| "grad_norm": 2.5066606998443604, |
| "learning_rate": 8.799403430275914e-06, |
| "loss": 4.3919, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.844572186601813, |
| "grad_norm": 2.625035524368286, |
| "learning_rate": 8.737260750683569e-06, |
| "loss": 4.3227, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.8456776475790405, |
| "grad_norm": 2.6161510944366455, |
| "learning_rate": 8.675118071091226e-06, |
| "loss": 4.273, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.8467831085562679, |
| "grad_norm": 2.6360316276550293, |
| "learning_rate": 8.612975391498881e-06, |
| "loss": 4.3517, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.8478885695334955, |
| "grad_norm": 2.945129632949829, |
| "learning_rate": 8.550832711906539e-06, |
| "loss": 4.4634, |
| "step": 3835 |
| }, |
| { |
| "epoch": 0.848994030510723, |
| "grad_norm": 2.797037124633789, |
| "learning_rate": 8.488690032314193e-06, |
| "loss": 4.3474, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.8500994914879505, |
| "grad_norm": 2.6918272972106934, |
| "learning_rate": 8.42654735272185e-06, |
| "loss": 4.1983, |
| "step": 3845 |
| }, |
| { |
| "epoch": 0.851204952465178, |
| "grad_norm": 2.786607027053833, |
| "learning_rate": 8.364404673129506e-06, |
| "loss": 4.2545, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.8523104134424054, |
| "grad_norm": 2.799255132675171, |
| "learning_rate": 8.302261993537162e-06, |
| "loss": 4.4633, |
| "step": 3855 |
| }, |
| { |
| "epoch": 0.853415874419633, |
| "grad_norm": 2.393765926361084, |
| "learning_rate": 8.240119313944818e-06, |
| "loss": 4.3144, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.8545213353968605, |
| "grad_norm": 3.014911413192749, |
| "learning_rate": 8.177976634352472e-06, |
| "loss": 4.4218, |
| "step": 3865 |
| }, |
| { |
| "epoch": 0.855626796374088, |
| "grad_norm": 2.7910256385803223, |
| "learning_rate": 8.11583395476013e-06, |
| "loss": 4.3782, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.8567322573513155, |
| "grad_norm": 2.5579280853271484, |
| "learning_rate": 8.053691275167785e-06, |
| "loss": 4.3776, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.857837718328543, |
| "grad_norm": 2.6511480808258057, |
| "learning_rate": 7.991548595575441e-06, |
| "loss": 4.3284, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.8589431793057705, |
| "grad_norm": 2.7104756832122803, |
| "learning_rate": 7.929405915983097e-06, |
| "loss": 4.3875, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.860048640282998, |
| "grad_norm": 2.8262667655944824, |
| "learning_rate": 7.867263236390753e-06, |
| "loss": 4.401, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.8611541012602255, |
| "grad_norm": 2.8072750568389893, |
| "learning_rate": 7.80512055679841e-06, |
| "loss": 4.3245, |
| "step": 3895 |
| }, |
| { |
| "epoch": 0.862259562237453, |
| "grad_norm": 3.0384953022003174, |
| "learning_rate": 7.742977877206066e-06, |
| "loss": 4.2691, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8633650232146806, |
| "grad_norm": 2.7213258743286133, |
| "learning_rate": 7.680835197613722e-06, |
| "loss": 4.3848, |
| "step": 3905 |
| }, |
| { |
| "epoch": 0.864470484191908, |
| "grad_norm": 2.9310898780822754, |
| "learning_rate": 7.618692518021378e-06, |
| "loss": 4.3875, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.8655759451691355, |
| "grad_norm": 2.7270753383636475, |
| "learning_rate": 7.556549838429033e-06, |
| "loss": 4.4668, |
| "step": 3915 |
| }, |
| { |
| "epoch": 0.866681406146363, |
| "grad_norm": 2.7479376792907715, |
| "learning_rate": 7.494407158836689e-06, |
| "loss": 4.3906, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.8677868671235905, |
| "grad_norm": 2.773819923400879, |
| "learning_rate": 7.432264479244346e-06, |
| "loss": 4.2478, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.8688923281008181, |
| "grad_norm": 2.642632484436035, |
| "learning_rate": 7.370121799652001e-06, |
| "loss": 4.3643, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.8699977890780456, |
| "grad_norm": 2.830242872238159, |
| "learning_rate": 7.307979120059657e-06, |
| "loss": 4.4359, |
| "step": 3935 |
| }, |
| { |
| "epoch": 0.871103250055273, |
| "grad_norm": 2.8000121116638184, |
| "learning_rate": 7.2458364404673125e-06, |
| "loss": 4.5984, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.8722087110325005, |
| "grad_norm": 2.8083910942077637, |
| "learning_rate": 7.1836937608749695e-06, |
| "loss": 4.2437, |
| "step": 3945 |
| }, |
| { |
| "epoch": 0.873314172009728, |
| "grad_norm": 2.6732099056243896, |
| "learning_rate": 7.121551081282625e-06, |
| "loss": 4.4326, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.8744196329869556, |
| "grad_norm": 2.4670119285583496, |
| "learning_rate": 7.059408401690282e-06, |
| "loss": 4.2204, |
| "step": 3955 |
| }, |
| { |
| "epoch": 0.8755250939641831, |
| "grad_norm": 2.698272943496704, |
| "learning_rate": 6.997265722097937e-06, |
| "loss": 4.305, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.8766305549414106, |
| "grad_norm": 2.7143428325653076, |
| "learning_rate": 6.935123042505594e-06, |
| "loss": 4.348, |
| "step": 3965 |
| }, |
| { |
| "epoch": 0.877736015918638, |
| "grad_norm": 2.571596145629883, |
| "learning_rate": 6.8729803629132495e-06, |
| "loss": 4.3278, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.8788414768958656, |
| "grad_norm": 3.0739476680755615, |
| "learning_rate": 6.810837683320905e-06, |
| "loss": 4.3202, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.8799469378730931, |
| "grad_norm": 2.72713041305542, |
| "learning_rate": 6.748695003728561e-06, |
| "loss": 4.5188, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.8810523988503206, |
| "grad_norm": 2.7530996799468994, |
| "learning_rate": 6.686552324136216e-06, |
| "loss": 4.3479, |
| "step": 3985 |
| }, |
| { |
| "epoch": 0.8821578598275481, |
| "grad_norm": 2.7766714096069336, |
| "learning_rate": 6.624409644543873e-06, |
| "loss": 4.3766, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.8832633208047755, |
| "grad_norm": 3.0622363090515137, |
| "learning_rate": 6.562266964951529e-06, |
| "loss": 4.3819, |
| "step": 3995 |
| }, |
| { |
| "epoch": 0.8843687817820031, |
| "grad_norm": 2.711118221282959, |
| "learning_rate": 6.500124285359186e-06, |
| "loss": 4.2281, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.8854742427592306, |
| "grad_norm": 2.5327889919281006, |
| "learning_rate": 6.437981605766841e-06, |
| "loss": 4.3108, |
| "step": 4005 |
| }, |
| { |
| "epoch": 0.8865797037364581, |
| "grad_norm": 2.6793577671051025, |
| "learning_rate": 6.375838926174497e-06, |
| "loss": 4.419, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.8876851647136856, |
| "grad_norm": 2.7030229568481445, |
| "learning_rate": 6.3136962465821526e-06, |
| "loss": 4.1583, |
| "step": 4015 |
| }, |
| { |
| "epoch": 0.8887906256909132, |
| "grad_norm": 2.6065833568573, |
| "learning_rate": 6.2515535669898096e-06, |
| "loss": 4.5478, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.8898960866681406, |
| "grad_norm": 2.8415439128875732, |
| "learning_rate": 6.189410887397465e-06, |
| "loss": 4.4606, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.8910015476453681, |
| "grad_norm": 2.9203150272369385, |
| "learning_rate": 6.127268207805121e-06, |
| "loss": 4.207, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.8921070086225956, |
| "grad_norm": 2.5476462841033936, |
| "learning_rate": 6.065125528212777e-06, |
| "loss": 4.3745, |
| "step": 4035 |
| }, |
| { |
| "epoch": 0.8932124695998231, |
| "grad_norm": 3.014671564102173, |
| "learning_rate": 6.002982848620433e-06, |
| "loss": 4.2554, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.8943179305770507, |
| "grad_norm": 2.628617763519287, |
| "learning_rate": 5.940840169028089e-06, |
| "loss": 4.329, |
| "step": 4045 |
| }, |
| { |
| "epoch": 0.8954233915542781, |
| "grad_norm": 2.746119737625122, |
| "learning_rate": 5.878697489435745e-06, |
| "loss": 4.2055, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.8965288525315056, |
| "grad_norm": 2.9705591201782227, |
| "learning_rate": 5.8165548098434e-06, |
| "loss": 4.3678, |
| "step": 4055 |
| }, |
| { |
| "epoch": 0.8976343135087331, |
| "grad_norm": 2.6920156478881836, |
| "learning_rate": 5.754412130251056e-06, |
| "loss": 4.2901, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.8987397744859607, |
| "grad_norm": 2.442110538482666, |
| "learning_rate": 5.692269450658713e-06, |
| "loss": 4.3028, |
| "step": 4065 |
| }, |
| { |
| "epoch": 0.8998452354631882, |
| "grad_norm": 2.74092698097229, |
| "learning_rate": 5.630126771066369e-06, |
| "loss": 4.3002, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.9009506964404157, |
| "grad_norm": 2.442526340484619, |
| "learning_rate": 5.567984091474025e-06, |
| "loss": 4.284, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.9020561574176431, |
| "grad_norm": 2.78788161277771, |
| "learning_rate": 5.50584141188168e-06, |
| "loss": 4.3699, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.9031616183948706, |
| "grad_norm": 2.884793281555176, |
| "learning_rate": 5.4436987322893364e-06, |
| "loss": 4.3204, |
| "step": 4085 |
| }, |
| { |
| "epoch": 0.9042670793720982, |
| "grad_norm": 2.645921230316162, |
| "learning_rate": 5.381556052696993e-06, |
| "loss": 4.4775, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.9053725403493257, |
| "grad_norm": 2.7526016235351562, |
| "learning_rate": 5.319413373104649e-06, |
| "loss": 4.2971, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.9064780013265532, |
| "grad_norm": 2.6196508407592773, |
| "learning_rate": 5.257270693512305e-06, |
| "loss": 4.32, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.9075834623037807, |
| "grad_norm": 2.9636263847351074, |
| "learning_rate": 5.195128013919961e-06, |
| "loss": 4.3498, |
| "step": 4105 |
| }, |
| { |
| "epoch": 0.9086889232810081, |
| "grad_norm": 2.7609803676605225, |
| "learning_rate": 5.1329853343276164e-06, |
| "loss": 4.3134, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.9097943842582357, |
| "grad_norm": 2.84635329246521, |
| "learning_rate": 5.070842654735273e-06, |
| "loss": 4.5655, |
| "step": 4115 |
| }, |
| { |
| "epoch": 0.9108998452354632, |
| "grad_norm": 2.9101991653442383, |
| "learning_rate": 5.008699975142928e-06, |
| "loss": 4.3149, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.9120053062126907, |
| "grad_norm": 2.50285005569458, |
| "learning_rate": 4.946557295550584e-06, |
| "loss": 4.5046, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.9131107671899182, |
| "grad_norm": 2.6111807823181152, |
| "learning_rate": 4.88441461595824e-06, |
| "loss": 4.4572, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.9142162281671457, |
| "grad_norm": 2.8482987880706787, |
| "learning_rate": 4.8222719363658965e-06, |
| "loss": 4.2982, |
| "step": 4135 |
| }, |
| { |
| "epoch": 0.9153216891443732, |
| "grad_norm": 2.635841131210327, |
| "learning_rate": 4.760129256773553e-06, |
| "loss": 4.3807, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.9164271501216007, |
| "grad_norm": 2.969567060470581, |
| "learning_rate": 4.697986577181209e-06, |
| "loss": 4.5337, |
| "step": 4145 |
| }, |
| { |
| "epoch": 0.9175326110988282, |
| "grad_norm": 2.5630719661712646, |
| "learning_rate": 4.635843897588864e-06, |
| "loss": 4.2317, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.9186380720760557, |
| "grad_norm": 3.0482473373413086, |
| "learning_rate": 4.57370121799652e-06, |
| "loss": 4.3618, |
| "step": 4155 |
| }, |
| { |
| "epoch": 0.9197435330532833, |
| "grad_norm": 2.6049513816833496, |
| "learning_rate": 4.511558538404176e-06, |
| "loss": 4.3415, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.9208489940305107, |
| "grad_norm": 2.672549247741699, |
| "learning_rate": 4.449415858811832e-06, |
| "loss": 4.3546, |
| "step": 4165 |
| }, |
| { |
| "epoch": 0.9219544550077382, |
| "grad_norm": 2.3971190452575684, |
| "learning_rate": 4.387273179219488e-06, |
| "loss": 4.4124, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.9230599159849657, |
| "grad_norm": 2.9324026107788086, |
| "learning_rate": 4.325130499627144e-06, |
| "loss": 4.4178, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.9241653769621933, |
| "grad_norm": 2.6847023963928223, |
| "learning_rate": 4.2629878200348e-06, |
| "loss": 4.3332, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.9252708379394208, |
| "grad_norm": 2.586578369140625, |
| "learning_rate": 4.2008451404424565e-06, |
| "loss": 4.3377, |
| "step": 4185 |
| }, |
| { |
| "epoch": 0.9263762989166482, |
| "grad_norm": 2.6753554344177246, |
| "learning_rate": 4.138702460850112e-06, |
| "loss": 4.4402, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.9274817598938757, |
| "grad_norm": 2.7684082984924316, |
| "learning_rate": 4.076559781257768e-06, |
| "loss": 4.3591, |
| "step": 4195 |
| }, |
| { |
| "epoch": 0.9285872208711032, |
| "grad_norm": 2.5447866916656494, |
| "learning_rate": 4.014417101665424e-06, |
| "loss": 4.2935, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.9296926818483308, |
| "grad_norm": 2.744508981704712, |
| "learning_rate": 3.95227442207308e-06, |
| "loss": 4.4423, |
| "step": 4205 |
| }, |
| { |
| "epoch": 0.9307981428255583, |
| "grad_norm": 2.8013176918029785, |
| "learning_rate": 3.8901317424807365e-06, |
| "loss": 4.4143, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.9319036038027858, |
| "grad_norm": 2.7098312377929688, |
| "learning_rate": 3.827989062888392e-06, |
| "loss": 4.4103, |
| "step": 4215 |
| }, |
| { |
| "epoch": 0.9330090647800132, |
| "grad_norm": 2.6168668270111084, |
| "learning_rate": 3.7658463832960476e-06, |
| "loss": 4.2801, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.9341145257572407, |
| "grad_norm": 2.5833184719085693, |
| "learning_rate": 3.7037037037037037e-06, |
| "loss": 4.4013, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.9352199867344683, |
| "grad_norm": 2.377253293991089, |
| "learning_rate": 3.64156102411136e-06, |
| "loss": 4.185, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.9363254477116958, |
| "grad_norm": 2.6081435680389404, |
| "learning_rate": 3.5794183445190157e-06, |
| "loss": 4.253, |
| "step": 4235 |
| }, |
| { |
| "epoch": 0.9374309086889233, |
| "grad_norm": 2.711153030395508, |
| "learning_rate": 3.517275664926672e-06, |
| "loss": 4.3479, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.9385363696661508, |
| "grad_norm": 2.4365053176879883, |
| "learning_rate": 3.455132985334328e-06, |
| "loss": 4.1939, |
| "step": 4245 |
| }, |
| { |
| "epoch": 0.9396418306433783, |
| "grad_norm": 2.638932704925537, |
| "learning_rate": 3.3929903057419838e-06, |
| "loss": 4.3875, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.9407472916206058, |
| "grad_norm": 2.5555827617645264, |
| "learning_rate": 3.33084762614964e-06, |
| "loss": 4.2698, |
| "step": 4255 |
| }, |
| { |
| "epoch": 0.9418527525978333, |
| "grad_norm": 2.713468074798584, |
| "learning_rate": 3.268704946557296e-06, |
| "loss": 4.349, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.9429582135750608, |
| "grad_norm": 2.841186761856079, |
| "learning_rate": 3.206562266964952e-06, |
| "loss": 4.2716, |
| "step": 4265 |
| }, |
| { |
| "epoch": 0.9440636745522883, |
| "grad_norm": 2.8116109371185303, |
| "learning_rate": 3.144419587372607e-06, |
| "loss": 4.4094, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.9451691355295158, |
| "grad_norm": 2.7146096229553223, |
| "learning_rate": 3.0822769077802638e-06, |
| "loss": 4.2455, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.9462745965067433, |
| "grad_norm": 2.577312469482422, |
| "learning_rate": 3.02013422818792e-06, |
| "loss": 4.3422, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.9473800574839708, |
| "grad_norm": 2.4600229263305664, |
| "learning_rate": 2.9579915485955753e-06, |
| "loss": 4.511, |
| "step": 4285 |
| }, |
| { |
| "epoch": 0.9484855184611983, |
| "grad_norm": 2.7700321674346924, |
| "learning_rate": 2.8958488690032314e-06, |
| "loss": 4.2781, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.9495909794384259, |
| "grad_norm": 2.7642529010772705, |
| "learning_rate": 2.8337061894108876e-06, |
| "loss": 4.3321, |
| "step": 4295 |
| }, |
| { |
| "epoch": 0.9506964404156534, |
| "grad_norm": 2.4941701889038086, |
| "learning_rate": 2.7715635098185434e-06, |
| "loss": 4.2922, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.9518019013928808, |
| "grad_norm": 2.6204841136932373, |
| "learning_rate": 2.7094208302261995e-06, |
| "loss": 4.4099, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.9529073623701083, |
| "grad_norm": 2.7678253650665283, |
| "learning_rate": 2.6472781506338553e-06, |
| "loss": 4.5308, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.9540128233473358, |
| "grad_norm": 2.610168218612671, |
| "learning_rate": 2.5851354710415115e-06, |
| "loss": 4.2916, |
| "step": 4315 |
| }, |
| { |
| "epoch": 0.9551182843245634, |
| "grad_norm": 2.404608726501465, |
| "learning_rate": 2.522992791449167e-06, |
| "loss": 4.1172, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.9562237453017909, |
| "grad_norm": 2.581918478012085, |
| "learning_rate": 2.4608501118568234e-06, |
| "loss": 4.5247, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.9573292062790183, |
| "grad_norm": 2.4554283618927, |
| "learning_rate": 2.3987074322644795e-06, |
| "loss": 4.4112, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.9584346672562458, |
| "grad_norm": 3.0333340167999268, |
| "learning_rate": 2.3365647526721353e-06, |
| "loss": 4.4101, |
| "step": 4335 |
| }, |
| { |
| "epoch": 0.9595401282334733, |
| "grad_norm": 2.745823621749878, |
| "learning_rate": 2.274422073079791e-06, |
| "loss": 4.4591, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.9606455892107009, |
| "grad_norm": 2.8770716190338135, |
| "learning_rate": 2.2122793934874472e-06, |
| "loss": 4.4189, |
| "step": 4345 |
| }, |
| { |
| "epoch": 0.9617510501879284, |
| "grad_norm": 2.701787233352661, |
| "learning_rate": 2.1501367138951034e-06, |
| "loss": 4.4115, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.9628565111651559, |
| "grad_norm": 2.8112969398498535, |
| "learning_rate": 2.087994034302759e-06, |
| "loss": 4.3162, |
| "step": 4355 |
| }, |
| { |
| "epoch": 0.9639619721423833, |
| "grad_norm": 2.660151958465576, |
| "learning_rate": 2.0258513547104153e-06, |
| "loss": 4.4636, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.9650674331196109, |
| "grad_norm": 2.6464245319366455, |
| "learning_rate": 1.963708675118071e-06, |
| "loss": 4.5144, |
| "step": 4365 |
| }, |
| { |
| "epoch": 0.9661728940968384, |
| "grad_norm": 2.581138849258423, |
| "learning_rate": 1.901565995525727e-06, |
| "loss": 4.3598, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.9672783550740659, |
| "grad_norm": 2.4853599071502686, |
| "learning_rate": 1.8394233159333832e-06, |
| "loss": 4.2964, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.9683838160512934, |
| "grad_norm": 2.554091691970825, |
| "learning_rate": 1.7772806363410391e-06, |
| "loss": 4.4226, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.969489277028521, |
| "grad_norm": 2.9564058780670166, |
| "learning_rate": 1.7151379567486951e-06, |
| "loss": 4.3925, |
| "step": 4385 |
| }, |
| { |
| "epoch": 0.9705947380057484, |
| "grad_norm": 2.502652406692505, |
| "learning_rate": 1.6529952771563513e-06, |
| "loss": 4.3428, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.9717001989829759, |
| "grad_norm": 2.493762969970703, |
| "learning_rate": 1.5908525975640068e-06, |
| "loss": 4.249, |
| "step": 4395 |
| }, |
| { |
| "epoch": 0.9728056599602034, |
| "grad_norm": 2.4519858360290527, |
| "learning_rate": 1.528709917971663e-06, |
| "loss": 4.2229, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.9739111209374309, |
| "grad_norm": 2.7903311252593994, |
| "learning_rate": 1.466567238379319e-06, |
| "loss": 4.4687, |
| "step": 4405 |
| }, |
| { |
| "epoch": 0.9750165819146585, |
| "grad_norm": 2.556363821029663, |
| "learning_rate": 1.4044245587869751e-06, |
| "loss": 4.3987, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.9761220428918859, |
| "grad_norm": 2.542534351348877, |
| "learning_rate": 1.3422818791946309e-06, |
| "loss": 4.4648, |
| "step": 4415 |
| }, |
| { |
| "epoch": 0.9772275038691134, |
| "grad_norm": 2.5431811809539795, |
| "learning_rate": 1.280139199602287e-06, |
| "loss": 4.3701, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.9783329648463409, |
| "grad_norm": 2.6445794105529785, |
| "learning_rate": 1.2179965200099428e-06, |
| "loss": 4.263, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.9794384258235684, |
| "grad_norm": 2.8488686084747314, |
| "learning_rate": 1.1558538404175988e-06, |
| "loss": 4.2224, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.980543886800796, |
| "grad_norm": 2.919131278991699, |
| "learning_rate": 1.093711160825255e-06, |
| "loss": 4.3791, |
| "step": 4435 |
| }, |
| { |
| "epoch": 0.9816493477780235, |
| "grad_norm": 2.830904483795166, |
| "learning_rate": 1.0315684812329107e-06, |
| "loss": 4.3433, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.9827548087552509, |
| "grad_norm": 2.7437570095062256, |
| "learning_rate": 9.694258016405668e-07, |
| "loss": 4.2391, |
| "step": 4445 |
| }, |
| { |
| "epoch": 0.9838602697324784, |
| "grad_norm": 2.664886713027954, |
| "learning_rate": 9.072831220482228e-07, |
| "loss": 4.488, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.9849657307097059, |
| "grad_norm": 2.518346071243286, |
| "learning_rate": 8.451404424558787e-07, |
| "loss": 4.4019, |
| "step": 4455 |
| }, |
| { |
| "epoch": 0.9860711916869335, |
| "grad_norm": 2.9975318908691406, |
| "learning_rate": 7.829977628635347e-07, |
| "loss": 4.2323, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.987176652664161, |
| "grad_norm": 2.6765410900115967, |
| "learning_rate": 7.208550832711907e-07, |
| "loss": 4.3188, |
| "step": 4465 |
| }, |
| { |
| "epoch": 0.9882821136413884, |
| "grad_norm": 2.8536341190338135, |
| "learning_rate": 6.587124036788466e-07, |
| "loss": 4.4314, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.9893875746186159, |
| "grad_norm": 2.316105365753174, |
| "learning_rate": 5.965697240865026e-07, |
| "loss": 4.5006, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.9904930355958435, |
| "grad_norm": 2.705261468887329, |
| "learning_rate": 5.344270444941587e-07, |
| "loss": 4.4413, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.991598496573071, |
| "grad_norm": 2.7570252418518066, |
| "learning_rate": 4.722843649018146e-07, |
| "loss": 4.4869, |
| "step": 4485 |
| }, |
| { |
| "epoch": 0.9927039575502985, |
| "grad_norm": 2.687154531478882, |
| "learning_rate": 4.1014168530947054e-07, |
| "loss": 4.5021, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.993809418527526, |
| "grad_norm": 2.885932683944702, |
| "learning_rate": 3.4799900571712656e-07, |
| "loss": 4.3324, |
| "step": 4495 |
| }, |
| { |
| "epoch": 0.9949148795047534, |
| "grad_norm": 2.6431424617767334, |
| "learning_rate": 2.858563261247825e-07, |
| "loss": 4.4544, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.996020340481981, |
| "grad_norm": 2.5612587928771973, |
| "learning_rate": 2.2371364653243848e-07, |
| "loss": 4.4129, |
| "step": 4505 |
| }, |
| { |
| "epoch": 0.9971258014592085, |
| "grad_norm": 2.5301103591918945, |
| "learning_rate": 1.6157096694009447e-07, |
| "loss": 4.2888, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.998231262436436, |
| "grad_norm": 2.852886199951172, |
| "learning_rate": 9.942828734775043e-08, |
| "loss": 4.4167, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.9993367234136635, |
| "grad_norm": 3.017920970916748, |
| "learning_rate": 3.728560775540641e-08, |
| "loss": 4.2826, |
| "step": 4520 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 4523, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.7364382421434368e+16, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|