| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 4523, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001105460977227504, |
| "grad_norm": 2.856322765350342, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 4.5165, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002210921954455008, |
| "grad_norm": 2.6451292037963867, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 4.3082, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0033163829316825116, |
| "grad_norm": 3.0033833980560303, |
| "learning_rate": 1.5e-06, |
| "loss": 4.3804, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.004421843908910016, |
| "grad_norm": 2.5815796852111816, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 4.3296, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0055273048861375195, |
| "grad_norm": 2.6665921211242676, |
| "learning_rate": 2.5e-06, |
| "loss": 4.274, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.006632765863365023, |
| "grad_norm": 2.6247975826263428, |
| "learning_rate": 3e-06, |
| "loss": 4.3979, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.007738226840592527, |
| "grad_norm": 2.822925329208374, |
| "learning_rate": 3.5000000000000004e-06, |
| "loss": 4.3966, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.008843687817820032, |
| "grad_norm": 2.9031052589416504, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 4.2606, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.009949148795047534, |
| "grad_norm": 2.43031907081604, |
| "learning_rate": 4.5e-06, |
| "loss": 4.3432, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.011054609772275039, |
| "grad_norm": 2.6758840084075928, |
| "learning_rate": 5e-06, |
| "loss": 4.3543, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.012160070749502542, |
| "grad_norm": 2.5539205074310303, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 4.2527, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.013265531726730046, |
| "grad_norm": 2.552877187728882, |
| "learning_rate": 6e-06, |
| "loss": 4.2603, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.014370992703957551, |
| "grad_norm": 2.7067909240722656, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 4.3525, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.015476453681185054, |
| "grad_norm": 2.633598566055298, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 4.2775, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.016581914658412557, |
| "grad_norm": 2.582083225250244, |
| "learning_rate": 7.5e-06, |
| "loss": 4.4059, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.017687375635640063, |
| "grad_norm": 2.7137420177459717, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 4.2763, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.018792836612867566, |
| "grad_norm": 2.743177652359009, |
| "learning_rate": 8.500000000000002e-06, |
| "loss": 4.3027, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.01989829759009507, |
| "grad_norm": 2.9156761169433594, |
| "learning_rate": 9e-06, |
| "loss": 4.3686, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.021003758567322575, |
| "grad_norm": 2.936218738555908, |
| "learning_rate": 9.5e-06, |
| "loss": 4.3308, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.022109219544550078, |
| "grad_norm": 2.5734968185424805, |
| "learning_rate": 1e-05, |
| "loss": 4.144, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02321468052177758, |
| "grad_norm": 3.0580193996429443, |
| "learning_rate": 1.05e-05, |
| "loss": 4.2334, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.024320141499005084, |
| "grad_norm": 2.8130428791046143, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 4.0793, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.02542560247623259, |
| "grad_norm": 2.9316952228546143, |
| "learning_rate": 1.1500000000000002e-05, |
| "loss": 4.2116, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.026531063453460093, |
| "grad_norm": 2.8418164253234863, |
| "learning_rate": 1.2e-05, |
| "loss": 4.1086, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.027636524430687596, |
| "grad_norm": 2.6649138927459717, |
| "learning_rate": 1.25e-05, |
| "loss": 4.1225, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.028741985407915102, |
| "grad_norm": 2.6316049098968506, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 4.3653, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.029847446385142605, |
| "grad_norm": 2.9526383876800537, |
| "learning_rate": 1.3500000000000001e-05, |
| "loss": 4.2687, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.030952907362370108, |
| "grad_norm": 2.761291980743408, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 4.3326, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03205836833959761, |
| "grad_norm": 2.6319758892059326, |
| "learning_rate": 1.45e-05, |
| "loss": 4.3805, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.033163829316825114, |
| "grad_norm": 2.6983299255371094, |
| "learning_rate": 1.5e-05, |
| "loss": 4.2701, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03426929029405262, |
| "grad_norm": 2.749418020248413, |
| "learning_rate": 1.55e-05, |
| "loss": 4.3861, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.035374751271280126, |
| "grad_norm": 2.784226179122925, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 4.1366, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03648021224850763, |
| "grad_norm": 2.6632113456726074, |
| "learning_rate": 1.65e-05, |
| "loss": 4.2244, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.03758567322573513, |
| "grad_norm": 2.804885149002075, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 4.1976, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.038691134202962635, |
| "grad_norm": 2.806664228439331, |
| "learning_rate": 1.75e-05, |
| "loss": 4.0235, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.03979659518019014, |
| "grad_norm": 2.6123688220977783, |
| "learning_rate": 1.8e-05, |
| "loss": 4.1966, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04090205615741764, |
| "grad_norm": 2.80129075050354, |
| "learning_rate": 1.85e-05, |
| "loss": 4.1403, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.04200751713464515, |
| "grad_norm": 2.7253201007843018, |
| "learning_rate": 1.9e-05, |
| "loss": 4.1317, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04311297811187265, |
| "grad_norm": 2.852238178253174, |
| "learning_rate": 1.9500000000000003e-05, |
| "loss": 4.243, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.044218439089100156, |
| "grad_norm": 2.968660831451416, |
| "learning_rate": 2e-05, |
| "loss": 4.1083, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04532390006632766, |
| "grad_norm": 2.7089550495147705, |
| "learning_rate": 2.05e-05, |
| "loss": 4.3324, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.04642936104355516, |
| "grad_norm": 2.6991310119628906, |
| "learning_rate": 2.1e-05, |
| "loss": 4.3528, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.047534822020782665, |
| "grad_norm": 2.5547115802764893, |
| "learning_rate": 2.15e-05, |
| "loss": 4.0371, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.04864028299801017, |
| "grad_norm": 2.708559989929199, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 4.1409, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.04974574397523768, |
| "grad_norm": 2.709721565246582, |
| "learning_rate": 2.25e-05, |
| "loss": 4.0498, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.05085120495246518, |
| "grad_norm": 2.7421419620513916, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 4.1863, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.05195666592969268, |
| "grad_norm": 2.776456832885742, |
| "learning_rate": 2.35e-05, |
| "loss": 4.1545, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.053062126906920186, |
| "grad_norm": 2.8448917865753174, |
| "learning_rate": 2.4e-05, |
| "loss": 4.0625, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05416758788414769, |
| "grad_norm": 2.933760404586792, |
| "learning_rate": 2.45e-05, |
| "loss": 4.1578, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.05527304886137519, |
| "grad_norm": 3.026527166366577, |
| "learning_rate": 2.5e-05, |
| "loss": 4.1339, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.056378509838602694, |
| "grad_norm": 2.5931596755981445, |
| "learning_rate": 2.5500000000000003e-05, |
| "loss": 4.0458, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.057483970815830204, |
| "grad_norm": 2.9681997299194336, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 4.2949, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.05858943179305771, |
| "grad_norm": 2.822819232940674, |
| "learning_rate": 2.6500000000000004e-05, |
| "loss": 4.1542, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.05969489277028521, |
| "grad_norm": 2.794525623321533, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 4.1644, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.06080035374751271, |
| "grad_norm": 2.6282451152801514, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 4.0558, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.061905814724740216, |
| "grad_norm": 2.87127947807312, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 4.2151, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.06301127570196773, |
| "grad_norm": 2.7771425247192383, |
| "learning_rate": 2.8499999999999998e-05, |
| "loss": 4.1902, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.06411673667919522, |
| "grad_norm": 2.7243714332580566, |
| "learning_rate": 2.9e-05, |
| "loss": 4.1633, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06522219765642273, |
| "grad_norm": 2.733858108520508, |
| "learning_rate": 2.95e-05, |
| "loss": 4.2343, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.06632765863365023, |
| "grad_norm": 3.054060935974121, |
| "learning_rate": 3e-05, |
| "loss": 4.0605, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06743311961087774, |
| "grad_norm": 2.681039333343506, |
| "learning_rate": 3.05e-05, |
| "loss": 4.0797, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.06853858058810525, |
| "grad_norm": 2.594285011291504, |
| "learning_rate": 3.1e-05, |
| "loss": 4.0443, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.06964404156533274, |
| "grad_norm": 2.9265353679656982, |
| "learning_rate": 3.15e-05, |
| "loss": 4.2976, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.07074950254256025, |
| "grad_norm": 3.3384079933166504, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 4.2831, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.07185496351978775, |
| "grad_norm": 2.9113404750823975, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 4.0696, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.07296042449701526, |
| "grad_norm": 2.746483087539673, |
| "learning_rate": 3.3e-05, |
| "loss": 3.9145, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.07406588547424275, |
| "grad_norm": 2.892920970916748, |
| "learning_rate": 3.35e-05, |
| "loss": 4.0664, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.07517134645147026, |
| "grad_norm": 3.030963659286499, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 4.14, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07627680742869777, |
| "grad_norm": 3.1139981746673584, |
| "learning_rate": 3.45e-05, |
| "loss": 4.0361, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.07738226840592527, |
| "grad_norm": 2.646188497543335, |
| "learning_rate": 3.5e-05, |
| "loss": 4.0998, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.07848772938315278, |
| "grad_norm": 2.9719629287719727, |
| "learning_rate": 3.55e-05, |
| "loss": 3.9815, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.07959319036038028, |
| "grad_norm": 2.6908960342407227, |
| "learning_rate": 3.6e-05, |
| "loss": 4.016, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.08069865133760779, |
| "grad_norm": 3.2028872966766357, |
| "learning_rate": 3.65e-05, |
| "loss": 4.0359, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.08180411231483528, |
| "grad_norm": 2.9519758224487305, |
| "learning_rate": 3.7e-05, |
| "loss": 4.0871, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.08290957329206279, |
| "grad_norm": 2.844874143600464, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 4.1217, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.0840150342692903, |
| "grad_norm": 2.73949933052063, |
| "learning_rate": 3.8e-05, |
| "loss": 4.0189, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0851204952465178, |
| "grad_norm": 2.928393840789795, |
| "learning_rate": 3.85e-05, |
| "loss": 3.9084, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.0862259562237453, |
| "grad_norm": 2.726449966430664, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 4.2775, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0873314172009728, |
| "grad_norm": 2.5583412647247314, |
| "learning_rate": 3.9500000000000005e-05, |
| "loss": 4.0654, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.08843687817820031, |
| "grad_norm": 2.8123371601104736, |
| "learning_rate": 4e-05, |
| "loss": 4.0601, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08954233915542781, |
| "grad_norm": 3.2048697471618652, |
| "learning_rate": 4.05e-05, |
| "loss": 4.1568, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.09064780013265532, |
| "grad_norm": 2.8617966175079346, |
| "learning_rate": 4.1e-05, |
| "loss": 4.209, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.09175326110988283, |
| "grad_norm": 3.07211971282959, |
| "learning_rate": 4.15e-05, |
| "loss": 3.9661, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.09285872208711032, |
| "grad_norm": 2.84535813331604, |
| "learning_rate": 4.2e-05, |
| "loss": 4.0976, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.09396418306433783, |
| "grad_norm": 2.6337199211120605, |
| "learning_rate": 4.25e-05, |
| "loss": 3.9352, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.09506964404156533, |
| "grad_norm": 3.1465373039245605, |
| "learning_rate": 4.3e-05, |
| "loss": 4.0732, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.09617510501879284, |
| "grad_norm": 2.9059720039367676, |
| "learning_rate": 4.35e-05, |
| "loss": 4.0377, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.09728056599602034, |
| "grad_norm": 3.147087812423706, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 4.0003, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09838602697324784, |
| "grad_norm": 2.878849983215332, |
| "learning_rate": 4.4500000000000004e-05, |
| "loss": 4.1393, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.09949148795047535, |
| "grad_norm": 2.9624218940734863, |
| "learning_rate": 4.5e-05, |
| "loss": 3.9989, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.10059694892770285, |
| "grad_norm": 3.047313690185547, |
| "learning_rate": 4.55e-05, |
| "loss": 3.9942, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.10170240990493036, |
| "grad_norm": 3.069126605987549, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 4.0184, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.10280787088215786, |
| "grad_norm": 3.046513319015503, |
| "learning_rate": 4.6500000000000005e-05, |
| "loss": 4.0441, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.10391333185938537, |
| "grad_norm": 2.829324722290039, |
| "learning_rate": 4.7e-05, |
| "loss": 3.9685, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.10501879283661286, |
| "grad_norm": 3.0912318229675293, |
| "learning_rate": 4.75e-05, |
| "loss": 3.9195, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.10612425381384037, |
| "grad_norm": 3.3232522010803223, |
| "learning_rate": 4.8e-05, |
| "loss": 4.092, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.10722971479106788, |
| "grad_norm": 3.12263822555542, |
| "learning_rate": 4.85e-05, |
| "loss": 4.1614, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.10833517576829538, |
| "grad_norm": 3.245594024658203, |
| "learning_rate": 4.9e-05, |
| "loss": 3.9858, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.10944063674552289, |
| "grad_norm": 3.0725033283233643, |
| "learning_rate": 4.9500000000000004e-05, |
| "loss": 4.0024, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.11054609772275038, |
| "grad_norm": 3.056286334991455, |
| "learning_rate": 5e-05, |
| "loss": 3.9464, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1116515586999779, |
| "grad_norm": 3.068084478378296, |
| "learning_rate": 4.993785732040766e-05, |
| "loss": 4.1743, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.11275701967720539, |
| "grad_norm": 3.189666509628296, |
| "learning_rate": 4.9875714640815315e-05, |
| "loss": 3.9721, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1138624806544329, |
| "grad_norm": 3.0129644870758057, |
| "learning_rate": 4.981357196122297e-05, |
| "loss": 4.0626, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.11496794163166041, |
| "grad_norm": 2.962771415710449, |
| "learning_rate": 4.975142928163063e-05, |
| "loss": 4.0752, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.1160734026088879, |
| "grad_norm": 3.028667688369751, |
| "learning_rate": 4.968928660203828e-05, |
| "loss": 4.0011, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.11717886358611541, |
| "grad_norm": 2.98563551902771, |
| "learning_rate": 4.962714392244594e-05, |
| "loss": 4.0049, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.11828432456334291, |
| "grad_norm": 3.0009968280792236, |
| "learning_rate": 4.9565001242853596e-05, |
| "loss": 3.8586, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.11938978554057042, |
| "grad_norm": 3.038587808609009, |
| "learning_rate": 4.950285856326125e-05, |
| "loss": 4.2185, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.12049524651779792, |
| "grad_norm": 2.9189321994781494, |
| "learning_rate": 4.944071588366891e-05, |
| "loss": 4.0958, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.12160070749502543, |
| "grad_norm": 2.9720592498779297, |
| "learning_rate": 4.9378573204076564e-05, |
| "loss": 4.0651, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.12270616847225294, |
| "grad_norm": 3.094660520553589, |
| "learning_rate": 4.931643052448422e-05, |
| "loss": 4.0115, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.12381162944948043, |
| "grad_norm": 3.197223663330078, |
| "learning_rate": 4.925428784489187e-05, |
| "loss": 3.9921, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.12491709042670794, |
| "grad_norm": 3.033642053604126, |
| "learning_rate": 4.919214516529953e-05, |
| "loss": 3.9471, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.12602255140393545, |
| "grad_norm": 3.1595492362976074, |
| "learning_rate": 4.913000248570719e-05, |
| "loss": 3.9529, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.12712801238116295, |
| "grad_norm": 2.948946714401245, |
| "learning_rate": 4.906785980611484e-05, |
| "loss": 4.0634, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.12823347335839044, |
| "grad_norm": 3.366753339767456, |
| "learning_rate": 4.90057171265225e-05, |
| "loss": 3.8098, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.12933893433561794, |
| "grad_norm": 3.2447152137756348, |
| "learning_rate": 4.894357444693015e-05, |
| "loss": 3.9081, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.13044439531284546, |
| "grad_norm": 3.0394585132598877, |
| "learning_rate": 4.888143176733781e-05, |
| "loss": 3.8164, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.13154985629007296, |
| "grad_norm": 2.983616828918457, |
| "learning_rate": 4.881928908774547e-05, |
| "loss": 3.9558, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.13265531726730045, |
| "grad_norm": 3.1075408458709717, |
| "learning_rate": 4.875714640815312e-05, |
| "loss": 3.9563, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.13376077824452798, |
| "grad_norm": 3.068930149078369, |
| "learning_rate": 4.8695003728560775e-05, |
| "loss": 3.9616, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.13486623922175547, |
| "grad_norm": 3.2127275466918945, |
| "learning_rate": 4.863286104896843e-05, |
| "loss": 4.0268, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.13597170019898297, |
| "grad_norm": 3.0738019943237305, |
| "learning_rate": 4.857071836937609e-05, |
| "loss": 4.0659, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.1370771611762105, |
| "grad_norm": 3.2203280925750732, |
| "learning_rate": 4.850857568978375e-05, |
| "loss": 4.052, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.138182622153438, |
| "grad_norm": 3.0868825912475586, |
| "learning_rate": 4.84464330101914e-05, |
| "loss": 4.1941, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.13928808313066549, |
| "grad_norm": 2.9370384216308594, |
| "learning_rate": 4.8384290330599056e-05, |
| "loss": 3.9984, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.14039354410789298, |
| "grad_norm": 3.230595111846924, |
| "learning_rate": 4.832214765100672e-05, |
| "loss": 4.0905, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.1414990050851205, |
| "grad_norm": 3.1805593967437744, |
| "learning_rate": 4.826000497141437e-05, |
| "loss": 4.0938, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.142604466062348, |
| "grad_norm": 2.952800989151001, |
| "learning_rate": 4.8197862291822025e-05, |
| "loss": 3.9694, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.1437099270395755, |
| "grad_norm": 2.96767520904541, |
| "learning_rate": 4.813571961222968e-05, |
| "loss": 4.089, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.14481538801680302, |
| "grad_norm": 3.2061245441436768, |
| "learning_rate": 4.807357693263734e-05, |
| "loss": 3.925, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.14592084899403052, |
| "grad_norm": 3.4966869354248047, |
| "learning_rate": 4.801143425304499e-05, |
| "loss": 4.0303, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.147026309971258, |
| "grad_norm": 3.0343263149261475, |
| "learning_rate": 4.794929157345265e-05, |
| "loss": 4.1302, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.1481317709484855, |
| "grad_norm": 3.1001501083374023, |
| "learning_rate": 4.7887148893860305e-05, |
| "loss": 4.136, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.14923723192571303, |
| "grad_norm": 3.0706558227539062, |
| "learning_rate": 4.782500621426796e-05, |
| "loss": 3.97, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.15034269290294053, |
| "grad_norm": 3.4160215854644775, |
| "learning_rate": 4.776286353467562e-05, |
| "loss": 4.1283, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.15144815388016802, |
| "grad_norm": 3.6512129306793213, |
| "learning_rate": 4.7700720855083274e-05, |
| "loss": 4.1619, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.15255361485739555, |
| "grad_norm": 2.8638243675231934, |
| "learning_rate": 4.763857817549093e-05, |
| "loss": 4.0563, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.15365907583462304, |
| "grad_norm": 2.87731671333313, |
| "learning_rate": 4.7576435495898586e-05, |
| "loss": 4.0609, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.15476453681185054, |
| "grad_norm": 3.2787325382232666, |
| "learning_rate": 4.751429281630624e-05, |
| "loss": 4.031, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.15586999778907804, |
| "grad_norm": 2.9089596271514893, |
| "learning_rate": 4.74521501367139e-05, |
| "loss": 3.8997, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.15697545876630556, |
| "grad_norm": 3.02470326423645, |
| "learning_rate": 4.7390007457121555e-05, |
| "loss": 4.1458, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.15808091974353305, |
| "grad_norm": 3.1005873680114746, |
| "learning_rate": 4.7327864777529204e-05, |
| "loss": 3.8473, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.15918638072076055, |
| "grad_norm": 3.2032277584075928, |
| "learning_rate": 4.726572209793687e-05, |
| "loss": 4.0817, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.16029184169798807, |
| "grad_norm": 3.1510956287384033, |
| "learning_rate": 4.720357941834452e-05, |
| "loss": 4.0441, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.16139730267521557, |
| "grad_norm": 3.088815689086914, |
| "learning_rate": 4.714143673875217e-05, |
| "loss": 3.8953, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.16250276365244307, |
| "grad_norm": 3.099492073059082, |
| "learning_rate": 4.7079294059159836e-05, |
| "loss": 3.8765, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.16360822462967056, |
| "grad_norm": 2.95200252532959, |
| "learning_rate": 4.7017151379567485e-05, |
| "loss": 4.0126, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.16471368560689809, |
| "grad_norm": 3.2879955768585205, |
| "learning_rate": 4.695500869997515e-05, |
| "loss": 4.0581, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.16581914658412558, |
| "grad_norm": 3.344324827194214, |
| "learning_rate": 4.6892866020382804e-05, |
| "loss": 4.056, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.16692460756135308, |
| "grad_norm": 3.3089466094970703, |
| "learning_rate": 4.6830723340790454e-05, |
| "loss": 3.9941, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.1680300685385806, |
| "grad_norm": 3.3503427505493164, |
| "learning_rate": 4.6768580661198117e-05, |
| "loss": 3.9987, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.1691355295158081, |
| "grad_norm": 3.3430700302124023, |
| "learning_rate": 4.670643798160577e-05, |
| "loss": 3.8631, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.1702409904930356, |
| "grad_norm": 3.0984108448028564, |
| "learning_rate": 4.664429530201342e-05, |
| "loss": 4.0144, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.1713464514702631, |
| "grad_norm": 3.1141326427459717, |
| "learning_rate": 4.6582152622421085e-05, |
| "loss": 3.9256, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.1724519124474906, |
| "grad_norm": 3.1998496055603027, |
| "learning_rate": 4.6520009942828734e-05, |
| "loss": 3.9675, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.1735573734247181, |
| "grad_norm": 3.034891128540039, |
| "learning_rate": 4.645786726323639e-05, |
| "loss": 4.0099, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.1746628344019456, |
| "grad_norm": 3.2506675720214844, |
| "learning_rate": 4.6395724583644054e-05, |
| "loss": 3.9246, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.17576829537917313, |
| "grad_norm": 3.485947608947754, |
| "learning_rate": 4.63335819040517e-05, |
| "loss": 3.9919, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.17687375635640062, |
| "grad_norm": 3.2420520782470703, |
| "learning_rate": 4.627143922445936e-05, |
| "loss": 4.021, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.17797921733362812, |
| "grad_norm": 2.989863872528076, |
| "learning_rate": 4.6209296544867015e-05, |
| "loss": 3.9956, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.17908467831085562, |
| "grad_norm": 2.9505488872528076, |
| "learning_rate": 4.614715386527467e-05, |
| "loss": 4.1098, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.18019013928808314, |
| "grad_norm": 3.1943299770355225, |
| "learning_rate": 4.608501118568233e-05, |
| "loss": 3.962, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.18129560026531064, |
| "grad_norm": 3.1761474609375, |
| "learning_rate": 4.6022868506089984e-05, |
| "loss": 3.8666, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.18240106124253813, |
| "grad_norm": 3.454538345336914, |
| "learning_rate": 4.596072582649764e-05, |
| "loss": 4.1169, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.18350652221976566, |
| "grad_norm": 3.3881819248199463, |
| "learning_rate": 4.5898583146905296e-05, |
| "loss": 4.0902, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.18461198319699315, |
| "grad_norm": 3.0427277088165283, |
| "learning_rate": 4.583644046731295e-05, |
| "loss": 3.9533, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.18571744417422065, |
| "grad_norm": 3.062037944793701, |
| "learning_rate": 4.577429778772061e-05, |
| "loss": 4.0955, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.18682290515144814, |
| "grad_norm": 3.1821091175079346, |
| "learning_rate": 4.5712155108128265e-05, |
| "loss": 4.1441, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.18792836612867567, |
| "grad_norm": 3.1128711700439453, |
| "learning_rate": 4.565001242853592e-05, |
| "loss": 4.1418, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.18903382710590316, |
| "grad_norm": 3.0755162239074707, |
| "learning_rate": 4.558786974894358e-05, |
| "loss": 4.0246, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.19013928808313066, |
| "grad_norm": 3.2559144496917725, |
| "learning_rate": 4.552572706935123e-05, |
| "loss": 4.0333, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.19124474906035818, |
| "grad_norm": 2.929656744003296, |
| "learning_rate": 4.546358438975889e-05, |
| "loss": 4.102, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.19235021003758568, |
| "grad_norm": 3.1212410926818848, |
| "learning_rate": 4.5401441710166546e-05, |
| "loss": 3.8648, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.19345567101481317, |
| "grad_norm": 3.0112760066986084, |
| "learning_rate": 4.53392990305742e-05, |
| "loss": 4.0, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.19456113199204067, |
| "grad_norm": 3.1704013347625732, |
| "learning_rate": 4.527715635098186e-05, |
| "loss": 4.0259, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.1956665929692682, |
| "grad_norm": 2.999876022338867, |
| "learning_rate": 4.5215013671389514e-05, |
| "loss": 3.8822, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.1967720539464957, |
| "grad_norm": 3.1141977310180664, |
| "learning_rate": 4.515287099179717e-05, |
| "loss": 4.1104, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.19787751492372319, |
| "grad_norm": 3.2327237129211426, |
| "learning_rate": 4.509072831220482e-05, |
| "loss": 3.8755, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.1989829759009507, |
| "grad_norm": 3.019273519515991, |
| "learning_rate": 4.502858563261248e-05, |
| "loss": 3.9992, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2000884368781782, |
| "grad_norm": 3.203974962234497, |
| "learning_rate": 4.496644295302014e-05, |
| "loss": 3.978, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.2011938978554057, |
| "grad_norm": 3.0810108184814453, |
| "learning_rate": 4.490430027342779e-05, |
| "loss": 4.0461, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.2022993588326332, |
| "grad_norm": 3.004460096359253, |
| "learning_rate": 4.484215759383545e-05, |
| "loss": 3.9562, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.20340481980986072, |
| "grad_norm": 3.146409034729004, |
| "learning_rate": 4.478001491424311e-05, |
| "loss": 4.0321, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.20451028078708822, |
| "grad_norm": 3.180551528930664, |
| "learning_rate": 4.471787223465076e-05, |
| "loss": 4.0203, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.2056157417643157, |
| "grad_norm": 3.2521543502807617, |
| "learning_rate": 4.465572955505842e-05, |
| "loss": 3.9592, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.20672120274154324, |
| "grad_norm": 3.3072097301483154, |
| "learning_rate": 4.459358687546607e-05, |
| "loss": 3.9383, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.20782666371877073, |
| "grad_norm": 3.152592182159424, |
| "learning_rate": 4.4531444195873725e-05, |
| "loss": 3.9695, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.20893212469599823, |
| "grad_norm": 3.3956856727600098, |
| "learning_rate": 4.446930151628139e-05, |
| "loss": 4.1435, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.21003758567322572, |
| "grad_norm": 3.2591230869293213, |
| "learning_rate": 4.440715883668904e-05, |
| "loss": 3.9847, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.21114304665045325, |
| "grad_norm": 3.197763204574585, |
| "learning_rate": 4.4345016157096694e-05, |
| "loss": 4.0096, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.21224850762768074, |
| "grad_norm": 3.1687469482421875, |
| "learning_rate": 4.428287347750435e-05, |
| "loss": 3.9947, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.21335396860490824, |
| "grad_norm": 3.01877498626709, |
| "learning_rate": 4.4220730797912006e-05, |
| "loss": 3.9609, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.21445942958213576, |
| "grad_norm": 3.0294318199157715, |
| "learning_rate": 4.415858811831967e-05, |
| "loss": 4.1849, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.21556489055936326, |
| "grad_norm": 3.6619277000427246, |
| "learning_rate": 4.409644543872732e-05, |
| "loss": 4.0503, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.21667035153659076, |
| "grad_norm": 3.24751353263855, |
| "learning_rate": 4.4034302759134975e-05, |
| "loss": 4.1227, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.21777581251381825, |
| "grad_norm": 3.2298481464385986, |
| "learning_rate": 4.397216007954264e-05, |
| "loss": 4.0815, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.21888127349104577, |
| "grad_norm": 3.2555155754089355, |
| "learning_rate": 4.391001739995029e-05, |
| "loss": 4.1461, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.21998673446827327, |
| "grad_norm": 3.141761064529419, |
| "learning_rate": 4.384787472035794e-05, |
| "loss": 4.021, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.22109219544550077, |
| "grad_norm": 3.0659165382385254, |
| "learning_rate": 4.37857320407656e-05, |
| "loss": 3.8781, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2221976564227283, |
| "grad_norm": 3.1628031730651855, |
| "learning_rate": 4.3723589361173255e-05, |
| "loss": 4.0618, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.2233031173999558, |
| "grad_norm": 3.143479347229004, |
| "learning_rate": 4.366144668158091e-05, |
| "loss": 4.0251, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.22440857837718328, |
| "grad_norm": 3.302840232849121, |
| "learning_rate": 4.359930400198857e-05, |
| "loss": 3.8384, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.22551403935441078, |
| "grad_norm": 2.7286899089813232, |
| "learning_rate": 4.3537161322396224e-05, |
| "loss": 4.0165, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.2266195003316383, |
| "grad_norm": 3.0600860118865967, |
| "learning_rate": 4.347501864280388e-05, |
| "loss": 4.0364, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.2277249613088658, |
| "grad_norm": 2.9517204761505127, |
| "learning_rate": 4.3412875963211536e-05, |
| "loss": 4.0458, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.2288304222860933, |
| "grad_norm": 3.2530035972595215, |
| "learning_rate": 4.335073328361919e-05, |
| "loss": 4.1022, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.22993588326332082, |
| "grad_norm": 3.277559280395508, |
| "learning_rate": 4.328859060402685e-05, |
| "loss": 3.9183, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.2310413442405483, |
| "grad_norm": 3.286675453186035, |
| "learning_rate": 4.3226447924434505e-05, |
| "loss": 4.1264, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.2321468052177758, |
| "grad_norm": 3.010737180709839, |
| "learning_rate": 4.3164305244842154e-05, |
| "loss": 4.0477, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2332522661950033, |
| "grad_norm": 3.050497055053711, |
| "learning_rate": 4.310216256524982e-05, |
| "loss": 4.0464, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.23435772717223083, |
| "grad_norm": 3.201765537261963, |
| "learning_rate": 4.304001988565747e-05, |
| "loss": 4.0519, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.23546318814945832, |
| "grad_norm": 3.3649299144744873, |
| "learning_rate": 4.297787720606512e-05, |
| "loss": 3.81, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.23656864912668582, |
| "grad_norm": 3.5535190105438232, |
| "learning_rate": 4.2915734526472786e-05, |
| "loss": 4.1328, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.23767411010391334, |
| "grad_norm": 3.1812844276428223, |
| "learning_rate": 4.285359184688044e-05, |
| "loss": 4.052, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.23877957108114084, |
| "grad_norm": 3.303905725479126, |
| "learning_rate": 4.279144916728809e-05, |
| "loss": 3.8988, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.23988503205836834, |
| "grad_norm": 3.1050772666931152, |
| "learning_rate": 4.2729306487695754e-05, |
| "loss": 3.9013, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.24099049303559583, |
| "grad_norm": 3.2585289478302, |
| "learning_rate": 4.2667163808103404e-05, |
| "loss": 4.1435, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.24209595401282336, |
| "grad_norm": 3.3238561153411865, |
| "learning_rate": 4.2605021128511067e-05, |
| "loss": 3.9212, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.24320141499005085, |
| "grad_norm": 3.151242971420288, |
| "learning_rate": 4.254287844891872e-05, |
| "loss": 4.0018, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.24430687596727835, |
| "grad_norm": 2.9132590293884277, |
| "learning_rate": 4.248073576932637e-05, |
| "loss": 3.9902, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.24541233694450587, |
| "grad_norm": 3.318678140640259, |
| "learning_rate": 4.2418593089734035e-05, |
| "loss": 3.9546, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.24651779792173337, |
| "grad_norm": 3.3934099674224854, |
| "learning_rate": 4.2356450410141684e-05, |
| "loss": 3.9115, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.24762325889896086, |
| "grad_norm": 3.0218331813812256, |
| "learning_rate": 4.229430773054934e-05, |
| "loss": 4.0945, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.24872871987618836, |
| "grad_norm": 3.152254581451416, |
| "learning_rate": 4.2232165050957004e-05, |
| "loss": 4.1443, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.24983418085341588, |
| "grad_norm": 3.2911226749420166, |
| "learning_rate": 4.217002237136465e-05, |
| "loss": 4.0634, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.2509396418306434, |
| "grad_norm": 3.0462334156036377, |
| "learning_rate": 4.210787969177231e-05, |
| "loss": 4.0296, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.2520451028078709, |
| "grad_norm": 3.0708699226379395, |
| "learning_rate": 4.204573701217997e-05, |
| "loss": 4.1341, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.25315056378509837, |
| "grad_norm": 3.381535053253174, |
| "learning_rate": 4.198359433258762e-05, |
| "loss": 3.9333, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.2542560247623259, |
| "grad_norm": 3.021491050720215, |
| "learning_rate": 4.192145165299528e-05, |
| "loss": 4.0599, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2553614857395534, |
| "grad_norm": 3.339264154434204, |
| "learning_rate": 4.1859308973402934e-05, |
| "loss": 4.0867, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.2564669467167809, |
| "grad_norm": 2.9898245334625244, |
| "learning_rate": 4.179716629381059e-05, |
| "loss": 4.0395, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.2575724076940084, |
| "grad_norm": 3.3147876262664795, |
| "learning_rate": 4.1735023614218246e-05, |
| "loss": 3.9406, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.2586778686712359, |
| "grad_norm": 3.3725435733795166, |
| "learning_rate": 4.16728809346259e-05, |
| "loss": 3.9498, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.2597833296484634, |
| "grad_norm": 3.2875232696533203, |
| "learning_rate": 4.161073825503356e-05, |
| "loss": 3.9765, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.2608887906256909, |
| "grad_norm": 3.117985248565674, |
| "learning_rate": 4.1548595575441215e-05, |
| "loss": 4.2161, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2619942516029184, |
| "grad_norm": 3.326371669769287, |
| "learning_rate": 4.148645289584887e-05, |
| "loss": 3.8891, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.2630997125801459, |
| "grad_norm": 3.4053702354431152, |
| "learning_rate": 4.142431021625653e-05, |
| "loss": 4.1167, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.26420517355737344, |
| "grad_norm": 2.9902451038360596, |
| "learning_rate": 4.136216753666418e-05, |
| "loss": 4.1837, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.2653106345346009, |
| "grad_norm": 3.04341721534729, |
| "learning_rate": 4.130002485707184e-05, |
| "loss": 3.9783, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.26641609551182843, |
| "grad_norm": 3.1881587505340576, |
| "learning_rate": 4.123788217747949e-05, |
| "loss": 4.0327, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.26752155648905596, |
| "grad_norm": 3.1782286167144775, |
| "learning_rate": 4.117573949788715e-05, |
| "loss": 3.9614, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.2686270174662834, |
| "grad_norm": 3.0777156352996826, |
| "learning_rate": 4.111359681829481e-05, |
| "loss": 3.9946, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.26973247844351095, |
| "grad_norm": 3.0450563430786133, |
| "learning_rate": 4.1051454138702464e-05, |
| "loss": 4.0267, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.27083793942073847, |
| "grad_norm": 3.516542673110962, |
| "learning_rate": 4.098931145911012e-05, |
| "loss": 4.0077, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.27194340039796594, |
| "grad_norm": 3.6443097591400146, |
| "learning_rate": 4.0927168779517776e-05, |
| "loss": 3.9799, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.27304886137519346, |
| "grad_norm": 3.004601240158081, |
| "learning_rate": 4.086502609992543e-05, |
| "loss": 3.997, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.274154322352421, |
| "grad_norm": 2.9626457691192627, |
| "learning_rate": 4.080288342033309e-05, |
| "loss": 3.9609, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.27525978332964846, |
| "grad_norm": 3.267373561859131, |
| "learning_rate": 4.074074074074074e-05, |
| "loss": 4.0279, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.276365244306876, |
| "grad_norm": 3.2012808322906494, |
| "learning_rate": 4.06785980611484e-05, |
| "loss": 4.0551, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.27747070528410345, |
| "grad_norm": 3.1443517208099365, |
| "learning_rate": 4.061645538155606e-05, |
| "loss": 3.9241, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.27857616626133097, |
| "grad_norm": 3.201756238937378, |
| "learning_rate": 4.055431270196371e-05, |
| "loss": 4.0168, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.2796816272385585, |
| "grad_norm": 3.381840229034424, |
| "learning_rate": 4.049217002237137e-05, |
| "loss": 4.0506, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.28078708821578596, |
| "grad_norm": 3.3655803203582764, |
| "learning_rate": 4.043002734277902e-05, |
| "loss": 4.0166, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.2818925491930135, |
| "grad_norm": 3.1821653842926025, |
| "learning_rate": 4.0367884663186675e-05, |
| "loss": 4.0161, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.282998010170241, |
| "grad_norm": 3.2986061573028564, |
| "learning_rate": 4.030574198359434e-05, |
| "loss": 3.8855, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.2841034711474685, |
| "grad_norm": 3.3557889461517334, |
| "learning_rate": 4.024359930400199e-05, |
| "loss": 4.0151, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.285208932124696, |
| "grad_norm": 3.358522891998291, |
| "learning_rate": 4.0181456624409644e-05, |
| "loss": 3.9199, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.2863143931019235, |
| "grad_norm": 3.4547970294952393, |
| "learning_rate": 4.011931394481731e-05, |
| "loss": 4.0687, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.287419854079151, |
| "grad_norm": 3.0661280155181885, |
| "learning_rate": 4.0057171265224956e-05, |
| "loss": 4.0077, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.2885253150563785, |
| "grad_norm": 3.2720112800598145, |
| "learning_rate": 3.999502858563262e-05, |
| "loss": 3.8876, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.28963077603360604, |
| "grad_norm": 3.0981643199920654, |
| "learning_rate": 3.993288590604027e-05, |
| "loss": 3.9393, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.2907362370108335, |
| "grad_norm": 3.2599971294403076, |
| "learning_rate": 3.9870743226447925e-05, |
| "loss": 3.8995, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.29184169798806103, |
| "grad_norm": 3.6165876388549805, |
| "learning_rate": 3.980860054685559e-05, |
| "loss": 4.0319, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.2929471589652885, |
| "grad_norm": 3.432969331741333, |
| "learning_rate": 3.974645786726324e-05, |
| "loss": 4.0085, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.294052619942516, |
| "grad_norm": 3.2116641998291016, |
| "learning_rate": 3.968431518767089e-05, |
| "loss": 3.9819, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.29515808091974355, |
| "grad_norm": 3.476435661315918, |
| "learning_rate": 3.962217250807855e-05, |
| "loss": 4.028, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.296263541896971, |
| "grad_norm": 3.428138017654419, |
| "learning_rate": 3.9560029828486205e-05, |
| "loss": 3.9686, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.29736900287419854, |
| "grad_norm": 3.2953410148620605, |
| "learning_rate": 3.949788714889386e-05, |
| "loss": 3.9535, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.29847446385142606, |
| "grad_norm": 3.800462245941162, |
| "learning_rate": 3.943574446930152e-05, |
| "loss": 4.02, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.29957992482865353, |
| "grad_norm": 3.0902063846588135, |
| "learning_rate": 3.9373601789709174e-05, |
| "loss": 4.0621, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.30068538580588106, |
| "grad_norm": 3.0530946254730225, |
| "learning_rate": 3.931145911011683e-05, |
| "loss": 4.0547, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.3017908467831086, |
| "grad_norm": 3.3780524730682373, |
| "learning_rate": 3.9249316430524486e-05, |
| "loss": 3.8966, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.30289630776033605, |
| "grad_norm": 3.302295207977295, |
| "learning_rate": 3.918717375093214e-05, |
| "loss": 4.1423, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.30400176873756357, |
| "grad_norm": 3.452106237411499, |
| "learning_rate": 3.91250310713398e-05, |
| "loss": 3.95, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.3051072297147911, |
| "grad_norm": 3.3365650177001953, |
| "learning_rate": 3.9062888391747455e-05, |
| "loss": 4.0451, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.30621269069201856, |
| "grad_norm": 3.3903305530548096, |
| "learning_rate": 3.900074571215511e-05, |
| "loss": 3.8807, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.3073181516692461, |
| "grad_norm": 3.6150190830230713, |
| "learning_rate": 3.893860303256277e-05, |
| "loss": 4.0183, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.30842361264647356, |
| "grad_norm": 3.298021078109741, |
| "learning_rate": 3.887646035297042e-05, |
| "loss": 4.0159, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.3095290736237011, |
| "grad_norm": 3.3884518146514893, |
| "learning_rate": 3.881431767337807e-05, |
| "loss": 4.0274, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3106345346009286, |
| "grad_norm": 3.0882458686828613, |
| "learning_rate": 3.8752174993785736e-05, |
| "loss": 4.0236, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.31173999557815607, |
| "grad_norm": 3.4634859561920166, |
| "learning_rate": 3.869003231419339e-05, |
| "loss": 4.106, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.3128454565553836, |
| "grad_norm": 3.3966925144195557, |
| "learning_rate": 3.862788963460104e-05, |
| "loss": 4.1579, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.3139509175326111, |
| "grad_norm": 3.643110990524292, |
| "learning_rate": 3.8565746955008704e-05, |
| "loss": 3.8821, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.3150563785098386, |
| "grad_norm": 3.37382435798645, |
| "learning_rate": 3.8503604275416354e-05, |
| "loss": 4.1456, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.3161618394870661, |
| "grad_norm": 3.523825168609619, |
| "learning_rate": 3.8441461595824017e-05, |
| "loss": 4.0356, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.31726730046429363, |
| "grad_norm": 3.146383762359619, |
| "learning_rate": 3.837931891623167e-05, |
| "loss": 4.1187, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.3183727614415211, |
| "grad_norm": 3.3049044609069824, |
| "learning_rate": 3.831717623663932e-05, |
| "loss": 3.9896, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.3194782224187486, |
| "grad_norm": 3.3387224674224854, |
| "learning_rate": 3.8255033557046985e-05, |
| "loss": 4.0838, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.32058368339597615, |
| "grad_norm": 3.432584047317505, |
| "learning_rate": 3.819289087745464e-05, |
| "loss": 4.2188, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3216891443732036, |
| "grad_norm": 3.689253568649292, |
| "learning_rate": 3.813074819786229e-05, |
| "loss": 4.0942, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.32279460535043114, |
| "grad_norm": 3.4148080348968506, |
| "learning_rate": 3.8068605518269954e-05, |
| "loss": 4.0352, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.3239000663276586, |
| "grad_norm": 3.3507676124572754, |
| "learning_rate": 3.80064628386776e-05, |
| "loss": 4.0372, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.32500552730488613, |
| "grad_norm": 3.4236788749694824, |
| "learning_rate": 3.794432015908526e-05, |
| "loss": 4.0303, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.32611098828211366, |
| "grad_norm": 3.2741448879241943, |
| "learning_rate": 3.788217747949292e-05, |
| "loss": 3.9362, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.3272164492593411, |
| "grad_norm": 3.177788734436035, |
| "learning_rate": 3.782003479990057e-05, |
| "loss": 4.0183, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.32832191023656865, |
| "grad_norm": 3.6237776279449463, |
| "learning_rate": 3.775789212030823e-05, |
| "loss": 4.0285, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.32942737121379617, |
| "grad_norm": 3.418241024017334, |
| "learning_rate": 3.7695749440715884e-05, |
| "loss": 4.1458, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.33053283219102364, |
| "grad_norm": 3.0317554473876953, |
| "learning_rate": 3.763360676112354e-05, |
| "loss": 3.9586, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.33163829316825116, |
| "grad_norm": 3.402616024017334, |
| "learning_rate": 3.7571464081531196e-05, |
| "loss": 4.1311, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3327437541454787, |
| "grad_norm": 3.386590003967285, |
| "learning_rate": 3.750932140193885e-05, |
| "loss": 4.189, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.33384921512270616, |
| "grad_norm": 3.329336404800415, |
| "learning_rate": 3.744717872234651e-05, |
| "loss": 3.9931, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3349546760999337, |
| "grad_norm": 3.281658411026001, |
| "learning_rate": 3.7385036042754165e-05, |
| "loss": 4.0458, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.3360601370771612, |
| "grad_norm": 3.196786880493164, |
| "learning_rate": 3.732289336316182e-05, |
| "loss": 3.9526, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.33716559805438867, |
| "grad_norm": 3.386678695678711, |
| "learning_rate": 3.726075068356948e-05, |
| "loss": 4.1347, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.3382710590316162, |
| "grad_norm": 2.9931721687316895, |
| "learning_rate": 3.719860800397713e-05, |
| "loss": 3.9369, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.33937652000884366, |
| "grad_norm": 3.7105250358581543, |
| "learning_rate": 3.713646532438479e-05, |
| "loss": 3.8733, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.3404819809860712, |
| "grad_norm": 3.0669617652893066, |
| "learning_rate": 3.7074322644792446e-05, |
| "loss": 3.8466, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.3415874419632987, |
| "grad_norm": 3.449889898300171, |
| "learning_rate": 3.70121799652001e-05, |
| "loss": 4.0733, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.3426929029405262, |
| "grad_norm": 3.4569785594940186, |
| "learning_rate": 3.695003728560776e-05, |
| "loss": 3.9711, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.3437983639177537, |
| "grad_norm": 3.4246673583984375, |
| "learning_rate": 3.6887894606015414e-05, |
| "loss": 4.0172, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.3449038248949812, |
| "grad_norm": 3.5262482166290283, |
| "learning_rate": 3.682575192642307e-05, |
| "loss": 4.1475, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.3460092858722087, |
| "grad_norm": 3.057406425476074, |
| "learning_rate": 3.6763609246830726e-05, |
| "loss": 4.0023, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.3471147468494362, |
| "grad_norm": 3.6714344024658203, |
| "learning_rate": 3.670146656723838e-05, |
| "loss": 3.9847, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.34822020782666374, |
| "grad_norm": 3.396587371826172, |
| "learning_rate": 3.663932388764604e-05, |
| "loss": 4.1175, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.3493256688038912, |
| "grad_norm": 3.11995530128479, |
| "learning_rate": 3.6577181208053695e-05, |
| "loss": 4.0171, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.35043112978111873, |
| "grad_norm": 3.4781930446624756, |
| "learning_rate": 3.651503852846135e-05, |
| "loss": 3.9343, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.35153659075834626, |
| "grad_norm": 3.264204263687134, |
| "learning_rate": 3.645289584886901e-05, |
| "loss": 4.1221, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.3526420517355737, |
| "grad_norm": 3.2987558841705322, |
| "learning_rate": 3.639075316927666e-05, |
| "loss": 4.1632, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.35374751271280125, |
| "grad_norm": 3.6787593364715576, |
| "learning_rate": 3.632861048968432e-05, |
| "loss": 3.9153, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3548529736900287, |
| "grad_norm": 3.2717323303222656, |
| "learning_rate": 3.6266467810091976e-05, |
| "loss": 3.9902, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.35595843466725624, |
| "grad_norm": 3.1607632637023926, |
| "learning_rate": 3.6204325130499625e-05, |
| "loss": 4.0374, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.35706389564448376, |
| "grad_norm": 3.187629461288452, |
| "learning_rate": 3.614218245090729e-05, |
| "loss": 4.2059, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.35816935662171123, |
| "grad_norm": 3.6148953437805176, |
| "learning_rate": 3.608003977131494e-05, |
| "loss": 4.0533, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.35927481759893876, |
| "grad_norm": 3.3978331089019775, |
| "learning_rate": 3.6017897091722594e-05, |
| "loss": 4.0545, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.3603802785761663, |
| "grad_norm": 3.5654563903808594, |
| "learning_rate": 3.595575441213026e-05, |
| "loss": 4.0747, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.36148573955339375, |
| "grad_norm": 3.0887868404388428, |
| "learning_rate": 3.5893611732537906e-05, |
| "loss": 4.0406, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.36259120053062127, |
| "grad_norm": 2.8452141284942627, |
| "learning_rate": 3.583146905294556e-05, |
| "loss": 4.0105, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.3636966615078488, |
| "grad_norm": 3.3485066890716553, |
| "learning_rate": 3.576932637335322e-05, |
| "loss": 4.1587, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.36480212248507626, |
| "grad_norm": 3.476148843765259, |
| "learning_rate": 3.5707183693760875e-05, |
| "loss": 3.9972, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.3659075834623038, |
| "grad_norm": 3.3700621128082275, |
| "learning_rate": 3.564504101416854e-05, |
| "loss": 3.8875, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.3670130444395313, |
| "grad_norm": 3.48191237449646, |
| "learning_rate": 3.558289833457619e-05, |
| "loss": 4.0436, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.3681185054167588, |
| "grad_norm": 2.992255926132202, |
| "learning_rate": 3.552075565498384e-05, |
| "loss": 4.143, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.3692239663939863, |
| "grad_norm": 3.511962413787842, |
| "learning_rate": 3.5458612975391506e-05, |
| "loss": 4.0267, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.37032942737121377, |
| "grad_norm": 3.1641499996185303, |
| "learning_rate": 3.5396470295799155e-05, |
| "loss": 3.9213, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.3714348883484413, |
| "grad_norm": 3.7594759464263916, |
| "learning_rate": 3.533432761620681e-05, |
| "loss": 4.1522, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.3725403493256688, |
| "grad_norm": 3.7265207767486572, |
| "learning_rate": 3.527218493661447e-05, |
| "loss": 3.9366, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.3736458103028963, |
| "grad_norm": 3.301990270614624, |
| "learning_rate": 3.5210042257022124e-05, |
| "loss": 3.9142, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.3747512712801238, |
| "grad_norm": 3.2270445823669434, |
| "learning_rate": 3.514789957742978e-05, |
| "loss": 4.0301, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.37585673225735133, |
| "grad_norm": 3.4519598484039307, |
| "learning_rate": 3.5085756897837436e-05, |
| "loss": 3.9566, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3769621932345788, |
| "grad_norm": 3.3497774600982666, |
| "learning_rate": 3.502361421824509e-05, |
| "loss": 3.9327, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.3780676542118063, |
| "grad_norm": 3.5343832969665527, |
| "learning_rate": 3.496147153865275e-05, |
| "loss": 4.0446, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.37917311518903385, |
| "grad_norm": 3.369101047515869, |
| "learning_rate": 3.4899328859060405e-05, |
| "loss": 3.9529, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.3802785761662613, |
| "grad_norm": 3.0477051734924316, |
| "learning_rate": 3.483718617946806e-05, |
| "loss": 3.8606, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.38138403714348884, |
| "grad_norm": 3.516953468322754, |
| "learning_rate": 3.477504349987572e-05, |
| "loss": 3.9936, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.38248949812071636, |
| "grad_norm": 3.628263235092163, |
| "learning_rate": 3.471290082028337e-05, |
| "loss": 3.9455, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.38359495909794383, |
| "grad_norm": 3.476489305496216, |
| "learning_rate": 3.465075814069103e-05, |
| "loss": 3.972, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.38470042007517136, |
| "grad_norm": 3.296743154525757, |
| "learning_rate": 3.4588615461098686e-05, |
| "loss": 4.093, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.3858058810523988, |
| "grad_norm": 3.523559331893921, |
| "learning_rate": 3.452647278150634e-05, |
| "loss": 3.9767, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.38691134202962635, |
| "grad_norm": 3.2359955310821533, |
| "learning_rate": 3.446433010191399e-05, |
| "loss": 3.9597, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.38801680300685387, |
| "grad_norm": 3.318793296813965, |
| "learning_rate": 3.4402187422321654e-05, |
| "loss": 4.1788, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.38912226398408134, |
| "grad_norm": 3.055785655975342, |
| "learning_rate": 3.434004474272931e-05, |
| "loss": 3.873, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.39022772496130886, |
| "grad_norm": 3.787897825241089, |
| "learning_rate": 3.427790206313696e-05, |
| "loss": 4.0092, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.3913331859385364, |
| "grad_norm": 3.6127915382385254, |
| "learning_rate": 3.421575938354462e-05, |
| "loss": 3.9295, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.39243864691576386, |
| "grad_norm": 3.254620313644409, |
| "learning_rate": 3.415361670395227e-05, |
| "loss": 3.965, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.3935441078929914, |
| "grad_norm": 3.480854034423828, |
| "learning_rate": 3.4091474024359935e-05, |
| "loss": 4.0151, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.3946495688702189, |
| "grad_norm": 3.200242280960083, |
| "learning_rate": 3.402933134476759e-05, |
| "loss": 4.0929, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.39575502984744637, |
| "grad_norm": 3.1364223957061768, |
| "learning_rate": 3.396718866517524e-05, |
| "loss": 3.9891, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.3968604908246739, |
| "grad_norm": 3.4453999996185303, |
| "learning_rate": 3.3905045985582904e-05, |
| "loss": 4.1357, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.3979659518019014, |
| "grad_norm": 3.265876531600952, |
| "learning_rate": 3.384290330599056e-05, |
| "loss": 3.8728, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3990714127791289, |
| "grad_norm": 3.2799103260040283, |
| "learning_rate": 3.378076062639821e-05, |
| "loss": 4.1506, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.4001768737563564, |
| "grad_norm": 3.2966063022613525, |
| "learning_rate": 3.371861794680587e-05, |
| "loss": 3.8413, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.4012823347335839, |
| "grad_norm": 3.346560478210449, |
| "learning_rate": 3.365647526721352e-05, |
| "loss": 4.0029, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.4023877957108114, |
| "grad_norm": 3.191598892211914, |
| "learning_rate": 3.359433258762118e-05, |
| "loss": 4.0017, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.4034932566880389, |
| "grad_norm": 3.689901113510132, |
| "learning_rate": 3.353218990802884e-05, |
| "loss": 4.1346, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.4045987176652664, |
| "grad_norm": 3.4523544311523438, |
| "learning_rate": 3.347004722843649e-05, |
| "loss": 4.0593, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.4057041786424939, |
| "grad_norm": 3.3706953525543213, |
| "learning_rate": 3.3407904548844146e-05, |
| "loss": 4.1312, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.40680963961972144, |
| "grad_norm": 3.5654544830322266, |
| "learning_rate": 3.33457618692518e-05, |
| "loss": 4.0694, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.4079151005969489, |
| "grad_norm": 3.540480136871338, |
| "learning_rate": 3.328361918965946e-05, |
| "loss": 4.123, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.40902056157417643, |
| "grad_norm": 3.286994695663452, |
| "learning_rate": 3.3221476510067115e-05, |
| "loss": 4.1365, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.41012602255140396, |
| "grad_norm": 3.0457570552825928, |
| "learning_rate": 3.315933383047477e-05, |
| "loss": 3.9359, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.4112314835286314, |
| "grad_norm": 3.2751758098602295, |
| "learning_rate": 3.309719115088243e-05, |
| "loss": 4.0003, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.41233694450585895, |
| "grad_norm": 3.345170259475708, |
| "learning_rate": 3.303504847129008e-05, |
| "loss": 3.87, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.4134424054830865, |
| "grad_norm": 3.398428440093994, |
| "learning_rate": 3.297290579169774e-05, |
| "loss": 3.9499, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.41454786646031394, |
| "grad_norm": 3.3243329524993896, |
| "learning_rate": 3.2910763112105396e-05, |
| "loss": 4.0548, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.41565332743754146, |
| "grad_norm": 3.449658155441284, |
| "learning_rate": 3.284862043251305e-05, |
| "loss": 3.8984, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.41675878841476893, |
| "grad_norm": 3.741178035736084, |
| "learning_rate": 3.278647775292071e-05, |
| "loss": 4.1575, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.41786424939199646, |
| "grad_norm": 3.4483730792999268, |
| "learning_rate": 3.2724335073328364e-05, |
| "loss": 4.034, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.418969710369224, |
| "grad_norm": 3.176455020904541, |
| "learning_rate": 3.266219239373602e-05, |
| "loss": 3.9522, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.42007517134645145, |
| "grad_norm": 3.323781967163086, |
| "learning_rate": 3.2600049714143676e-05, |
| "loss": 4.04, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.42118063232367897, |
| "grad_norm": 3.125051498413086, |
| "learning_rate": 3.253790703455133e-05, |
| "loss": 3.9916, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.4222860933009065, |
| "grad_norm": 3.488311767578125, |
| "learning_rate": 3.247576435495899e-05, |
| "loss": 4.1544, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.42339155427813396, |
| "grad_norm": 3.3193490505218506, |
| "learning_rate": 3.2413621675366645e-05, |
| "loss": 3.8267, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.4244970152553615, |
| "grad_norm": 3.118138313293457, |
| "learning_rate": 3.23514789957743e-05, |
| "loss": 4.0021, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.425602476232589, |
| "grad_norm": 3.0843567848205566, |
| "learning_rate": 3.228933631618196e-05, |
| "loss": 4.0595, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.4267079372098165, |
| "grad_norm": 3.249384880065918, |
| "learning_rate": 3.222719363658961e-05, |
| "loss": 4.0343, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.427813398187044, |
| "grad_norm": 3.4635889530181885, |
| "learning_rate": 3.216505095699727e-05, |
| "loss": 4.1018, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.4289188591642715, |
| "grad_norm": 3.705624580383301, |
| "learning_rate": 3.2102908277404926e-05, |
| "loss": 4.0875, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.430024320141499, |
| "grad_norm": 3.6071228981018066, |
| "learning_rate": 3.2040765597812575e-05, |
| "loss": 4.0507, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.4311297811187265, |
| "grad_norm": 3.513573169708252, |
| "learning_rate": 3.197862291822024e-05, |
| "loss": 3.9596, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.432235242095954, |
| "grad_norm": 3.4200334548950195, |
| "learning_rate": 3.1916480238627894e-05, |
| "loss": 3.9723, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.4333407030731815, |
| "grad_norm": 3.472170114517212, |
| "learning_rate": 3.1854337559035544e-05, |
| "loss": 4.0224, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.43444616405040903, |
| "grad_norm": 3.499969482421875, |
| "learning_rate": 3.179219487944321e-05, |
| "loss": 3.935, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.4355516250276365, |
| "grad_norm": 3.5393736362457275, |
| "learning_rate": 3.1730052199850856e-05, |
| "loss": 4.1007, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.436657086004864, |
| "grad_norm": 3.557710647583008, |
| "learning_rate": 3.166790952025851e-05, |
| "loss": 4.1932, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.43776254698209155, |
| "grad_norm": 3.4602739810943604, |
| "learning_rate": 3.1605766840666175e-05, |
| "loss": 4.0741, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.438868007959319, |
| "grad_norm": 3.578395366668701, |
| "learning_rate": 3.1543624161073825e-05, |
| "loss": 4.0468, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.43997346893654654, |
| "grad_norm": 3.3289973735809326, |
| "learning_rate": 3.148148148148148e-05, |
| "loss": 3.9823, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.44107892991377406, |
| "grad_norm": 3.6602888107299805, |
| "learning_rate": 3.141933880188914e-05, |
| "loss": 4.0993, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.44218439089100153, |
| "grad_norm": 3.5060999393463135, |
| "learning_rate": 3.135719612229679e-05, |
| "loss": 3.9399, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.44328985186822906, |
| "grad_norm": 3.185040235519409, |
| "learning_rate": 3.1295053442704456e-05, |
| "loss": 4.0126, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.4443953128454566, |
| "grad_norm": 3.3001205921173096, |
| "learning_rate": 3.1232910763112105e-05, |
| "loss": 4.1115, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.44550077382268405, |
| "grad_norm": 3.4892706871032715, |
| "learning_rate": 3.117076808351976e-05, |
| "loss": 4.1656, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.4466062347999116, |
| "grad_norm": 3.1955862045288086, |
| "learning_rate": 3.110862540392742e-05, |
| "loss": 4.0902, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.44771169577713904, |
| "grad_norm": 3.3935418128967285, |
| "learning_rate": 3.1046482724335074e-05, |
| "loss": 3.9551, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.44881715675436656, |
| "grad_norm": 3.6117637157440186, |
| "learning_rate": 3.098434004474273e-05, |
| "loss": 4.1856, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.4499226177315941, |
| "grad_norm": 3.432446002960205, |
| "learning_rate": 3.0922197365150386e-05, |
| "loss": 4.1022, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.45102807870882156, |
| "grad_norm": 3.3948235511779785, |
| "learning_rate": 3.086005468555804e-05, |
| "loss": 4.0777, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.4521335396860491, |
| "grad_norm": 3.169699192047119, |
| "learning_rate": 3.07979120059657e-05, |
| "loss": 4.0572, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.4532390006632766, |
| "grad_norm": 3.3817138671875, |
| "learning_rate": 3.0735769326373355e-05, |
| "loss": 4.0533, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.45434446164050407, |
| "grad_norm": 3.4111692905426025, |
| "learning_rate": 3.067362664678101e-05, |
| "loss": 4.0991, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.4554499226177316, |
| "grad_norm": 3.7082407474517822, |
| "learning_rate": 3.061148396718867e-05, |
| "loss": 4.0745, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.4565553835949591, |
| "grad_norm": 3.393707036972046, |
| "learning_rate": 3.054934128759632e-05, |
| "loss": 3.9929, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.4576608445721866, |
| "grad_norm": 3.750239133834839, |
| "learning_rate": 3.048719860800398e-05, |
| "loss": 4.1835, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.4587663055494141, |
| "grad_norm": 3.366420030593872, |
| "learning_rate": 3.0425055928411632e-05, |
| "loss": 4.0436, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.45987176652664163, |
| "grad_norm": 3.3570804595947266, |
| "learning_rate": 3.0362913248819292e-05, |
| "loss": 3.9977, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.4609772275038691, |
| "grad_norm": 3.541613817214966, |
| "learning_rate": 3.0300770569226945e-05, |
| "loss": 4.0789, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.4620826884810966, |
| "grad_norm": 3.697382926940918, |
| "learning_rate": 3.02386278896346e-05, |
| "loss": 4.1316, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.4631881494583241, |
| "grad_norm": 3.375995397567749, |
| "learning_rate": 3.017648521004226e-05, |
| "loss": 3.935, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.4642936104355516, |
| "grad_norm": 3.3144774436950684, |
| "learning_rate": 3.0114342530449913e-05, |
| "loss": 4.1222, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.46539907141277914, |
| "grad_norm": 3.600338935852051, |
| "learning_rate": 3.005219985085757e-05, |
| "loss": 4.1123, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.4665045323900066, |
| "grad_norm": 3.3715898990631104, |
| "learning_rate": 2.999005717126523e-05, |
| "loss": 4.1952, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.46760999336723413, |
| "grad_norm": 3.2076468467712402, |
| "learning_rate": 2.992791449167288e-05, |
| "loss": 3.9456, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.46871545434446166, |
| "grad_norm": 3.7750439643859863, |
| "learning_rate": 2.986577181208054e-05, |
| "loss": 4.0785, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.4698209153216891, |
| "grad_norm": 3.3552026748657227, |
| "learning_rate": 2.980362913248819e-05, |
| "loss": 4.3222, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.47092637629891665, |
| "grad_norm": 3.4313700199127197, |
| "learning_rate": 2.974148645289585e-05, |
| "loss": 3.9145, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.4720318372761442, |
| "grad_norm": 3.4928014278411865, |
| "learning_rate": 2.967934377330351e-05, |
| "loss": 3.8454, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.47313729825337164, |
| "grad_norm": 3.6989784240722656, |
| "learning_rate": 2.9617201093711163e-05, |
| "loss": 3.9581, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.47424275923059916, |
| "grad_norm": 3.152308702468872, |
| "learning_rate": 2.955505841411882e-05, |
| "loss": 3.9159, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.4753482202078267, |
| "grad_norm": 3.2610297203063965, |
| "learning_rate": 2.949291573452647e-05, |
| "loss": 4.1646, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.47645368118505416, |
| "grad_norm": 3.4919862747192383, |
| "learning_rate": 2.943077305493413e-05, |
| "loss": 3.9627, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.4775591421622817, |
| "grad_norm": 3.323495388031006, |
| "learning_rate": 2.9368630375341787e-05, |
| "loss": 3.9826, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.47866460313950915, |
| "grad_norm": 3.4803435802459717, |
| "learning_rate": 2.930648769574944e-05, |
| "loss": 4.1256, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.47977006411673667, |
| "grad_norm": 3.3792881965637207, |
| "learning_rate": 2.92443450161571e-05, |
| "loss": 3.9697, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.4808755250939642, |
| "grad_norm": 3.5845255851745605, |
| "learning_rate": 2.9182202336564756e-05, |
| "loss": 4.1054, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.48198098607119166, |
| "grad_norm": 3.275973081588745, |
| "learning_rate": 2.912005965697241e-05, |
| "loss": 4.1417, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.4830864470484192, |
| "grad_norm": 3.3241536617279053, |
| "learning_rate": 2.9057916977380068e-05, |
| "loss": 4.0629, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.4841919080256467, |
| "grad_norm": 3.298708200454712, |
| "learning_rate": 2.899577429778772e-05, |
| "loss": 3.9206, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.4852973690028742, |
| "grad_norm": 3.18892502784729, |
| "learning_rate": 2.8933631618195377e-05, |
| "loss": 4.0769, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.4864028299801017, |
| "grad_norm": 3.206279993057251, |
| "learning_rate": 2.8871488938603037e-05, |
| "loss": 3.937, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.4875082909573292, |
| "grad_norm": 3.4408323764801025, |
| "learning_rate": 2.880934625901069e-05, |
| "loss": 4.0496, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.4886137519345567, |
| "grad_norm": 3.258359670639038, |
| "learning_rate": 2.8747203579418346e-05, |
| "loss": 4.038, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.4897192129117842, |
| "grad_norm": 3.336268424987793, |
| "learning_rate": 2.8685060899826e-05, |
| "loss": 4.0437, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.49082467388901174, |
| "grad_norm": 3.27437686920166, |
| "learning_rate": 2.8622918220233658e-05, |
| "loss": 4.1238, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.4919301348662392, |
| "grad_norm": 3.076141595840454, |
| "learning_rate": 2.8560775540641317e-05, |
| "loss": 3.987, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.49303559584346673, |
| "grad_norm": 3.2528483867645264, |
| "learning_rate": 2.8498632861048967e-05, |
| "loss": 3.9728, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.4941410568206942, |
| "grad_norm": 3.397096872329712, |
| "learning_rate": 2.8436490181456626e-05, |
| "loss": 4.1707, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.4952465177979217, |
| "grad_norm": 3.2209689617156982, |
| "learning_rate": 2.837434750186428e-05, |
| "loss": 4.0548, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.49635197877514925, |
| "grad_norm": 3.292736530303955, |
| "learning_rate": 2.831220482227194e-05, |
| "loss": 4.0244, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.4974574397523767, |
| "grad_norm": 3.461022138595581, |
| "learning_rate": 2.8250062142679595e-05, |
| "loss": 4.0763, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.49856290072960424, |
| "grad_norm": 3.4967451095581055, |
| "learning_rate": 2.8187919463087248e-05, |
| "loss": 4.1721, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.49966836170683177, |
| "grad_norm": 3.2440531253814697, |
| "learning_rate": 2.8125776783494907e-05, |
| "loss": 4.0546, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.5007738226840592, |
| "grad_norm": 3.318380355834961, |
| "learning_rate": 2.8063634103902563e-05, |
| "loss": 4.0751, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.5018792836612868, |
| "grad_norm": 3.1638567447662354, |
| "learning_rate": 2.8001491424310216e-05, |
| "loss": 3.9274, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.5029847446385143, |
| "grad_norm": 3.345717430114746, |
| "learning_rate": 2.7939348744717876e-05, |
| "loss": 4.1606, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.5040902056157418, |
| "grad_norm": 3.5760574340820312, |
| "learning_rate": 2.787720606512553e-05, |
| "loss": 3.9832, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.5051956665929692, |
| "grad_norm": 3.3899612426757812, |
| "learning_rate": 2.7815063385533185e-05, |
| "loss": 4.0456, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.5063011275701967, |
| "grad_norm": 3.3774311542510986, |
| "learning_rate": 2.7752920705940844e-05, |
| "loss": 4.0956, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.5074065885474243, |
| "grad_norm": 3.1358556747436523, |
| "learning_rate": 2.7690778026348497e-05, |
| "loss": 4.1611, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.5085120495246518, |
| "grad_norm": 3.3426547050476074, |
| "learning_rate": 2.7628635346756153e-05, |
| "loss": 4.1298, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5096175105018793, |
| "grad_norm": 3.252143383026123, |
| "learning_rate": 2.7566492667163806e-05, |
| "loss": 4.0572, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.5107229714791068, |
| "grad_norm": 3.4557764530181885, |
| "learning_rate": 2.7504349987571466e-05, |
| "loss": 4.0824, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.5118284324563342, |
| "grad_norm": 3.2078895568847656, |
| "learning_rate": 2.7442207307979122e-05, |
| "loss": 3.9483, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.5129338934335618, |
| "grad_norm": 3.4674055576324463, |
| "learning_rate": 2.7380064628386775e-05, |
| "loss": 4.0843, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.5140393544107893, |
| "grad_norm": 3.7841782569885254, |
| "learning_rate": 2.7317921948794434e-05, |
| "loss": 4.2304, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.5151448153880168, |
| "grad_norm": 3.267167091369629, |
| "learning_rate": 2.725577926920209e-05, |
| "loss": 4.0463, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.5162502763652443, |
| "grad_norm": 3.782557725906372, |
| "learning_rate": 2.7193636589609743e-05, |
| "loss": 4.0149, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.5173557373424718, |
| "grad_norm": 3.4802868366241455, |
| "learning_rate": 2.7131493910017403e-05, |
| "loss": 3.9961, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.5184611983196993, |
| "grad_norm": 3.346196413040161, |
| "learning_rate": 2.7069351230425055e-05, |
| "loss": 3.9326, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.5195666592969268, |
| "grad_norm": 3.166124105453491, |
| "learning_rate": 2.7007208550832715e-05, |
| "loss": 3.9516, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5206721202741543, |
| "grad_norm": 3.288295269012451, |
| "learning_rate": 2.694506587124037e-05, |
| "loss": 4.1143, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.5217775812513819, |
| "grad_norm": 3.3296289443969727, |
| "learning_rate": 2.6882923191648024e-05, |
| "loss": 3.8863, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.5228830422286094, |
| "grad_norm": 3.1221563816070557, |
| "learning_rate": 2.6820780512055683e-05, |
| "loss": 3.9889, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.5239885032058368, |
| "grad_norm": 3.225713014602661, |
| "learning_rate": 2.6758637832463336e-05, |
| "loss": 3.947, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.5250939641830643, |
| "grad_norm": 3.5291709899902344, |
| "learning_rate": 2.6696495152870992e-05, |
| "loss": 4.1917, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.5261994251602918, |
| "grad_norm": 3.4283344745635986, |
| "learning_rate": 2.6634352473278652e-05, |
| "loss": 4.0173, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.5273048861375194, |
| "grad_norm": 3.4083287715911865, |
| "learning_rate": 2.6572209793686305e-05, |
| "loss": 4.1016, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.5284103471147469, |
| "grad_norm": 3.3082547187805176, |
| "learning_rate": 2.651006711409396e-05, |
| "loss": 4.1025, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.5295158080919744, |
| "grad_norm": 3.645259141921997, |
| "learning_rate": 2.644792443450162e-05, |
| "loss": 3.9657, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.5306212690692018, |
| "grad_norm": 3.1570723056793213, |
| "learning_rate": 2.6385781754909273e-05, |
| "loss": 4.0965, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5317267300464293, |
| "grad_norm": 3.387300491333008, |
| "learning_rate": 2.632363907531693e-05, |
| "loss": 4.0099, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.5328321910236569, |
| "grad_norm": 3.4514920711517334, |
| "learning_rate": 2.6261496395724582e-05, |
| "loss": 3.9037, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.5339376520008844, |
| "grad_norm": 3.7543208599090576, |
| "learning_rate": 2.6199353716132242e-05, |
| "loss": 4.0804, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.5350431129781119, |
| "grad_norm": 3.4875600337982178, |
| "learning_rate": 2.6137211036539898e-05, |
| "loss": 4.0105, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.5361485739553393, |
| "grad_norm": 3.4124867916107178, |
| "learning_rate": 2.607506835694755e-05, |
| "loss": 4.1436, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.5372540349325668, |
| "grad_norm": 3.392489194869995, |
| "learning_rate": 2.601292567735521e-05, |
| "loss": 4.087, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5383594959097944, |
| "grad_norm": 3.3754377365112305, |
| "learning_rate": 2.5950782997762863e-05, |
| "loss": 4.0433, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.5394649568870219, |
| "grad_norm": 3.23037052154541, |
| "learning_rate": 2.588864031817052e-05, |
| "loss": 3.9529, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.5405704178642494, |
| "grad_norm": 3.4852147102355957, |
| "learning_rate": 2.582649763857818e-05, |
| "loss": 4.0165, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.5416758788414769, |
| "grad_norm": 3.5113587379455566, |
| "learning_rate": 2.576435495898583e-05, |
| "loss": 4.1145, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5427813398187044, |
| "grad_norm": 3.569577693939209, |
| "learning_rate": 2.5702212279393488e-05, |
| "loss": 4.2112, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.5438868007959319, |
| "grad_norm": 3.2119925022125244, |
| "learning_rate": 2.564006959980114e-05, |
| "loss": 3.9315, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.5449922617731594, |
| "grad_norm": 3.502654790878296, |
| "learning_rate": 2.55779269202088e-05, |
| "loss": 4.0101, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.5460977227503869, |
| "grad_norm": 3.343017101287842, |
| "learning_rate": 2.551578424061646e-05, |
| "loss": 4.0618, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.5472031837276145, |
| "grad_norm": 3.0435657501220703, |
| "learning_rate": 2.545364156102411e-05, |
| "loss": 4.2169, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.548308644704842, |
| "grad_norm": 3.167151927947998, |
| "learning_rate": 2.539149888143177e-05, |
| "loss": 4.043, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.5494141056820694, |
| "grad_norm": 3.2351808547973633, |
| "learning_rate": 2.5329356201839428e-05, |
| "loss": 4.0381, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.5505195666592969, |
| "grad_norm": 3.1816964149475098, |
| "learning_rate": 2.526721352224708e-05, |
| "loss": 4.1283, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5516250276365244, |
| "grad_norm": 3.2556283473968506, |
| "learning_rate": 2.5205070842654737e-05, |
| "loss": 4.1709, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.552730488613752, |
| "grad_norm": 3.2887418270111084, |
| "learning_rate": 2.514292816306239e-05, |
| "loss": 4.1116, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5538359495909795, |
| "grad_norm": 3.559380531311035, |
| "learning_rate": 2.508078548347005e-05, |
| "loss": 4.0527, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.5549414105682069, |
| "grad_norm": 3.470162868499756, |
| "learning_rate": 2.5018642803877706e-05, |
| "loss": 4.0154, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.5560468715454344, |
| "grad_norm": 3.294788122177124, |
| "learning_rate": 2.495650012428536e-05, |
| "loss": 4.0073, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.5571523325226619, |
| "grad_norm": 3.3408074378967285, |
| "learning_rate": 2.4894357444693018e-05, |
| "loss": 4.1111, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.5582577934998895, |
| "grad_norm": 3.436032295227051, |
| "learning_rate": 2.4832214765100674e-05, |
| "loss": 4.0138, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.559363254477117, |
| "grad_norm": 3.383261203765869, |
| "learning_rate": 2.4770072085508327e-05, |
| "loss": 4.1234, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.5604687154543445, |
| "grad_norm": 3.479888916015625, |
| "learning_rate": 2.4707929405915983e-05, |
| "loss": 4.0519, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.5615741764315719, |
| "grad_norm": 3.390536069869995, |
| "learning_rate": 2.4645786726323643e-05, |
| "loss": 4.1424, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.5626796374087994, |
| "grad_norm": 3.320270538330078, |
| "learning_rate": 2.4583644046731296e-05, |
| "loss": 4.0054, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.563785098386027, |
| "grad_norm": 3.477365016937256, |
| "learning_rate": 2.452150136713895e-05, |
| "loss": 4.0191, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.5648905593632545, |
| "grad_norm": 3.547175884246826, |
| "learning_rate": 2.4459358687546608e-05, |
| "loss": 4.0718, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.565996020340482, |
| "grad_norm": 3.567544937133789, |
| "learning_rate": 2.4397216007954264e-05, |
| "loss": 4.1387, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.5671014813177094, |
| "grad_norm": 3.351850748062134, |
| "learning_rate": 2.433507332836192e-05, |
| "loss": 4.0413, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.568206942294937, |
| "grad_norm": 3.4294025897979736, |
| "learning_rate": 2.4272930648769576e-05, |
| "loss": 4.0404, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.5693124032721645, |
| "grad_norm": 3.4079086780548096, |
| "learning_rate": 2.4210787969177233e-05, |
| "loss": 4.1669, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.570417864249392, |
| "grad_norm": 3.6439168453216553, |
| "learning_rate": 2.4148645289584885e-05, |
| "loss": 4.109, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.5715233252266195, |
| "grad_norm": 3.3144097328186035, |
| "learning_rate": 2.4086502609992545e-05, |
| "loss": 3.9591, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.572628786203847, |
| "grad_norm": 3.3762526512145996, |
| "learning_rate": 2.40243599304002e-05, |
| "loss": 4.1867, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.5737342471810745, |
| "grad_norm": 3.2939674854278564, |
| "learning_rate": 2.3962217250807857e-05, |
| "loss": 4.1226, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.574839708158302, |
| "grad_norm": 3.094438314437866, |
| "learning_rate": 2.390007457121551e-05, |
| "loss": 3.9615, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.5759451691355295, |
| "grad_norm": 3.3845763206481934, |
| "learning_rate": 2.383793189162317e-05, |
| "loss": 3.9805, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.577050630112757, |
| "grad_norm": 3.696262836456299, |
| "learning_rate": 2.3775789212030826e-05, |
| "loss": 3.8625, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.5781560910899846, |
| "grad_norm": 3.3800036907196045, |
| "learning_rate": 2.371364653243848e-05, |
| "loss": 4.1462, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.5792615520672121, |
| "grad_norm": 3.573200225830078, |
| "learning_rate": 2.3651503852846135e-05, |
| "loss": 4.1071, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.5803670130444395, |
| "grad_norm": 3.651068925857544, |
| "learning_rate": 2.358936117325379e-05, |
| "loss": 4.0191, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.581472474021667, |
| "grad_norm": 3.1807289123535156, |
| "learning_rate": 2.3527218493661447e-05, |
| "loss": 4.1579, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.5825779349988945, |
| "grad_norm": 3.5472700595855713, |
| "learning_rate": 2.3465075814069103e-05, |
| "loss": 4.0699, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.5836833959761221, |
| "grad_norm": 3.3236019611358643, |
| "learning_rate": 2.340293313447676e-05, |
| "loss": 3.9927, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.5847888569533496, |
| "grad_norm": 3.5756359100341797, |
| "learning_rate": 2.3340790454884416e-05, |
| "loss": 4.2018, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.585894317930577, |
| "grad_norm": 3.5606160163879395, |
| "learning_rate": 2.3278647775292072e-05, |
| "loss": 4.0626, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.5869997789078045, |
| "grad_norm": 3.5119574069976807, |
| "learning_rate": 2.3216505095699728e-05, |
| "loss": 4.0997, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.588105239885032, |
| "grad_norm": 3.373201847076416, |
| "learning_rate": 2.3154362416107384e-05, |
| "loss": 3.9609, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.5892107008622596, |
| "grad_norm": 3.168120861053467, |
| "learning_rate": 2.309221973651504e-05, |
| "loss": 3.8898, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.5903161618394871, |
| "grad_norm": 3.260366678237915, |
| "learning_rate": 2.3030077056922693e-05, |
| "loss": 4.0445, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.5914216228167146, |
| "grad_norm": 3.53143572807312, |
| "learning_rate": 2.2967934377330353e-05, |
| "loss": 4.033, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.592527083793942, |
| "grad_norm": 3.4146888256073, |
| "learning_rate": 2.290579169773801e-05, |
| "loss": 3.9579, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.5936325447711696, |
| "grad_norm": 3.554407835006714, |
| "learning_rate": 2.284364901814566e-05, |
| "loss": 4.0876, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.5947380057483971, |
| "grad_norm": 3.302635431289673, |
| "learning_rate": 2.2781506338553318e-05, |
| "loss": 4.0015, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.5958434667256246, |
| "grad_norm": 2.994694948196411, |
| "learning_rate": 2.2719363658960977e-05, |
| "loss": 4.1925, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.5969489277028521, |
| "grad_norm": 3.191727876663208, |
| "learning_rate": 2.2657220979368633e-05, |
| "loss": 4.0834, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.5980543886800795, |
| "grad_norm": 3.187432050704956, |
| "learning_rate": 2.2595078299776286e-05, |
| "loss": 4.1476, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.5991598496573071, |
| "grad_norm": 3.8028817176818848, |
| "learning_rate": 2.2532935620183942e-05, |
| "loss": 4.0108, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.6002653106345346, |
| "grad_norm": 3.493286609649658, |
| "learning_rate": 2.2470792940591602e-05, |
| "loss": 4.1705, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.6013707716117621, |
| "grad_norm": 3.4640684127807617, |
| "learning_rate": 2.2408650260999255e-05, |
| "loss": 4.1311, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.6024762325889896, |
| "grad_norm": 3.8911242485046387, |
| "learning_rate": 2.234650758140691e-05, |
| "loss": 4.1535, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.6035816935662172, |
| "grad_norm": 3.4392147064208984, |
| "learning_rate": 2.2284364901814567e-05, |
| "loss": 4.1343, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.6046871545434446, |
| "grad_norm": 3.2995851039886475, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 4.0273, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.6057926155206721, |
| "grad_norm": 3.1584272384643555, |
| "learning_rate": 2.216007954262988e-05, |
| "loss": 4.2191, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.6068980764978996, |
| "grad_norm": 3.7929775714874268, |
| "learning_rate": 2.2097936863037536e-05, |
| "loss": 3.9746, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.6080035374751271, |
| "grad_norm": 3.4396305084228516, |
| "learning_rate": 2.2035794183445192e-05, |
| "loss": 4.2164, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.6091089984523547, |
| "grad_norm": 3.2499279975891113, |
| "learning_rate": 2.1973651503852845e-05, |
| "loss": 3.9657, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.6102144594295822, |
| "grad_norm": 3.682943105697632, |
| "learning_rate": 2.1911508824260504e-05, |
| "loss": 4.0552, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.6113199204068096, |
| "grad_norm": 3.217568874359131, |
| "learning_rate": 2.184936614466816e-05, |
| "loss": 4.1355, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.6124253813840371, |
| "grad_norm": 3.696176528930664, |
| "learning_rate": 2.1787223465075816e-05, |
| "loss": 4.1971, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.6135308423612647, |
| "grad_norm": 3.366211175918579, |
| "learning_rate": 2.172508078548347e-05, |
| "loss": 4.1779, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.6146363033384922, |
| "grad_norm": 3.3090131282806396, |
| "learning_rate": 2.1662938105891125e-05, |
| "loss": 4.0138, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.6157417643157197, |
| "grad_norm": 3.492255210876465, |
| "learning_rate": 2.1600795426298785e-05, |
| "loss": 4.113, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.6168472252929471, |
| "grad_norm": 3.2298202514648438, |
| "learning_rate": 2.1538652746706438e-05, |
| "loss": 4.0822, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.6179526862701746, |
| "grad_norm": 3.3362765312194824, |
| "learning_rate": 2.1476510067114094e-05, |
| "loss": 4.1301, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.6190581472474022, |
| "grad_norm": 3.1772379875183105, |
| "learning_rate": 2.141436738752175e-05, |
| "loss": 4.0127, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6201636082246297, |
| "grad_norm": 3.5195131301879883, |
| "learning_rate": 2.1352224707929406e-05, |
| "loss": 4.0411, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.6212690692018572, |
| "grad_norm": 3.1108715534210205, |
| "learning_rate": 2.1290082028337062e-05, |
| "loss": 3.9511, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.6223745301790847, |
| "grad_norm": 3.278776168823242, |
| "learning_rate": 2.122793934874472e-05, |
| "loss": 4.086, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.6234799911563121, |
| "grad_norm": 3.3844807147979736, |
| "learning_rate": 2.1165796669152375e-05, |
| "loss": 4.1389, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.6245854521335397, |
| "grad_norm": 3.547020673751831, |
| "learning_rate": 2.110365398956003e-05, |
| "loss": 4.1154, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.6256909131107672, |
| "grad_norm": 3.083136558532715, |
| "learning_rate": 2.1041511309967687e-05, |
| "loss": 3.9761, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.6267963740879947, |
| "grad_norm": 3.7824316024780273, |
| "learning_rate": 2.0979368630375343e-05, |
| "loss": 4.04, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.6279018350652222, |
| "grad_norm": 3.584540367126465, |
| "learning_rate": 2.0917225950783e-05, |
| "loss": 4.0237, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.6290072960424496, |
| "grad_norm": 3.4071264266967773, |
| "learning_rate": 2.0855083271190652e-05, |
| "loss": 4.0866, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.6301127570196772, |
| "grad_norm": 3.149873733520508, |
| "learning_rate": 2.0792940591598312e-05, |
| "loss": 4.0776, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.6312182179969047, |
| "grad_norm": 3.3021628856658936, |
| "learning_rate": 2.0730797912005968e-05, |
| "loss": 4.1142, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.6323236789741322, |
| "grad_norm": 3.379462957382202, |
| "learning_rate": 2.066865523241362e-05, |
| "loss": 4.197, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.6334291399513597, |
| "grad_norm": 3.624547243118286, |
| "learning_rate": 2.0606512552821277e-05, |
| "loss": 4.1014, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.6345346009285873, |
| "grad_norm": 3.391458511352539, |
| "learning_rate": 2.0544369873228937e-05, |
| "loss": 4.1709, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.6356400619058147, |
| "grad_norm": 3.3703296184539795, |
| "learning_rate": 2.0482227193636593e-05, |
| "loss": 4.0528, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.6367455228830422, |
| "grad_norm": 3.6773877143859863, |
| "learning_rate": 2.0420084514044246e-05, |
| "loss": 4.0063, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.6378509838602697, |
| "grad_norm": 3.203677177429199, |
| "learning_rate": 2.03579418344519e-05, |
| "loss": 4.0733, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.6389564448374973, |
| "grad_norm": 3.36698055267334, |
| "learning_rate": 2.0295799154859558e-05, |
| "loss": 4.0456, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.6400619058147248, |
| "grad_norm": 3.412586212158203, |
| "learning_rate": 2.0233656475267214e-05, |
| "loss": 4.0807, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.6411673667919523, |
| "grad_norm": 3.175722599029541, |
| "learning_rate": 2.017151379567487e-05, |
| "loss": 4.1196, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6422728277691797, |
| "grad_norm": 3.315753936767578, |
| "learning_rate": 2.0109371116082526e-05, |
| "loss": 4.0987, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.6433782887464072, |
| "grad_norm": 3.3233401775360107, |
| "learning_rate": 2.0047228436490183e-05, |
| "loss": 4.0974, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.6444837497236348, |
| "grad_norm": 3.648879051208496, |
| "learning_rate": 1.998508575689784e-05, |
| "loss": 4.0625, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.6455892107008623, |
| "grad_norm": 3.3237850666046143, |
| "learning_rate": 1.9922943077305495e-05, |
| "loss": 4.138, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.6466946716780898, |
| "grad_norm": 3.314603090286255, |
| "learning_rate": 1.986080039771315e-05, |
| "loss": 4.2303, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.6478001326553172, |
| "grad_norm": 3.116244316101074, |
| "learning_rate": 1.9798657718120804e-05, |
| "loss": 4.1222, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.6489055936325447, |
| "grad_norm": 3.232257127761841, |
| "learning_rate": 1.9736515038528463e-05, |
| "loss": 4.1056, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.6500110546097723, |
| "grad_norm": 3.373582124710083, |
| "learning_rate": 1.967437235893612e-05, |
| "loss": 4.1316, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.6511165155869998, |
| "grad_norm": 3.2493808269500732, |
| "learning_rate": 1.9612229679343776e-05, |
| "loss": 4.1764, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.6522219765642273, |
| "grad_norm": 2.9851105213165283, |
| "learning_rate": 1.955008699975143e-05, |
| "loss": 4.074, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6533274375414548, |
| "grad_norm": 3.526233196258545, |
| "learning_rate": 1.9487944320159085e-05, |
| "loss": 4.0382, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.6544328985186822, |
| "grad_norm": 3.4045310020446777, |
| "learning_rate": 1.9425801640566744e-05, |
| "loss": 4.012, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.6555383594959098, |
| "grad_norm": 3.5040388107299805, |
| "learning_rate": 1.9363658960974397e-05, |
| "loss": 3.9922, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.6566438204731373, |
| "grad_norm": 3.4251108169555664, |
| "learning_rate": 1.9301516281382053e-05, |
| "loss": 4.0577, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.6577492814503648, |
| "grad_norm": 3.363278388977051, |
| "learning_rate": 1.923937360178971e-05, |
| "loss": 4.1127, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.6588547424275923, |
| "grad_norm": 3.2592687606811523, |
| "learning_rate": 1.917723092219737e-05, |
| "loss": 4.0898, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.6599602034048198, |
| "grad_norm": 3.295732021331787, |
| "learning_rate": 1.9115088242605022e-05, |
| "loss": 4.0772, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.6610656643820473, |
| "grad_norm": 3.302295684814453, |
| "learning_rate": 1.9052945563012678e-05, |
| "loss": 4.1688, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6621711253592748, |
| "grad_norm": 3.415590524673462, |
| "learning_rate": 1.8990802883420334e-05, |
| "loss": 4.0569, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.6632765863365023, |
| "grad_norm": 3.4967286586761475, |
| "learning_rate": 1.892866020382799e-05, |
| "loss": 4.0951, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6643820473137299, |
| "grad_norm": 3.3429524898529053, |
| "learning_rate": 1.8866517524235646e-05, |
| "loss": 4.0436, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.6654875082909574, |
| "grad_norm": 3.2878565788269043, |
| "learning_rate": 1.8804374844643303e-05, |
| "loss": 4.0224, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.6665929692681848, |
| "grad_norm": 3.4439568519592285, |
| "learning_rate": 1.874223216505096e-05, |
| "loss": 3.9529, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.6676984302454123, |
| "grad_norm": 3.4221768379211426, |
| "learning_rate": 1.868008948545861e-05, |
| "loss": 4.0604, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.6688038912226398, |
| "grad_norm": 3.2308311462402344, |
| "learning_rate": 1.861794680586627e-05, |
| "loss": 4.0717, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.6699093521998674, |
| "grad_norm": 3.7637572288513184, |
| "learning_rate": 1.8555804126273927e-05, |
| "loss": 4.3161, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.6710148131770949, |
| "grad_norm": 3.2774343490600586, |
| "learning_rate": 1.849366144668158e-05, |
| "loss": 4.1447, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.6721202741543224, |
| "grad_norm": 3.3979032039642334, |
| "learning_rate": 1.8431518767089236e-05, |
| "loss": 4.2971, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.6732257351315498, |
| "grad_norm": 3.259497880935669, |
| "learning_rate": 1.8369376087496896e-05, |
| "loss": 4.2798, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.6743311961087773, |
| "grad_norm": 3.346216917037964, |
| "learning_rate": 1.8307233407904552e-05, |
| "loss": 4.2459, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.6754366570860049, |
| "grad_norm": 3.195192813873291, |
| "learning_rate": 1.8245090728312205e-05, |
| "loss": 4.1107, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.6765421180632324, |
| "grad_norm": 3.3949368000030518, |
| "learning_rate": 1.818294804871986e-05, |
| "loss": 4.1965, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.6776475790404599, |
| "grad_norm": 3.1918063163757324, |
| "learning_rate": 1.8120805369127517e-05, |
| "loss": 4.2232, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.6787530400176873, |
| "grad_norm": 3.080773115158081, |
| "learning_rate": 1.8058662689535173e-05, |
| "loss": 4.1366, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.6798585009949148, |
| "grad_norm": 3.573559284210205, |
| "learning_rate": 1.799652000994283e-05, |
| "loss": 4.0492, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.6809639619721424, |
| "grad_norm": 3.105289936065674, |
| "learning_rate": 1.7934377330350486e-05, |
| "loss": 4.019, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.6820694229493699, |
| "grad_norm": 3.233858108520508, |
| "learning_rate": 1.7872234650758142e-05, |
| "loss": 4.2052, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.6831748839265974, |
| "grad_norm": 3.489800214767456, |
| "learning_rate": 1.7810091971165798e-05, |
| "loss": 4.244, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.6842803449038249, |
| "grad_norm": 3.919562339782715, |
| "learning_rate": 1.7747949291573454e-05, |
| "loss": 4.2778, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.6853858058810524, |
| "grad_norm": 3.4953386783599854, |
| "learning_rate": 1.768580661198111e-05, |
| "loss": 4.0999, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.6864912668582799, |
| "grad_norm": 3.0462942123413086, |
| "learning_rate": 1.7623663932388766e-05, |
| "loss": 4.1613, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.6875967278355074, |
| "grad_norm": 3.604140520095825, |
| "learning_rate": 1.756152125279642e-05, |
| "loss": 4.037, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.6887021888127349, |
| "grad_norm": 3.4862539768218994, |
| "learning_rate": 1.749937857320408e-05, |
| "loss": 4.136, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.6898076497899625, |
| "grad_norm": 3.3312830924987793, |
| "learning_rate": 1.7437235893611735e-05, |
| "loss": 4.1436, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.6909131107671899, |
| "grad_norm": 3.4092671871185303, |
| "learning_rate": 1.7375093214019388e-05, |
| "loss": 4.2998, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.6920185717444174, |
| "grad_norm": 3.138869285583496, |
| "learning_rate": 1.7312950534427044e-05, |
| "loss": 3.9221, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.6931240327216449, |
| "grad_norm": 3.570099115371704, |
| "learning_rate": 1.7250807854834704e-05, |
| "loss": 4.1127, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.6942294936988724, |
| "grad_norm": 3.4143168926239014, |
| "learning_rate": 1.7188665175242356e-05, |
| "loss": 4.1529, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.6953349546761, |
| "grad_norm": 3.299022674560547, |
| "learning_rate": 1.7126522495650012e-05, |
| "loss": 4.094, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.6964404156533275, |
| "grad_norm": 3.2752246856689453, |
| "learning_rate": 1.706437981605767e-05, |
| "loss": 4.0729, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.6975458766305549, |
| "grad_norm": 3.453444004058838, |
| "learning_rate": 1.7002237136465328e-05, |
| "loss": 4.1417, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.6986513376077824, |
| "grad_norm": 3.2120327949523926, |
| "learning_rate": 1.694009445687298e-05, |
| "loss": 4.249, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.6997567985850099, |
| "grad_norm": 3.4823880195617676, |
| "learning_rate": 1.6877951777280637e-05, |
| "loss": 4.197, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.7008622595622375, |
| "grad_norm": 3.438119888305664, |
| "learning_rate": 1.6815809097688293e-05, |
| "loss": 4.1066, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.701967720539465, |
| "grad_norm": 3.4621167182922363, |
| "learning_rate": 1.675366641809595e-05, |
| "loss": 4.2766, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.7030731815166925, |
| "grad_norm": 3.3527414798736572, |
| "learning_rate": 1.6691523738503606e-05, |
| "loss": 4.0086, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.7041786424939199, |
| "grad_norm": 3.4415431022644043, |
| "learning_rate": 1.6629381058911262e-05, |
| "loss": 4.0336, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.7052841034711474, |
| "grad_norm": 3.243367910385132, |
| "learning_rate": 1.6567238379318918e-05, |
| "loss": 4.1119, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.706389564448375, |
| "grad_norm": 3.515403985977173, |
| "learning_rate": 1.650509569972657e-05, |
| "loss": 4.0391, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.7074950254256025, |
| "grad_norm": 3.0629870891571045, |
| "learning_rate": 1.644295302013423e-05, |
| "loss": 4.2706, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.70860048640283, |
| "grad_norm": 3.412379026412964, |
| "learning_rate": 1.6380810340541887e-05, |
| "loss": 4.3555, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.7097059473800574, |
| "grad_norm": 3.250455141067505, |
| "learning_rate": 1.631866766094954e-05, |
| "loss": 4.1877, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.710811408357285, |
| "grad_norm": 3.0698251724243164, |
| "learning_rate": 1.6256524981357195e-05, |
| "loss": 4.0128, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.7119168693345125, |
| "grad_norm": 3.3195056915283203, |
| "learning_rate": 1.619438230176485e-05, |
| "loss": 4.2022, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.71302233031174, |
| "grad_norm": 3.3622958660125732, |
| "learning_rate": 1.613223962217251e-05, |
| "loss": 4.187, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.7141277912889675, |
| "grad_norm": 3.3840930461883545, |
| "learning_rate": 1.6070096942580164e-05, |
| "loss": 4.2928, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.715233252266195, |
| "grad_norm": 3.4330742359161377, |
| "learning_rate": 1.600795426298782e-05, |
| "loss": 4.0841, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.7163387132434225, |
| "grad_norm": 3.258180856704712, |
| "learning_rate": 1.5945811583395476e-05, |
| "loss": 4.1938, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.71744417422065, |
| "grad_norm": 3.183001756668091, |
| "learning_rate": 1.5883668903803133e-05, |
| "loss": 4.078, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.7185496351978775, |
| "grad_norm": 3.0564966201782227, |
| "learning_rate": 1.582152622421079e-05, |
| "loss": 4.089, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.719655096175105, |
| "grad_norm": 3.324143648147583, |
| "learning_rate": 1.5759383544618445e-05, |
| "loss": 4.2551, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.7207605571523326, |
| "grad_norm": 3.4312210083007812, |
| "learning_rate": 1.56972408650261e-05, |
| "loss": 4.1726, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.72186601812956, |
| "grad_norm": 3.168652057647705, |
| "learning_rate": 1.5635098185433757e-05, |
| "loss": 4.0236, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.7229714791067875, |
| "grad_norm": 3.116694211959839, |
| "learning_rate": 1.5572955505841413e-05, |
| "loss": 4.2022, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.724076940084015, |
| "grad_norm": 3.235372543334961, |
| "learning_rate": 1.551081282624907e-05, |
| "loss": 3.8518, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.7251824010612425, |
| "grad_norm": 3.3609163761138916, |
| "learning_rate": 1.5448670146656726e-05, |
| "loss": 3.968, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.7262878620384701, |
| "grad_norm": 3.4579970836639404, |
| "learning_rate": 1.538652746706438e-05, |
| "loss": 4.212, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.7273933230156976, |
| "grad_norm": 3.582771062850952, |
| "learning_rate": 1.5324384787472038e-05, |
| "loss": 4.2005, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.728498783992925, |
| "grad_norm": 3.151522636413574, |
| "learning_rate": 1.5262242107879694e-05, |
| "loss": 4.0769, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.7296042449701525, |
| "grad_norm": 3.194068193435669, |
| "learning_rate": 1.5200099428287349e-05, |
| "loss": 4.2329, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.73070970594738, |
| "grad_norm": 3.24617600440979, |
| "learning_rate": 1.5137956748695003e-05, |
| "loss": 4.0845, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.7318151669246076, |
| "grad_norm": 3.347874641418457, |
| "learning_rate": 1.5075814069102661e-05, |
| "loss": 4.2557, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.7329206279018351, |
| "grad_norm": 3.392652988433838, |
| "learning_rate": 1.5013671389510317e-05, |
| "loss": 4.1908, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.7340260888790626, |
| "grad_norm": 3.364522933959961, |
| "learning_rate": 1.4951528709917972e-05, |
| "loss": 4.1181, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.73513154985629, |
| "grad_norm": 3.217658042907715, |
| "learning_rate": 1.4889386030325628e-05, |
| "loss": 4.1129, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.7362370108335176, |
| "grad_norm": 3.741403102874756, |
| "learning_rate": 1.4827243350733282e-05, |
| "loss": 4.1941, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.7373424718107451, |
| "grad_norm": 3.6244940757751465, |
| "learning_rate": 1.4765100671140942e-05, |
| "loss": 4.133, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.7384479327879726, |
| "grad_norm": 3.455331563949585, |
| "learning_rate": 1.4702957991548596e-05, |
| "loss": 4.1993, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.7395533937652001, |
| "grad_norm": 3.3067119121551514, |
| "learning_rate": 1.4640815311956253e-05, |
| "loss": 4.1962, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.7406588547424275, |
| "grad_norm": 3.3184375762939453, |
| "learning_rate": 1.4578672632363907e-05, |
| "loss": 4.0779, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.7417643157196551, |
| "grad_norm": 3.617077350616455, |
| "learning_rate": 1.4516529952771565e-05, |
| "loss": 3.995, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.7428697766968826, |
| "grad_norm": 3.471519947052002, |
| "learning_rate": 1.4454387273179221e-05, |
| "loss": 4.0302, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.7439752376741101, |
| "grad_norm": 3.3337936401367188, |
| "learning_rate": 1.4392244593586876e-05, |
| "loss": 4.1125, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.7450806986513376, |
| "grad_norm": 3.5475218296051025, |
| "learning_rate": 1.4330101913994532e-05, |
| "loss": 4.1158, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.7461861596285652, |
| "grad_norm": 3.225281238555908, |
| "learning_rate": 1.4267959234402186e-05, |
| "loss": 4.1048, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.7472916206057926, |
| "grad_norm": 2.9788243770599365, |
| "learning_rate": 1.4205816554809844e-05, |
| "loss": 4.1919, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.7483970815830201, |
| "grad_norm": 2.9584922790527344, |
| "learning_rate": 1.41436738752175e-05, |
| "loss": 3.9252, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.7495025425602476, |
| "grad_norm": 3.4342474937438965, |
| "learning_rate": 1.4081531195625155e-05, |
| "loss": 4.2655, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.7506080035374751, |
| "grad_norm": 3.157142400741577, |
| "learning_rate": 1.4019388516032811e-05, |
| "loss": 4.0619, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.7517134645147027, |
| "grad_norm": 3.739959716796875, |
| "learning_rate": 1.3957245836440469e-05, |
| "loss": 4.1531, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.7528189254919301, |
| "grad_norm": 3.4141812324523926, |
| "learning_rate": 1.3895103156848125e-05, |
| "loss": 4.0972, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.7539243864691576, |
| "grad_norm": 3.140306234359741, |
| "learning_rate": 1.383296047725578e-05, |
| "loss": 4.1615, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.7550298474463851, |
| "grad_norm": 3.495731830596924, |
| "learning_rate": 1.3770817797663436e-05, |
| "loss": 4.322, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.7561353084236127, |
| "grad_norm": 3.2486352920532227, |
| "learning_rate": 1.3708675118071093e-05, |
| "loss": 4.1291, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.7572407694008402, |
| "grad_norm": 3.405538320541382, |
| "learning_rate": 1.3646532438478748e-05, |
| "loss": 4.0567, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.7583462303780677, |
| "grad_norm": 3.2491066455841064, |
| "learning_rate": 1.3584389758886404e-05, |
| "loss": 4.2248, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.7594516913552951, |
| "grad_norm": 3.415019989013672, |
| "learning_rate": 1.3522247079294059e-05, |
| "loss": 4.2429, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.7605571523325226, |
| "grad_norm": 3.0789833068847656, |
| "learning_rate": 1.3460104399701715e-05, |
| "loss": 4.0823, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.7616626133097502, |
| "grad_norm": 3.2663156986236572, |
| "learning_rate": 1.3397961720109373e-05, |
| "loss": 4.1684, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.7627680742869777, |
| "grad_norm": 3.3702750205993652, |
| "learning_rate": 1.3335819040517029e-05, |
| "loss": 4.1521, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.7638735352642052, |
| "grad_norm": 3.318516731262207, |
| "learning_rate": 1.3273676360924683e-05, |
| "loss": 4.0572, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.7649789962414327, |
| "grad_norm": 3.307229995727539, |
| "learning_rate": 1.321153368133234e-05, |
| "loss": 4.2087, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.7660844572186601, |
| "grad_norm": 3.141308546066284, |
| "learning_rate": 1.3149391001739997e-05, |
| "loss": 4.2045, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.7671899181958877, |
| "grad_norm": 3.488524913787842, |
| "learning_rate": 1.3087248322147652e-05, |
| "loss": 4.1981, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.7682953791731152, |
| "grad_norm": 3.333773612976074, |
| "learning_rate": 1.3025105642555308e-05, |
| "loss": 4.0546, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.7694008401503427, |
| "grad_norm": 3.093600273132324, |
| "learning_rate": 1.2962962962962962e-05, |
| "loss": 4.1297, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.7705063011275702, |
| "grad_norm": 3.681091547012329, |
| "learning_rate": 1.2900820283370619e-05, |
| "loss": 4.2743, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.7716117621047976, |
| "grad_norm": 3.2113373279571533, |
| "learning_rate": 1.2838677603778276e-05, |
| "loss": 4.1716, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7727172230820252, |
| "grad_norm": 3.22847843170166, |
| "learning_rate": 1.2776534924185931e-05, |
| "loss": 4.1038, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.7738226840592527, |
| "grad_norm": 3.2960784435272217, |
| "learning_rate": 1.2714392244593587e-05, |
| "loss": 4.2599, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.7749281450364802, |
| "grad_norm": 3.509111166000366, |
| "learning_rate": 1.2652249565001242e-05, |
| "loss": 4.2696, |
| "step": 3505 |
| }, |
| { |
| "epoch": 0.7760336060137077, |
| "grad_norm": 3.4601404666900635, |
| "learning_rate": 1.2590106885408901e-05, |
| "loss": 4.0995, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.7771390669909353, |
| "grad_norm": 3.166656017303467, |
| "learning_rate": 1.2527964205816556e-05, |
| "loss": 4.3323, |
| "step": 3515 |
| }, |
| { |
| "epoch": 0.7782445279681627, |
| "grad_norm": 3.115483522415161, |
| "learning_rate": 1.2465821526224212e-05, |
| "loss": 4.2784, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.7793499889453902, |
| "grad_norm": 3.377978563308716, |
| "learning_rate": 1.2403678846631868e-05, |
| "loss": 4.1576, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.7804554499226177, |
| "grad_norm": 3.291743278503418, |
| "learning_rate": 1.2341536167039522e-05, |
| "loss": 4.3317, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.7815609108998453, |
| "grad_norm": 3.091101884841919, |
| "learning_rate": 1.227939348744718e-05, |
| "loss": 4.2178, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.7826663718770728, |
| "grad_norm": 3.3874189853668213, |
| "learning_rate": 1.2217250807854835e-05, |
| "loss": 4.0266, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.7837718328543002, |
| "grad_norm": 3.4406089782714844, |
| "learning_rate": 1.2155108128262491e-05, |
| "loss": 4.2243, |
| "step": 3545 |
| }, |
| { |
| "epoch": 0.7848772938315277, |
| "grad_norm": 3.2707858085632324, |
| "learning_rate": 1.2092965448670147e-05, |
| "loss": 4.1444, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.7859827548087552, |
| "grad_norm": 3.2035396099090576, |
| "learning_rate": 1.2030822769077803e-05, |
| "loss": 3.981, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.7870882157859828, |
| "grad_norm": 3.3851969242095947, |
| "learning_rate": 1.196868008948546e-05, |
| "loss": 4.219, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.7881936767632103, |
| "grad_norm": 3.0952658653259277, |
| "learning_rate": 1.1906537409893114e-05, |
| "loss": 4.2355, |
| "step": 3565 |
| }, |
| { |
| "epoch": 0.7892991377404378, |
| "grad_norm": 3.3667149543762207, |
| "learning_rate": 1.1844394730300772e-05, |
| "loss": 4.2494, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.7904045987176652, |
| "grad_norm": 3.6815719604492188, |
| "learning_rate": 1.1782252050708426e-05, |
| "loss": 4.15, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.7915100596948927, |
| "grad_norm": 3.330397367477417, |
| "learning_rate": 1.1720109371116084e-05, |
| "loss": 4.0933, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.7926155206721203, |
| "grad_norm": 3.213534355163574, |
| "learning_rate": 1.1657966691523739e-05, |
| "loss": 4.0645, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.7937209816493478, |
| "grad_norm": 3.413196086883545, |
| "learning_rate": 1.1595824011931397e-05, |
| "loss": 4.2731, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.7948264426265753, |
| "grad_norm": 2.9504334926605225, |
| "learning_rate": 1.1533681332339051e-05, |
| "loss": 4.0869, |
| "step": 3595 |
| }, |
| { |
| "epoch": 0.7959319036038028, |
| "grad_norm": 3.48688006401062, |
| "learning_rate": 1.1471538652746707e-05, |
| "loss": 4.1732, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.7970373645810302, |
| "grad_norm": 3.202857494354248, |
| "learning_rate": 1.1409395973154363e-05, |
| "loss": 4.2084, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.7981428255582578, |
| "grad_norm": 3.460794687271118, |
| "learning_rate": 1.1347253293562018e-05, |
| "loss": 4.2956, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.7992482865354853, |
| "grad_norm": 3.3727447986602783, |
| "learning_rate": 1.1285110613969676e-05, |
| "loss": 4.1854, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.8003537475127128, |
| "grad_norm": 3.3435420989990234, |
| "learning_rate": 1.122296793437733e-05, |
| "loss": 4.3749, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.8014592084899403, |
| "grad_norm": 3.1651086807250977, |
| "learning_rate": 1.1160825254784988e-05, |
| "loss": 4.132, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.8025646694671678, |
| "grad_norm": 3.482461929321289, |
| "learning_rate": 1.1098682575192643e-05, |
| "loss": 4.3037, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.8036701304443953, |
| "grad_norm": 3.5828919410705566, |
| "learning_rate": 1.1036539895600299e-05, |
| "loss": 4.1466, |
| "step": 3635 |
| }, |
| { |
| "epoch": 0.8047755914216228, |
| "grad_norm": 3.344888687133789, |
| "learning_rate": 1.0974397216007955e-05, |
| "loss": 4.1947, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.8058810523988503, |
| "grad_norm": 3.2426233291625977, |
| "learning_rate": 1.091225453641561e-05, |
| "loss": 4.0683, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.8069865133760779, |
| "grad_norm": 3.2281033992767334, |
| "learning_rate": 1.0850111856823267e-05, |
| "loss": 4.107, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.8080919743533054, |
| "grad_norm": 3.1622958183288574, |
| "learning_rate": 1.0787969177230922e-05, |
| "loss": 4.2085, |
| "step": 3655 |
| }, |
| { |
| "epoch": 0.8091974353305328, |
| "grad_norm": 3.2309300899505615, |
| "learning_rate": 1.072582649763858e-05, |
| "loss": 4.3067, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.8103028963077603, |
| "grad_norm": 3.1198458671569824, |
| "learning_rate": 1.0663683818046234e-05, |
| "loss": 4.0849, |
| "step": 3665 |
| }, |
| { |
| "epoch": 0.8114083572849878, |
| "grad_norm": 3.5155203342437744, |
| "learning_rate": 1.060154113845389e-05, |
| "loss": 4.115, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.8125138182622154, |
| "grad_norm": 3.102889060974121, |
| "learning_rate": 1.0539398458861546e-05, |
| "loss": 4.1175, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.8136192792394429, |
| "grad_norm": 3.3019254207611084, |
| "learning_rate": 1.0477255779269203e-05, |
| "loss": 4.2803, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.8147247402166704, |
| "grad_norm": 3.5849218368530273, |
| "learning_rate": 1.0415113099676859e-05, |
| "loss": 4.2005, |
| "step": 3685 |
| }, |
| { |
| "epoch": 0.8158302011938978, |
| "grad_norm": 3.9152631759643555, |
| "learning_rate": 1.0352970420084515e-05, |
| "loss": 4.3163, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.8169356621711253, |
| "grad_norm": 3.0798897743225098, |
| "learning_rate": 1.0290827740492171e-05, |
| "loss": 4.1536, |
| "step": 3695 |
| }, |
| { |
| "epoch": 0.8180411231483529, |
| "grad_norm": 3.491821765899658, |
| "learning_rate": 1.0228685060899826e-05, |
| "loss": 4.3108, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.8191465841255804, |
| "grad_norm": 3.093750238418579, |
| "learning_rate": 1.0166542381307482e-05, |
| "loss": 4.0467, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.8202520451028079, |
| "grad_norm": 3.4779791831970215, |
| "learning_rate": 1.0104399701715138e-05, |
| "loss": 4.2484, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.8213575060800353, |
| "grad_norm": 3.1915061473846436, |
| "learning_rate": 1.0042257022122794e-05, |
| "loss": 4.3235, |
| "step": 3715 |
| }, |
| { |
| "epoch": 0.8224629670572629, |
| "grad_norm": 3.1019785404205322, |
| "learning_rate": 9.98011434253045e-06, |
| "loss": 4.1893, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.8235684280344904, |
| "grad_norm": 3.3659591674804688, |
| "learning_rate": 9.917971662938106e-06, |
| "loss": 4.1759, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.8246738890117179, |
| "grad_norm": 3.254364013671875, |
| "learning_rate": 9.855828983345763e-06, |
| "loss": 3.9382, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.8257793499889454, |
| "grad_norm": 3.1901118755340576, |
| "learning_rate": 9.793686303753419e-06, |
| "loss": 4.1601, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.826884810966173, |
| "grad_norm": 3.040501832962036, |
| "learning_rate": 9.731543624161075e-06, |
| "loss": 4.0918, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.8279902719434004, |
| "grad_norm": 3.3288450241088867, |
| "learning_rate": 9.669400944568731e-06, |
| "loss": 4.1557, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.8290957329206279, |
| "grad_norm": 3.145031213760376, |
| "learning_rate": 9.607258264976386e-06, |
| "loss": 4.2639, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.8302011938978554, |
| "grad_norm": 2.950425148010254, |
| "learning_rate": 9.545115585384042e-06, |
| "loss": 4.0413, |
| "step": 3755 |
| }, |
| { |
| "epoch": 0.8313066548750829, |
| "grad_norm": 3.336622714996338, |
| "learning_rate": 9.482972905791698e-06, |
| "loss": 4.2885, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.8324121158523105, |
| "grad_norm": 3.403669834136963, |
| "learning_rate": 9.420830226199354e-06, |
| "loss": 4.224, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.8335175768295379, |
| "grad_norm": 3.3747620582580566, |
| "learning_rate": 9.35868754660701e-06, |
| "loss": 4.1419, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.8346230378067654, |
| "grad_norm": 3.3672516345977783, |
| "learning_rate": 9.296544867014666e-06, |
| "loss": 4.2408, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.8357284987839929, |
| "grad_norm": 3.1235463619232178, |
| "learning_rate": 9.234402187422323e-06, |
| "loss": 4.2304, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.8368339597612204, |
| "grad_norm": 3.0135231018066406, |
| "learning_rate": 9.172259507829977e-06, |
| "loss": 4.3504, |
| "step": 3785 |
| }, |
| { |
| "epoch": 0.837939420738448, |
| "grad_norm": 3.669422149658203, |
| "learning_rate": 9.110116828237635e-06, |
| "loss": 4.2286, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.8390448817156755, |
| "grad_norm": 3.5061023235321045, |
| "learning_rate": 9.04797414864529e-06, |
| "loss": 4.043, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.8401503426929029, |
| "grad_norm": 3.188978672027588, |
| "learning_rate": 8.985831469052947e-06, |
| "loss": 4.2602, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.8412558036701304, |
| "grad_norm": 3.4181642532348633, |
| "learning_rate": 8.923688789460602e-06, |
| "loss": 4.1946, |
| "step": 3805 |
| }, |
| { |
| "epoch": 0.8423612646473579, |
| "grad_norm": 3.3051459789276123, |
| "learning_rate": 8.861546109868258e-06, |
| "loss": 4.1812, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.8434667256245855, |
| "grad_norm": 3.0405430793762207, |
| "learning_rate": 8.799403430275914e-06, |
| "loss": 4.2455, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.844572186601813, |
| "grad_norm": 3.1977388858795166, |
| "learning_rate": 8.737260750683569e-06, |
| "loss": 4.1665, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.8456776475790405, |
| "grad_norm": 3.153214693069458, |
| "learning_rate": 8.675118071091226e-06, |
| "loss": 4.1227, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.8467831085562679, |
| "grad_norm": 3.160295009613037, |
| "learning_rate": 8.612975391498881e-06, |
| "loss": 4.1928, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.8478885695334955, |
| "grad_norm": 3.522057294845581, |
| "learning_rate": 8.550832711906539e-06, |
| "loss": 4.3234, |
| "step": 3835 |
| }, |
| { |
| "epoch": 0.848994030510723, |
| "grad_norm": 3.3850722312927246, |
| "learning_rate": 8.488690032314193e-06, |
| "loss": 4.2035, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.8500994914879505, |
| "grad_norm": 3.237739324569702, |
| "learning_rate": 8.42654735272185e-06, |
| "loss": 4.0377, |
| "step": 3845 |
| }, |
| { |
| "epoch": 0.851204952465178, |
| "grad_norm": 3.3790619373321533, |
| "learning_rate": 8.364404673129506e-06, |
| "loss": 4.1112, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.8523104134424054, |
| "grad_norm": 3.395925760269165, |
| "learning_rate": 8.302261993537162e-06, |
| "loss": 4.3152, |
| "step": 3855 |
| }, |
| { |
| "epoch": 0.853415874419633, |
| "grad_norm": 2.8968868255615234, |
| "learning_rate": 8.240119313944818e-06, |
| "loss": 4.1642, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.8545213353968605, |
| "grad_norm": 3.6181344985961914, |
| "learning_rate": 8.177976634352472e-06, |
| "loss": 4.27, |
| "step": 3865 |
| }, |
| { |
| "epoch": 0.855626796374088, |
| "grad_norm": 3.3780412673950195, |
| "learning_rate": 8.11583395476013e-06, |
| "loss": 4.2319, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.8567322573513155, |
| "grad_norm": 3.0761659145355225, |
| "learning_rate": 8.053691275167785e-06, |
| "loss": 4.2244, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.857837718328543, |
| "grad_norm": 3.188369035720825, |
| "learning_rate": 7.991548595575441e-06, |
| "loss": 4.1855, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.8589431793057705, |
| "grad_norm": 3.280965805053711, |
| "learning_rate": 7.929405915983097e-06, |
| "loss": 4.2297, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.860048640282998, |
| "grad_norm": 3.428769111633301, |
| "learning_rate": 7.867263236390753e-06, |
| "loss": 4.2635, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.8611541012602255, |
| "grad_norm": 3.372145414352417, |
| "learning_rate": 7.80512055679841e-06, |
| "loss": 4.1799, |
| "step": 3895 |
| }, |
| { |
| "epoch": 0.862259562237453, |
| "grad_norm": 3.669572114944458, |
| "learning_rate": 7.742977877206066e-06, |
| "loss": 4.1279, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8633650232146806, |
| "grad_norm": 3.3069515228271484, |
| "learning_rate": 7.680835197613722e-06, |
| "loss": 4.2423, |
| "step": 3905 |
| }, |
| { |
| "epoch": 0.864470484191908, |
| "grad_norm": 3.4965929985046387, |
| "learning_rate": 7.618692518021378e-06, |
| "loss": 4.2445, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.8655759451691355, |
| "grad_norm": 3.3007524013519287, |
| "learning_rate": 7.556549838429033e-06, |
| "loss": 4.3169, |
| "step": 3915 |
| }, |
| { |
| "epoch": 0.866681406146363, |
| "grad_norm": 3.3031368255615234, |
| "learning_rate": 7.494407158836689e-06, |
| "loss": 4.2489, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.8677868671235905, |
| "grad_norm": 3.3182923793792725, |
| "learning_rate": 7.432264479244346e-06, |
| "loss": 4.1043, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.8688923281008181, |
| "grad_norm": 3.1912918090820312, |
| "learning_rate": 7.370121799652001e-06, |
| "loss": 4.225, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.8699977890780456, |
| "grad_norm": 3.4221689701080322, |
| "learning_rate": 7.307979120059657e-06, |
| "loss": 4.2911, |
| "step": 3935 |
| }, |
| { |
| "epoch": 0.871103250055273, |
| "grad_norm": 3.3450770378112793, |
| "learning_rate": 7.2458364404673125e-06, |
| "loss": 4.4661, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.8722087110325005, |
| "grad_norm": 3.3857436180114746, |
| "learning_rate": 7.1836937608749695e-06, |
| "loss": 4.1062, |
| "step": 3945 |
| }, |
| { |
| "epoch": 0.873314172009728, |
| "grad_norm": 3.2162883281707764, |
| "learning_rate": 7.121551081282625e-06, |
| "loss": 4.2926, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.8744196329869556, |
| "grad_norm": 2.971797227859497, |
| "learning_rate": 7.059408401690282e-06, |
| "loss": 4.0731, |
| "step": 3955 |
| }, |
| { |
| "epoch": 0.8755250939641831, |
| "grad_norm": 3.228489875793457, |
| "learning_rate": 6.997265722097937e-06, |
| "loss": 4.1616, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.8766305549414106, |
| "grad_norm": 3.2910053730010986, |
| "learning_rate": 6.935123042505594e-06, |
| "loss": 4.2075, |
| "step": 3965 |
| }, |
| { |
| "epoch": 0.877736015918638, |
| "grad_norm": 3.1011228561401367, |
| "learning_rate": 6.8729803629132495e-06, |
| "loss": 4.1851, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.8788414768958656, |
| "grad_norm": 3.6701035499572754, |
| "learning_rate": 6.810837683320905e-06, |
| "loss": 4.1968, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.8799469378730931, |
| "grad_norm": 3.310450315475464, |
| "learning_rate": 6.748695003728561e-06, |
| "loss": 4.3885, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.8810523988503206, |
| "grad_norm": 3.3232550621032715, |
| "learning_rate": 6.686552324136216e-06, |
| "loss": 4.202, |
| "step": 3985 |
| }, |
| { |
| "epoch": 0.8821578598275481, |
| "grad_norm": 3.33705472946167, |
| "learning_rate": 6.624409644543873e-06, |
| "loss": 4.2345, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.8832633208047755, |
| "grad_norm": 3.648831605911255, |
| "learning_rate": 6.562266964951529e-06, |
| "loss": 4.2464, |
| "step": 3995 |
| }, |
| { |
| "epoch": 0.8843687817820031, |
| "grad_norm": 3.2218527793884277, |
| "learning_rate": 6.500124285359186e-06, |
| "loss": 4.0956, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.8854742427592306, |
| "grad_norm": 3.0550131797790527, |
| "learning_rate": 6.437981605766841e-06, |
| "loss": 4.1712, |
| "step": 4005 |
| }, |
| { |
| "epoch": 0.8865797037364581, |
| "grad_norm": 3.1984024047851562, |
| "learning_rate": 6.375838926174497e-06, |
| "loss": 4.2718, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.8876851647136856, |
| "grad_norm": 3.2509777545928955, |
| "learning_rate": 6.3136962465821526e-06, |
| "loss": 4.0173, |
| "step": 4015 |
| }, |
| { |
| "epoch": 0.8887906256909132, |
| "grad_norm": 3.146519899368286, |
| "learning_rate": 6.2515535669898096e-06, |
| "loss": 4.4115, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.8898960866681406, |
| "grad_norm": 3.422335624694824, |
| "learning_rate": 6.189410887397465e-06, |
| "loss": 4.3307, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.8910015476453681, |
| "grad_norm": 3.50016188621521, |
| "learning_rate": 6.127268207805121e-06, |
| "loss": 4.0675, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.8921070086225956, |
| "grad_norm": 3.059391975402832, |
| "learning_rate": 6.065125528212777e-06, |
| "loss": 4.2215, |
| "step": 4035 |
| }, |
| { |
| "epoch": 0.8932124695998231, |
| "grad_norm": 3.585162401199341, |
| "learning_rate": 6.002982848620433e-06, |
| "loss": 4.1206, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.8943179305770507, |
| "grad_norm": 3.1658449172973633, |
| "learning_rate": 5.940840169028089e-06, |
| "loss": 4.1826, |
| "step": 4045 |
| }, |
| { |
| "epoch": 0.8954233915542781, |
| "grad_norm": 3.30590558052063, |
| "learning_rate": 5.878697489435745e-06, |
| "loss": 4.07, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.8965288525315056, |
| "grad_norm": 3.5523128509521484, |
| "learning_rate": 5.8165548098434e-06, |
| "loss": 4.2302, |
| "step": 4055 |
| }, |
| { |
| "epoch": 0.8976343135087331, |
| "grad_norm": 3.2362444400787354, |
| "learning_rate": 5.754412130251056e-06, |
| "loss": 4.1555, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.8987397744859607, |
| "grad_norm": 2.9280905723571777, |
| "learning_rate": 5.692269450658713e-06, |
| "loss": 4.1708, |
| "step": 4065 |
| }, |
| { |
| "epoch": 0.8998452354631882, |
| "grad_norm": 3.277392625808716, |
| "learning_rate": 5.630126771066369e-06, |
| "loss": 4.1606, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.9009506964404157, |
| "grad_norm": 2.9546451568603516, |
| "learning_rate": 5.567984091474025e-06, |
| "loss": 4.1486, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.9020561574176431, |
| "grad_norm": 3.33906888961792, |
| "learning_rate": 5.50584141188168e-06, |
| "loss": 4.2423, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.9031616183948706, |
| "grad_norm": 3.414642572402954, |
| "learning_rate": 5.4436987322893364e-06, |
| "loss": 4.1806, |
| "step": 4085 |
| }, |
| { |
| "epoch": 0.9042670793720982, |
| "grad_norm": 3.1724166870117188, |
| "learning_rate": 5.381556052696993e-06, |
| "loss": 4.3395, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.9053725403493257, |
| "grad_norm": 3.3159971237182617, |
| "learning_rate": 5.319413373104649e-06, |
| "loss": 4.1692, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.9064780013265532, |
| "grad_norm": 3.149585008621216, |
| "learning_rate": 5.257270693512305e-06, |
| "loss": 4.1873, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.9075834623037807, |
| "grad_norm": 3.5617358684539795, |
| "learning_rate": 5.195128013919961e-06, |
| "loss": 4.2171, |
| "step": 4105 |
| }, |
| { |
| "epoch": 0.9086889232810081, |
| "grad_norm": 3.268549680709839, |
| "learning_rate": 5.1329853343276164e-06, |
| "loss": 4.1768, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.9097943842582357, |
| "grad_norm": 3.424433708190918, |
| "learning_rate": 5.070842654735273e-06, |
| "loss": 4.4327, |
| "step": 4115 |
| }, |
| { |
| "epoch": 0.9108998452354632, |
| "grad_norm": 3.495929479598999, |
| "learning_rate": 5.008699975142928e-06, |
| "loss": 4.1886, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.9120053062126907, |
| "grad_norm": 3.045023202896118, |
| "learning_rate": 4.946557295550584e-06, |
| "loss": 4.3829, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.9131107671899182, |
| "grad_norm": 3.1356985569000244, |
| "learning_rate": 4.88441461595824e-06, |
| "loss": 4.3304, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.9142162281671457, |
| "grad_norm": 3.389559507369995, |
| "learning_rate": 4.8222719363658965e-06, |
| "loss": 4.1715, |
| "step": 4135 |
| }, |
| { |
| "epoch": 0.9153216891443732, |
| "grad_norm": 3.1588001251220703, |
| "learning_rate": 4.760129256773553e-06, |
| "loss": 4.2491, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.9164271501216007, |
| "grad_norm": 3.5233826637268066, |
| "learning_rate": 4.697986577181209e-06, |
| "loss": 4.409, |
| "step": 4145 |
| }, |
| { |
| "epoch": 0.9175326110988282, |
| "grad_norm": 3.0876009464263916, |
| "learning_rate": 4.635843897588864e-06, |
| "loss": 4.1037, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.9186380720760557, |
| "grad_norm": 3.64609956741333, |
| "learning_rate": 4.57370121799652e-06, |
| "loss": 4.2202, |
| "step": 4155 |
| }, |
| { |
| "epoch": 0.9197435330532833, |
| "grad_norm": 3.119335174560547, |
| "learning_rate": 4.511558538404176e-06, |
| "loss": 4.2293, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.9208489940305107, |
| "grad_norm": 3.2007765769958496, |
| "learning_rate": 4.449415858811832e-06, |
| "loss": 4.2337, |
| "step": 4165 |
| }, |
| { |
| "epoch": 0.9219544550077382, |
| "grad_norm": 2.860046625137329, |
| "learning_rate": 4.387273179219488e-06, |
| "loss": 4.2855, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.9230599159849657, |
| "grad_norm": 3.472074270248413, |
| "learning_rate": 4.325130499627144e-06, |
| "loss": 4.2792, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.9241653769621933, |
| "grad_norm": 3.21456241607666, |
| "learning_rate": 4.2629878200348e-06, |
| "loss": 4.2083, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.9252708379394208, |
| "grad_norm": 3.0883960723876953, |
| "learning_rate": 4.2008451404424565e-06, |
| "loss": 4.2125, |
| "step": 4185 |
| }, |
| { |
| "epoch": 0.9263762989166482, |
| "grad_norm": 3.1821343898773193, |
| "learning_rate": 4.138702460850112e-06, |
| "loss": 4.3135, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.9274817598938757, |
| "grad_norm": 3.2891180515289307, |
| "learning_rate": 4.076559781257768e-06, |
| "loss": 4.2337, |
| "step": 4195 |
| }, |
| { |
| "epoch": 0.9285872208711032, |
| "grad_norm": 3.036611557006836, |
| "learning_rate": 4.014417101665424e-06, |
| "loss": 4.1799, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.9296926818483308, |
| "grad_norm": 3.262669086456299, |
| "learning_rate": 3.95227442207308e-06, |
| "loss": 4.3257, |
| "step": 4205 |
| }, |
| { |
| "epoch": 0.9307981428255583, |
| "grad_norm": 3.32913875579834, |
| "learning_rate": 3.8901317424807365e-06, |
| "loss": 4.2918, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.9319036038027858, |
| "grad_norm": 3.221358299255371, |
| "learning_rate": 3.827989062888392e-06, |
| "loss": 4.2922, |
| "step": 4215 |
| }, |
| { |
| "epoch": 0.9330090647800132, |
| "grad_norm": 3.131178617477417, |
| "learning_rate": 3.7658463832960476e-06, |
| "loss": 4.1484, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.9341145257572407, |
| "grad_norm": 3.0813159942626953, |
| "learning_rate": 3.7037037037037037e-06, |
| "loss": 4.2841, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.9352199867344683, |
| "grad_norm": 2.8390700817108154, |
| "learning_rate": 3.64156102411136e-06, |
| "loss": 4.0598, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.9363254477116958, |
| "grad_norm": 3.10927677154541, |
| "learning_rate": 3.5794183445190157e-06, |
| "loss": 4.1328, |
| "step": 4235 |
| }, |
| { |
| "epoch": 0.9374309086889233, |
| "grad_norm": 3.2241241931915283, |
| "learning_rate": 3.517275664926672e-06, |
| "loss": 4.2188, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.9385363696661508, |
| "grad_norm": 2.9095420837402344, |
| "learning_rate": 3.455132985334328e-06, |
| "loss": 4.068, |
| "step": 4245 |
| }, |
| { |
| "epoch": 0.9396418306433783, |
| "grad_norm": 3.1288955211639404, |
| "learning_rate": 3.3929903057419838e-06, |
| "loss": 4.2663, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.9407472916206058, |
| "grad_norm": 3.026554584503174, |
| "learning_rate": 3.33084762614964e-06, |
| "loss": 4.1512, |
| "step": 4255 |
| }, |
| { |
| "epoch": 0.9418527525978333, |
| "grad_norm": 3.222672462463379, |
| "learning_rate": 3.268704946557296e-06, |
| "loss": 4.235, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.9429582135750608, |
| "grad_norm": 3.381204605102539, |
| "learning_rate": 3.206562266964952e-06, |
| "loss": 4.1584, |
| "step": 4265 |
| }, |
| { |
| "epoch": 0.9440636745522883, |
| "grad_norm": 3.3569135665893555, |
| "learning_rate": 3.144419587372607e-06, |
| "loss": 4.2849, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.9451691355295158, |
| "grad_norm": 3.2201907634735107, |
| "learning_rate": 3.0822769077802638e-06, |
| "loss": 4.1318, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.9462745965067433, |
| "grad_norm": 3.078237771987915, |
| "learning_rate": 3.02013422818792e-06, |
| "loss": 4.2257, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.9473800574839708, |
| "grad_norm": 2.9291415214538574, |
| "learning_rate": 2.9579915485955753e-06, |
| "loss": 4.397, |
| "step": 4285 |
| }, |
| { |
| "epoch": 0.9484855184611983, |
| "grad_norm": 3.3114891052246094, |
| "learning_rate": 2.8958488690032314e-06, |
| "loss": 4.1599, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.9495909794384259, |
| "grad_norm": 3.3049850463867188, |
| "learning_rate": 2.8337061894108876e-06, |
| "loss": 4.2123, |
| "step": 4295 |
| }, |
| { |
| "epoch": 0.9506964404156534, |
| "grad_norm": 2.979609251022339, |
| "learning_rate": 2.7715635098185434e-06, |
| "loss": 4.1817, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.9518019013928808, |
| "grad_norm": 3.1335394382476807, |
| "learning_rate": 2.7094208302261995e-06, |
| "loss": 4.2932, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.9529073623701083, |
| "grad_norm": 3.3001952171325684, |
| "learning_rate": 2.6472781506338553e-06, |
| "loss": 4.4201, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.9540128233473358, |
| "grad_norm": 3.1160495281219482, |
| "learning_rate": 2.5851354710415115e-06, |
| "loss": 4.1786, |
| "step": 4315 |
| }, |
| { |
| "epoch": 0.9551182843245634, |
| "grad_norm": 2.8716208934783936, |
| "learning_rate": 2.522992791449167e-06, |
| "loss": 3.9942, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.9562237453017909, |
| "grad_norm": 3.0611040592193604, |
| "learning_rate": 2.4608501118568234e-06, |
| "loss": 4.4118, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.9573292062790183, |
| "grad_norm": 2.9500648975372314, |
| "learning_rate": 2.3987074322644795e-06, |
| "loss": 4.305, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.9584346672562458, |
| "grad_norm": 3.5862972736358643, |
| "learning_rate": 2.3365647526721353e-06, |
| "loss": 4.3046, |
| "step": 4335 |
| }, |
| { |
| "epoch": 0.9595401282334733, |
| "grad_norm": 3.304366111755371, |
| "learning_rate": 2.274422073079791e-06, |
| "loss": 4.3483, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.9606455892107009, |
| "grad_norm": 3.4040110111236572, |
| "learning_rate": 2.2122793934874472e-06, |
| "loss": 4.2975, |
| "step": 4345 |
| }, |
| { |
| "epoch": 0.9617510501879284, |
| "grad_norm": 3.197815179824829, |
| "learning_rate": 2.1501367138951034e-06, |
| "loss": 4.3031, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.9628565111651559, |
| "grad_norm": 3.365293502807617, |
| "learning_rate": 2.087994034302759e-06, |
| "loss": 4.2018, |
| "step": 4355 |
| }, |
| { |
| "epoch": 0.9639619721423833, |
| "grad_norm": 3.179311990737915, |
| "learning_rate": 2.0258513547104153e-06, |
| "loss": 4.3385, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.9650674331196109, |
| "grad_norm": 3.1740834712982178, |
| "learning_rate": 1.963708675118071e-06, |
| "loss": 4.4034, |
| "step": 4365 |
| }, |
| { |
| "epoch": 0.9661728940968384, |
| "grad_norm": 3.0727176666259766, |
| "learning_rate": 1.901565995525727e-06, |
| "loss": 4.2515, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.9672783550740659, |
| "grad_norm": 2.9758899211883545, |
| "learning_rate": 1.8394233159333832e-06, |
| "loss": 4.1974, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.9683838160512934, |
| "grad_norm": 3.014615774154663, |
| "learning_rate": 1.7772806363410391e-06, |
| "loss": 4.3097, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.969489277028521, |
| "grad_norm": 3.5511038303375244, |
| "learning_rate": 1.7151379567486951e-06, |
| "loss": 4.2784, |
| "step": 4385 |
| }, |
| { |
| "epoch": 0.9705947380057484, |
| "grad_norm": 2.977102518081665, |
| "learning_rate": 1.6529952771563513e-06, |
| "loss": 4.2234, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.9717001989829759, |
| "grad_norm": 2.964914083480835, |
| "learning_rate": 1.5908525975640068e-06, |
| "loss": 4.1375, |
| "step": 4395 |
| }, |
| { |
| "epoch": 0.9728056599602034, |
| "grad_norm": 2.916311025619507, |
| "learning_rate": 1.528709917971663e-06, |
| "loss": 4.1116, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.9739111209374309, |
| "grad_norm": 3.3200995922088623, |
| "learning_rate": 1.466567238379319e-06, |
| "loss": 4.3596, |
| "step": 4405 |
| }, |
| { |
| "epoch": 0.9750165819146585, |
| "grad_norm": 3.0481033325195312, |
| "learning_rate": 1.4044245587869751e-06, |
| "loss": 4.303, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.9761220428918859, |
| "grad_norm": 3.04089617729187, |
| "learning_rate": 1.3422818791946309e-06, |
| "loss": 4.3629, |
| "step": 4415 |
| }, |
| { |
| "epoch": 0.9772275038691134, |
| "grad_norm": 3.03387713432312, |
| "learning_rate": 1.280139199602287e-06, |
| "loss": 4.2679, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.9783329648463409, |
| "grad_norm": 3.1632862091064453, |
| "learning_rate": 1.2179965200099428e-06, |
| "loss": 4.153, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.9794384258235684, |
| "grad_norm": 3.382652759552002, |
| "learning_rate": 1.1558538404175988e-06, |
| "loss": 4.1147, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.980543886800796, |
| "grad_norm": 3.4399046897888184, |
| "learning_rate": 1.093711160825255e-06, |
| "loss": 4.2737, |
| "step": 4435 |
| }, |
| { |
| "epoch": 0.9816493477780235, |
| "grad_norm": 3.3583288192749023, |
| "learning_rate": 1.0315684812329107e-06, |
| "loss": 4.2274, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.9827548087552509, |
| "grad_norm": 3.291776657104492, |
| "learning_rate": 9.694258016405668e-07, |
| "loss": 4.1284, |
| "step": 4445 |
| }, |
| { |
| "epoch": 0.9838602697324784, |
| "grad_norm": 3.148688554763794, |
| "learning_rate": 9.072831220482228e-07, |
| "loss": 4.3734, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.9849657307097059, |
| "grad_norm": 2.98494553565979, |
| "learning_rate": 8.451404424558787e-07, |
| "loss": 4.2998, |
| "step": 4455 |
| }, |
| { |
| "epoch": 0.9860711916869335, |
| "grad_norm": 3.550734043121338, |
| "learning_rate": 7.829977628635347e-07, |
| "loss": 4.131, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.987176652664161, |
| "grad_norm": 3.148184299468994, |
| "learning_rate": 7.208550832711907e-07, |
| "loss": 4.211, |
| "step": 4465 |
| }, |
| { |
| "epoch": 0.9882821136413884, |
| "grad_norm": 3.389477491378784, |
| "learning_rate": 6.587124036788466e-07, |
| "loss": 4.3192, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.9893875746186159, |
| "grad_norm": 2.744230031967163, |
| "learning_rate": 5.965697240865026e-07, |
| "loss": 4.3994, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.9904930355958435, |
| "grad_norm": 3.189837694168091, |
| "learning_rate": 5.344270444941587e-07, |
| "loss": 4.3435, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.991598496573071, |
| "grad_norm": 3.2491848468780518, |
| "learning_rate": 4.722843649018146e-07, |
| "loss": 4.3766, |
| "step": 4485 |
| }, |
| { |
| "epoch": 0.9927039575502985, |
| "grad_norm": 3.1869592666625977, |
| "learning_rate": 4.1014168530947054e-07, |
| "loss": 4.393, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.993809418527526, |
| "grad_norm": 3.4105918407440186, |
| "learning_rate": 3.4799900571712656e-07, |
| "loss": 4.2419, |
| "step": 4495 |
| }, |
| { |
| "epoch": 0.9949148795047534, |
| "grad_norm": 3.1611382961273193, |
| "learning_rate": 2.858563261247825e-07, |
| "loss": 4.3572, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.996020340481981, |
| "grad_norm": 3.0471818447113037, |
| "learning_rate": 2.2371364653243848e-07, |
| "loss": 4.3163, |
| "step": 4505 |
| }, |
| { |
| "epoch": 0.9971258014592085, |
| "grad_norm": 2.9979894161224365, |
| "learning_rate": 1.6157096694009447e-07, |
| "loss": 4.1885, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.998231262436436, |
| "grad_norm": 3.4176154136657715, |
| "learning_rate": 9.942828734775043e-08, |
| "loss": 4.3076, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.9993367234136635, |
| "grad_norm": 3.594446897506714, |
| "learning_rate": 3.728560775540641e-08, |
| "loss": 4.184, |
| "step": 4520 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 4523, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.7364382421434368e+16, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|