| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 3099, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0048408568316592035, | |
| "grad_norm": 178.71523526371087, | |
| "learning_rate": 6.451612903225807e-07, | |
| "loss": 15.8295, | |
| "num_input_tokens_seen": 1971904, | |
| "step": 5, | |
| "train_runtime": 965.9569, | |
| "train_tokens_per_second": 2041.4 | |
| }, | |
| { | |
| "epoch": 0.009681713663318407, | |
| "grad_norm": 157.57466219746485, | |
| "learning_rate": 1.4516129032258066e-06, | |
| "loss": 15.3582, | |
| "num_input_tokens_seen": 4038576, | |
| "step": 10, | |
| "train_runtime": 1957.3451, | |
| "train_tokens_per_second": 2063.293 | |
| }, | |
| { | |
| "epoch": 0.014522570494977611, | |
| "grad_norm": 156.50950938238927, | |
| "learning_rate": 2.2580645161290324e-06, | |
| "loss": 12.8373, | |
| "num_input_tokens_seen": 5891616, | |
| "step": 15, | |
| "train_runtime": 2767.3168, | |
| "train_tokens_per_second": 2128.999 | |
| }, | |
| { | |
| "epoch": 0.019363427326636814, | |
| "grad_norm": 114.22743265072562, | |
| "learning_rate": 3.064516129032258e-06, | |
| "loss": 9.4205, | |
| "num_input_tokens_seen": 7849472, | |
| "step": 20, | |
| "train_runtime": 3522.7348, | |
| "train_tokens_per_second": 2228.232 | |
| }, | |
| { | |
| "epoch": 0.024204284158296018, | |
| "grad_norm": 135.40188397844682, | |
| "learning_rate": 3.870967741935484e-06, | |
| "loss": 7.6571, | |
| "num_input_tokens_seen": 9689648, | |
| "step": 25, | |
| "train_runtime": 3820.9748, | |
| "train_tokens_per_second": 2535.91 | |
| }, | |
| { | |
| "epoch": 0.029045140989955223, | |
| "grad_norm": 210.55574450271726, | |
| "learning_rate": 4.67741935483871e-06, | |
| "loss": 6.7016, | |
| "num_input_tokens_seen": 11652144, | |
| "step": 30, | |
| "train_runtime": 4196.5025, | |
| "train_tokens_per_second": 2776.632 | |
| }, | |
| { | |
| "epoch": 0.03388599782161442, | |
| "grad_norm": 208.41807492778292, | |
| "learning_rate": 5.483870967741936e-06, | |
| "loss": 5.8834, | |
| "num_input_tokens_seen": 13544176, | |
| "step": 35, | |
| "train_runtime": 4483.8305, | |
| "train_tokens_per_second": 3020.671 | |
| }, | |
| { | |
| "epoch": 0.03872685465327363, | |
| "grad_norm": 202.64781308250403, | |
| "learning_rate": 6.290322580645161e-06, | |
| "loss": 5.0909, | |
| "num_input_tokens_seen": 15315344, | |
| "step": 40, | |
| "train_runtime": 4739.2398, | |
| "train_tokens_per_second": 3231.604 | |
| }, | |
| { | |
| "epoch": 0.04356771148493283, | |
| "grad_norm": 208.02120960809935, | |
| "learning_rate": 7.096774193548387e-06, | |
| "loss": 4.3397, | |
| "num_input_tokens_seen": 17094640, | |
| "step": 45, | |
| "train_runtime": 4992.3854, | |
| "train_tokens_per_second": 3424.143 | |
| }, | |
| { | |
| "epoch": 0.048408568316592036, | |
| "grad_norm": 200.84606442004377, | |
| "learning_rate": 7.903225806451613e-06, | |
| "loss": 3.5344, | |
| "num_input_tokens_seen": 18990000, | |
| "step": 50, | |
| "train_runtime": 5321.5443, | |
| "train_tokens_per_second": 3568.513 | |
| }, | |
| { | |
| "epoch": 0.05324942514825124, | |
| "grad_norm": 180.4972165263306, | |
| "learning_rate": 8.70967741935484e-06, | |
| "loss": 2.5445, | |
| "num_input_tokens_seen": 20854912, | |
| "step": 55, | |
| "train_runtime": 5860.7159, | |
| "train_tokens_per_second": 3558.424 | |
| }, | |
| { | |
| "epoch": 0.058090281979910445, | |
| "grad_norm": 142.268336355056, | |
| "learning_rate": 9.516129032258064e-06, | |
| "loss": 1.5114, | |
| "num_input_tokens_seen": 22734480, | |
| "step": 60, | |
| "train_runtime": 6689.9051, | |
| "train_tokens_per_second": 3398.326 | |
| }, | |
| { | |
| "epoch": 0.06293113881156964, | |
| "grad_norm": 57.87038800361589, | |
| "learning_rate": 1.0322580645161291e-05, | |
| "loss": 0.681, | |
| "num_input_tokens_seen": 24705984, | |
| "step": 65, | |
| "train_runtime": 7695.5578, | |
| "train_tokens_per_second": 3210.421 | |
| }, | |
| { | |
| "epoch": 0.06777199564322885, | |
| "grad_norm": 11.382633199665975, | |
| "learning_rate": 1.1129032258064517e-05, | |
| "loss": 0.3083, | |
| "num_input_tokens_seen": 26623312, | |
| "step": 70, | |
| "train_runtime": 8523.5587, | |
| "train_tokens_per_second": 3123.497 | |
| }, | |
| { | |
| "epoch": 0.07261285247488805, | |
| "grad_norm": 1.7740955778794014, | |
| "learning_rate": 1.1935483870967743e-05, | |
| "loss": 0.2031, | |
| "num_input_tokens_seen": 28560464, | |
| "step": 75, | |
| "train_runtime": 9449.2243, | |
| "train_tokens_per_second": 3022.519 | |
| }, | |
| { | |
| "epoch": 0.07745370930654726, | |
| "grad_norm": 2.7185275952966643, | |
| "learning_rate": 1.2741935483870968e-05, | |
| "loss": 0.1776, | |
| "num_input_tokens_seen": 30353664, | |
| "step": 80, | |
| "train_runtime": 10242.2399, | |
| "train_tokens_per_second": 2963.577 | |
| }, | |
| { | |
| "epoch": 0.08229456613820646, | |
| "grad_norm": 11.173889555440166, | |
| "learning_rate": 1.3548387096774195e-05, | |
| "loss": 0.1556, | |
| "num_input_tokens_seen": 32276544, | |
| "step": 85, | |
| "train_runtime": 11127.6455, | |
| "train_tokens_per_second": 2900.573 | |
| }, | |
| { | |
| "epoch": 0.08713542296986566, | |
| "grad_norm": 0.43982142593021933, | |
| "learning_rate": 1.4354838709677421e-05, | |
| "loss": 0.1393, | |
| "num_input_tokens_seen": 34137648, | |
| "step": 90, | |
| "train_runtime": 11810.6178, | |
| "train_tokens_per_second": 2890.42 | |
| }, | |
| { | |
| "epoch": 0.09197627980152487, | |
| "grad_norm": 0.41292625320897214, | |
| "learning_rate": 1.5161290322580646e-05, | |
| "loss": 0.1302, | |
| "num_input_tokens_seen": 35998352, | |
| "step": 95, | |
| "train_runtime": 12094.7097, | |
| "train_tokens_per_second": 2976.372 | |
| }, | |
| { | |
| "epoch": 0.09681713663318407, | |
| "grad_norm": 0.2858556913615977, | |
| "learning_rate": 1.596774193548387e-05, | |
| "loss": 0.1201, | |
| "num_input_tokens_seen": 37869184, | |
| "step": 100, | |
| "train_runtime": 12394.8839, | |
| "train_tokens_per_second": 3055.227 | |
| }, | |
| { | |
| "epoch": 0.10165799346484328, | |
| "grad_norm": 0.30648690802872136, | |
| "learning_rate": 1.6774193548387098e-05, | |
| "loss": 0.1209, | |
| "num_input_tokens_seen": 39694272, | |
| "step": 105, | |
| "train_runtime": 12703.266, | |
| "train_tokens_per_second": 3124.73 | |
| }, | |
| { | |
| "epoch": 0.10649885029650248, | |
| "grad_norm": 0.2626276819380433, | |
| "learning_rate": 1.7580645161290325e-05, | |
| "loss": 0.1209, | |
| "num_input_tokens_seen": 41530000, | |
| "step": 110, | |
| "train_runtime": 12989.8073, | |
| "train_tokens_per_second": 3197.122 | |
| }, | |
| { | |
| "epoch": 0.11133970712816169, | |
| "grad_norm": 0.25596054422612646, | |
| "learning_rate": 1.838709677419355e-05, | |
| "loss": 0.1105, | |
| "num_input_tokens_seen": 43396048, | |
| "step": 115, | |
| "train_runtime": 13306.8176, | |
| "train_tokens_per_second": 3261.189 | |
| }, | |
| { | |
| "epoch": 0.11618056395982089, | |
| "grad_norm": 0.233381326668112, | |
| "learning_rate": 1.9193548387096774e-05, | |
| "loss": 0.1074, | |
| "num_input_tokens_seen": 45343968, | |
| "step": 120, | |
| "train_runtime": 14110.9269, | |
| "train_tokens_per_second": 3213.394 | |
| }, | |
| { | |
| "epoch": 0.1210214207914801, | |
| "grad_norm": 0.21251277353342254, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1037, | |
| "num_input_tokens_seen": 47322080, | |
| "step": 125, | |
| "train_runtime": 15041.1565, | |
| "train_tokens_per_second": 3146.173 | |
| }, | |
| { | |
| "epoch": 0.12586227762313928, | |
| "grad_norm": 0.2127294082203629, | |
| "learning_rate": 2.0806451612903226e-05, | |
| "loss": 0.1062, | |
| "num_input_tokens_seen": 49256592, | |
| "step": 130, | |
| "train_runtime": 16039.8532, | |
| "train_tokens_per_second": 3070.888 | |
| }, | |
| { | |
| "epoch": 0.1307031344547985, | |
| "grad_norm": 0.37877204299340766, | |
| "learning_rate": 2.1612903225806454e-05, | |
| "loss": 0.1022, | |
| "num_input_tokens_seen": 51154720, | |
| "step": 135, | |
| "train_runtime": 16882.3542, | |
| "train_tokens_per_second": 3030.07 | |
| }, | |
| { | |
| "epoch": 0.1355439912864577, | |
| "grad_norm": 0.23637714784511715, | |
| "learning_rate": 2.2419354838709678e-05, | |
| "loss": 0.0953, | |
| "num_input_tokens_seen": 53264704, | |
| "step": 140, | |
| "train_runtime": 18022.3173, | |
| "train_tokens_per_second": 2955.486 | |
| }, | |
| { | |
| "epoch": 0.1403848481181169, | |
| "grad_norm": 0.25592037593094186, | |
| "learning_rate": 2.3225806451612906e-05, | |
| "loss": 0.1071, | |
| "num_input_tokens_seen": 55099456, | |
| "step": 145, | |
| "train_runtime": 18802.202, | |
| "train_tokens_per_second": 2930.479 | |
| }, | |
| { | |
| "epoch": 0.1452257049497761, | |
| "grad_norm": 0.24437969699285353, | |
| "learning_rate": 2.403225806451613e-05, | |
| "loss": 0.1, | |
| "num_input_tokens_seen": 57033824, | |
| "step": 150, | |
| "train_runtime": 19705.9937, | |
| "train_tokens_per_second": 2894.237 | |
| }, | |
| { | |
| "epoch": 0.15006656178143532, | |
| "grad_norm": 0.21535509944605138, | |
| "learning_rate": 2.4838709677419354e-05, | |
| "loss": 0.1024, | |
| "num_input_tokens_seen": 58876656, | |
| "step": 155, | |
| "train_runtime": 20511.3799, | |
| "train_tokens_per_second": 2870.439 | |
| }, | |
| { | |
| "epoch": 0.1549074186130945, | |
| "grad_norm": 0.2173650300325744, | |
| "learning_rate": 2.5645161290322582e-05, | |
| "loss": 0.0978, | |
| "num_input_tokens_seen": 60877568, | |
| "step": 160, | |
| "train_runtime": 21475.5689, | |
| "train_tokens_per_second": 2834.736 | |
| }, | |
| { | |
| "epoch": 0.15974827544475373, | |
| "grad_norm": 0.22756324955154553, | |
| "learning_rate": 2.645161290322581e-05, | |
| "loss": 0.1081, | |
| "num_input_tokens_seen": 62756608, | |
| "step": 165, | |
| "train_runtime": 22319.9154, | |
| "train_tokens_per_second": 2811.687 | |
| }, | |
| { | |
| "epoch": 0.16458913227641292, | |
| "grad_norm": 0.17479824979429293, | |
| "learning_rate": 2.7258064516129034e-05, | |
| "loss": 0.0934, | |
| "num_input_tokens_seen": 64688544, | |
| "step": 170, | |
| "train_runtime": 23223.6296, | |
| "train_tokens_per_second": 2785.462 | |
| }, | |
| { | |
| "epoch": 0.16942998910807214, | |
| "grad_norm": 0.18928070472014152, | |
| "learning_rate": 2.806451612903226e-05, | |
| "loss": 0.0956, | |
| "num_input_tokens_seen": 66503488, | |
| "step": 175, | |
| "train_runtime": 23940.3182, | |
| "train_tokens_per_second": 2777.887 | |
| }, | |
| { | |
| "epoch": 0.17427084593973133, | |
| "grad_norm": 0.19529886904869875, | |
| "learning_rate": 2.8870967741935483e-05, | |
| "loss": 0.0945, | |
| "num_input_tokens_seen": 68426064, | |
| "step": 180, | |
| "train_runtime": 24825.8993, | |
| "train_tokens_per_second": 2756.237 | |
| }, | |
| { | |
| "epoch": 0.17911170277139055, | |
| "grad_norm": 0.19453179688267283, | |
| "learning_rate": 2.967741935483871e-05, | |
| "loss": 0.0957, | |
| "num_input_tokens_seen": 70300224, | |
| "step": 185, | |
| "train_runtime": 25652.738, | |
| "train_tokens_per_second": 2740.457 | |
| }, | |
| { | |
| "epoch": 0.18395255960304974, | |
| "grad_norm": 0.21743752742438213, | |
| "learning_rate": 3.0483870967741935e-05, | |
| "loss": 0.1048, | |
| "num_input_tokens_seen": 72153584, | |
| "step": 190, | |
| "train_runtime": 26532.9803, | |
| "train_tokens_per_second": 2719.392 | |
| }, | |
| { | |
| "epoch": 0.18879341643470895, | |
| "grad_norm": 0.17245846000709703, | |
| "learning_rate": 3.1290322580645166e-05, | |
| "loss": 0.1038, | |
| "num_input_tokens_seen": 73989040, | |
| "step": 195, | |
| "train_runtime": 27354.3265, | |
| "train_tokens_per_second": 2704.839 | |
| }, | |
| { | |
| "epoch": 0.19363427326636815, | |
| "grad_norm": 0.18251260903855057, | |
| "learning_rate": 3.2096774193548393e-05, | |
| "loss": 0.106, | |
| "num_input_tokens_seen": 75844448, | |
| "step": 200, | |
| "train_runtime": 28180.8862, | |
| "train_tokens_per_second": 2691.344 | |
| }, | |
| { | |
| "epoch": 0.19847513009802736, | |
| "grad_norm": 0.17893448735702916, | |
| "learning_rate": 3.2903225806451614e-05, | |
| "loss": 0.0979, | |
| "num_input_tokens_seen": 77627520, | |
| "step": 205, | |
| "train_runtime": 28955.3576, | |
| "train_tokens_per_second": 2680.938 | |
| }, | |
| { | |
| "epoch": 0.20331598692968655, | |
| "grad_norm": 0.20201575024600307, | |
| "learning_rate": 3.370967741935484e-05, | |
| "loss": 0.0953, | |
| "num_input_tokens_seen": 79416736, | |
| "step": 210, | |
| "train_runtime": 29709.3856, | |
| "train_tokens_per_second": 2673.119 | |
| }, | |
| { | |
| "epoch": 0.20815684376134574, | |
| "grad_norm": 0.1752100891136059, | |
| "learning_rate": 3.451612903225806e-05, | |
| "loss": 0.0936, | |
| "num_input_tokens_seen": 81375920, | |
| "step": 215, | |
| "train_runtime": 30576.7229, | |
| "train_tokens_per_second": 2661.368 | |
| }, | |
| { | |
| "epoch": 0.21299770059300496, | |
| "grad_norm": 0.1691448592145883, | |
| "learning_rate": 3.532258064516129e-05, | |
| "loss": 0.0954, | |
| "num_input_tokens_seen": 83230304, | |
| "step": 220, | |
| "train_runtime": 31349.3154, | |
| "train_tokens_per_second": 2654.932 | |
| }, | |
| { | |
| "epoch": 0.21783855742466415, | |
| "grad_norm": 0.19940515973456982, | |
| "learning_rate": 3.612903225806452e-05, | |
| "loss": 0.095, | |
| "num_input_tokens_seen": 85271952, | |
| "step": 225, | |
| "train_runtime": 32261.937, | |
| "train_tokens_per_second": 2643.113 | |
| }, | |
| { | |
| "epoch": 0.22267941425632337, | |
| "grad_norm": 0.20741829321407812, | |
| "learning_rate": 3.6935483870967746e-05, | |
| "loss": 0.0898, | |
| "num_input_tokens_seen": 87088336, | |
| "step": 230, | |
| "train_runtime": 33023.2573, | |
| "train_tokens_per_second": 2637.182 | |
| }, | |
| { | |
| "epoch": 0.22752027108798256, | |
| "grad_norm": 0.19208547326415293, | |
| "learning_rate": 3.7741935483870974e-05, | |
| "loss": 0.1018, | |
| "num_input_tokens_seen": 88804864, | |
| "step": 235, | |
| "train_runtime": 33715.7377, | |
| "train_tokens_per_second": 2633.929 | |
| }, | |
| { | |
| "epoch": 0.23236112791964178, | |
| "grad_norm": 0.1778620676104918, | |
| "learning_rate": 3.8548387096774195e-05, | |
| "loss": 0.1003, | |
| "num_input_tokens_seen": 90678512, | |
| "step": 240, | |
| "train_runtime": 34461.3908, | |
| "train_tokens_per_second": 2631.307 | |
| }, | |
| { | |
| "epoch": 0.23720198475130097, | |
| "grad_norm": 0.19195813050583843, | |
| "learning_rate": 3.935483870967742e-05, | |
| "loss": 0.1012, | |
| "num_input_tokens_seen": 92516880, | |
| "step": 245, | |
| "train_runtime": 35260.5603, | |
| "train_tokens_per_second": 2623.806 | |
| }, | |
| { | |
| "epoch": 0.2420428415829602, | |
| "grad_norm": 0.1935559412076288, | |
| "learning_rate": 4.016129032258065e-05, | |
| "loss": 0.0944, | |
| "num_input_tokens_seen": 94387008, | |
| "step": 250, | |
| "train_runtime": 36080.7203, | |
| "train_tokens_per_second": 2615.996 | |
| }, | |
| { | |
| "epoch": 0.24688369841461938, | |
| "grad_norm": 0.1871264431371236, | |
| "learning_rate": 4.096774193548387e-05, | |
| "loss": 0.0952, | |
| "num_input_tokens_seen": 96291040, | |
| "step": 255, | |
| "train_runtime": 36929.8595, | |
| "train_tokens_per_second": 2607.403 | |
| }, | |
| { | |
| "epoch": 0.25172455524627857, | |
| "grad_norm": 0.16780208844120814, | |
| "learning_rate": 4.17741935483871e-05, | |
| "loss": 0.1029, | |
| "num_input_tokens_seen": 98122448, | |
| "step": 260, | |
| "train_runtime": 37715.1617, | |
| "train_tokens_per_second": 2601.671 | |
| }, | |
| { | |
| "epoch": 0.2565654120779378, | |
| "grad_norm": 0.15596752998410582, | |
| "learning_rate": 4.258064516129032e-05, | |
| "loss": 0.0955, | |
| "num_input_tokens_seen": 100020384, | |
| "step": 265, | |
| "train_runtime": 38487.5175, | |
| "train_tokens_per_second": 2598.775 | |
| }, | |
| { | |
| "epoch": 0.261406268909597, | |
| "grad_norm": 0.1887794985586671, | |
| "learning_rate": 4.3387096774193554e-05, | |
| "loss": 0.0966, | |
| "num_input_tokens_seen": 101811184, | |
| "step": 270, | |
| "train_runtime": 39201.4963, | |
| "train_tokens_per_second": 2597.125 | |
| }, | |
| { | |
| "epoch": 0.2662471257412562, | |
| "grad_norm": 0.21763986935352733, | |
| "learning_rate": 4.4193548387096775e-05, | |
| "loss": 0.0977, | |
| "num_input_tokens_seen": 103544960, | |
| "step": 275, | |
| "train_runtime": 39839.3963, | |
| "train_tokens_per_second": 2599.059 | |
| }, | |
| { | |
| "epoch": 0.2710879825729154, | |
| "grad_norm": 0.1422251923467383, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.0941, | |
| "num_input_tokens_seen": 105431200, | |
| "step": 280, | |
| "train_runtime": 40502.3645, | |
| "train_tokens_per_second": 2603.088 | |
| }, | |
| { | |
| "epoch": 0.27592883940457463, | |
| "grad_norm": 0.19060031607841202, | |
| "learning_rate": 4.580645161290323e-05, | |
| "loss": 0.0973, | |
| "num_input_tokens_seen": 107368704, | |
| "step": 285, | |
| "train_runtime": 40864.2265, | |
| "train_tokens_per_second": 2627.45 | |
| }, | |
| { | |
| "epoch": 0.2807696962362338, | |
| "grad_norm": 0.15547346024027353, | |
| "learning_rate": 4.661290322580645e-05, | |
| "loss": 0.0949, | |
| "num_input_tokens_seen": 109257200, | |
| "step": 290, | |
| "train_runtime": 41191.3176, | |
| "train_tokens_per_second": 2652.433 | |
| }, | |
| { | |
| "epoch": 0.285610553067893, | |
| "grad_norm": 0.18137966749300305, | |
| "learning_rate": 4.741935483870968e-05, | |
| "loss": 0.0953, | |
| "num_input_tokens_seen": 111150704, | |
| "step": 295, | |
| "train_runtime": 41496.46, | |
| "train_tokens_per_second": 2678.559 | |
| }, | |
| { | |
| "epoch": 0.2904514098995522, | |
| "grad_norm": 0.1726911372012115, | |
| "learning_rate": 4.822580645161291e-05, | |
| "loss": 0.097, | |
| "num_input_tokens_seen": 113034512, | |
| "step": 300, | |
| "train_runtime": 41825.6854, | |
| "train_tokens_per_second": 2702.514 | |
| }, | |
| { | |
| "epoch": 0.29529226673121145, | |
| "grad_norm": 0.14521468889499597, | |
| "learning_rate": 4.903225806451613e-05, | |
| "loss": 0.0897, | |
| "num_input_tokens_seen": 114937056, | |
| "step": 305, | |
| "train_runtime": 42162.0241, | |
| "train_tokens_per_second": 2726.08 | |
| }, | |
| { | |
| "epoch": 0.30013312356287064, | |
| "grad_norm": 0.15231521973585768, | |
| "learning_rate": 4.983870967741936e-05, | |
| "loss": 0.0887, | |
| "num_input_tokens_seen": 116955552, | |
| "step": 310, | |
| "train_runtime": 42539.7887, | |
| "train_tokens_per_second": 2749.321 | |
| }, | |
| { | |
| "epoch": 0.30497398039452983, | |
| "grad_norm": 0.1534116557864518, | |
| "learning_rate": 4.999974623485097e-05, | |
| "loss": 0.0981, | |
| "num_input_tokens_seen": 118830688, | |
| "step": 315, | |
| "train_runtime": 42871.2685, | |
| "train_tokens_per_second": 2771.802 | |
| }, | |
| { | |
| "epoch": 0.309814837226189, | |
| "grad_norm": 0.1686940018305674, | |
| "learning_rate": 4.999871532276243e-05, | |
| "loss": 0.0974, | |
| "num_input_tokens_seen": 120691504, | |
| "step": 320, | |
| "train_runtime": 43165.3478, | |
| "train_tokens_per_second": 2796.028 | |
| }, | |
| { | |
| "epoch": 0.3146556940578482, | |
| "grad_norm": 0.16433905364856274, | |
| "learning_rate": 4.999689143608857e-05, | |
| "loss": 0.0948, | |
| "num_input_tokens_seen": 122560384, | |
| "step": 325, | |
| "train_runtime": 43458.1159, | |
| "train_tokens_per_second": 2820.196 | |
| }, | |
| { | |
| "epoch": 0.31949655088950746, | |
| "grad_norm": 0.14174604664203966, | |
| "learning_rate": 4.999427463268419e-05, | |
| "loss": 0.0901, | |
| "num_input_tokens_seen": 124493200, | |
| "step": 330, | |
| "train_runtime": 43797.1282, | |
| "train_tokens_per_second": 2842.497 | |
| }, | |
| { | |
| "epoch": 0.32433740772116665, | |
| "grad_norm": 0.1695048202968491, | |
| "learning_rate": 4.999086499555591e-05, | |
| "loss": 0.09, | |
| "num_input_tokens_seen": 126353344, | |
| "step": 335, | |
| "train_runtime": 44105.2533, | |
| "train_tokens_per_second": 2864.814 | |
| }, | |
| { | |
| "epoch": 0.32917826455282584, | |
| "grad_norm": 0.17629027656795873, | |
| "learning_rate": 4.9986662632859504e-05, | |
| "loss": 0.0943, | |
| "num_input_tokens_seen": 128177040, | |
| "step": 340, | |
| "train_runtime": 44411.4535, | |
| "train_tokens_per_second": 2886.126 | |
| }, | |
| { | |
| "epoch": 0.33401912138448503, | |
| "grad_norm": 0.16367775596487932, | |
| "learning_rate": 4.9981667677896506e-05, | |
| "loss": 0.0921, | |
| "num_input_tokens_seen": 129970848, | |
| "step": 345, | |
| "train_runtime": 44670.9184, | |
| "train_tokens_per_second": 2909.518 | |
| }, | |
| { | |
| "epoch": 0.3388599782161443, | |
| "grad_norm": 0.14429236107461654, | |
| "learning_rate": 4.997588028910993e-05, | |
| "loss": 0.0933, | |
| "num_input_tokens_seen": 131934512, | |
| "step": 350, | |
| "train_runtime": 45023.4447, | |
| "train_tokens_per_second": 2930.351 | |
| }, | |
| { | |
| "epoch": 0.34370083504780347, | |
| "grad_norm": 0.14127408264764416, | |
| "learning_rate": 4.996930065007932e-05, | |
| "loss": 0.0919, | |
| "num_input_tokens_seen": 133828000, | |
| "step": 355, | |
| "train_runtime": 45336.3241, | |
| "train_tokens_per_second": 2951.893 | |
| }, | |
| { | |
| "epoch": 0.34854169187946266, | |
| "grad_norm": 0.1480021003298484, | |
| "learning_rate": 4.996192896951485e-05, | |
| "loss": 0.0894, | |
| "num_input_tokens_seen": 135655808, | |
| "step": 360, | |
| "train_runtime": 45602.4522, | |
| "train_tokens_per_second": 2974.748 | |
| }, | |
| { | |
| "epoch": 0.35338254871112185, | |
| "grad_norm": 0.16542232457655492, | |
| "learning_rate": 4.995376548125075e-05, | |
| "loss": 0.089, | |
| "num_input_tokens_seen": 137547952, | |
| "step": 365, | |
| "train_runtime": 45922.7124, | |
| "train_tokens_per_second": 2995.205 | |
| }, | |
| { | |
| "epoch": 0.3582234055427811, | |
| "grad_norm": 0.15136452715359272, | |
| "learning_rate": 4.994481044423788e-05, | |
| "loss": 0.0963, | |
| "num_input_tokens_seen": 139408624, | |
| "step": 370, | |
| "train_runtime": 46232.1379, | |
| "train_tokens_per_second": 3015.405 | |
| }, | |
| { | |
| "epoch": 0.3630642623744403, | |
| "grad_norm": 0.1569038436076336, | |
| "learning_rate": 4.99350641425355e-05, | |
| "loss": 0.098, | |
| "num_input_tokens_seen": 141146720, | |
| "step": 375, | |
| "train_runtime": 46495.1626, | |
| "train_tokens_per_second": 3035.729 | |
| }, | |
| { | |
| "epoch": 0.3679051192060995, | |
| "grad_norm": 0.15583043362247376, | |
| "learning_rate": 4.99245268853023e-05, | |
| "loss": 0.0927, | |
| "num_input_tokens_seen": 142875840, | |
| "step": 380, | |
| "train_runtime": 46725.0644, | |
| "train_tokens_per_second": 3057.799 | |
| }, | |
| { | |
| "epoch": 0.37274597603775866, | |
| "grad_norm": 0.17120656256693706, | |
| "learning_rate": 4.9913199006786515e-05, | |
| "loss": 0.0881, | |
| "num_input_tokens_seen": 144878720, | |
| "step": 385, | |
| "train_runtime": 47082.6128, | |
| "train_tokens_per_second": 3077.117 | |
| }, | |
| { | |
| "epoch": 0.3775868328694179, | |
| "grad_norm": 0.16312830261625755, | |
| "learning_rate": 4.9901080866315416e-05, | |
| "loss": 0.1036, | |
| "num_input_tokens_seen": 146604080, | |
| "step": 390, | |
| "train_runtime": 47321.9205, | |
| "train_tokens_per_second": 3098.016 | |
| }, | |
| { | |
| "epoch": 0.3824276897010771, | |
| "grad_norm": 0.11584886103538528, | |
| "learning_rate": 4.988817284828386e-05, | |
| "loss": 0.0879, | |
| "num_input_tokens_seen": 148505760, | |
| "step": 395, | |
| "train_runtime": 47625.9364, | |
| "train_tokens_per_second": 3118.17 | |
| }, | |
| { | |
| "epoch": 0.3872685465327363, | |
| "grad_norm": 0.1146886026314572, | |
| "learning_rate": 4.9874475362142076e-05, | |
| "loss": 0.0893, | |
| "num_input_tokens_seen": 150426000, | |
| "step": 400, | |
| "train_runtime": 47956.2272, | |
| "train_tokens_per_second": 3136.735 | |
| }, | |
| { | |
| "epoch": 0.3921094033643955, | |
| "grad_norm": 0.12910217232559762, | |
| "learning_rate": 4.9859988842382744e-05, | |
| "loss": 0.0909, | |
| "num_input_tokens_seen": 152243872, | |
| "step": 405, | |
| "train_runtime": 48242.6162, | |
| "train_tokens_per_second": 3155.796 | |
| }, | |
| { | |
| "epoch": 0.3969502601960547, | |
| "grad_norm": 0.15108152268102795, | |
| "learning_rate": 4.984471374852715e-05, | |
| "loss": 0.0947, | |
| "num_input_tokens_seen": 154052240, | |
| "step": 410, | |
| "train_runtime": 48527.6459, | |
| "train_tokens_per_second": 3174.525 | |
| }, | |
| { | |
| "epoch": 0.4017911170277139, | |
| "grad_norm": 0.14679850444524517, | |
| "learning_rate": 4.9828650565110644e-05, | |
| "loss": 0.0907, | |
| "num_input_tokens_seen": 156020992, | |
| "step": 415, | |
| "train_runtime": 48888.6576, | |
| "train_tokens_per_second": 3191.354 | |
| }, | |
| { | |
| "epoch": 0.4066319738593731, | |
| "grad_norm": 0.1327811810612439, | |
| "learning_rate": 4.981179980166726e-05, | |
| "loss": 0.09, | |
| "num_input_tokens_seen": 157936240, | |
| "step": 420, | |
| "train_runtime": 49230.7981, | |
| "train_tokens_per_second": 3208.078 | |
| }, | |
| { | |
| "epoch": 0.4114728306910323, | |
| "grad_norm": 0.14604735675352598, | |
| "learning_rate": 4.979416199271354e-05, | |
| "loss": 0.0903, | |
| "num_input_tokens_seen": 159733040, | |
| "step": 425, | |
| "train_runtime": 49491.6444, | |
| "train_tokens_per_second": 3227.475 | |
| }, | |
| { | |
| "epoch": 0.4163136875226915, | |
| "grad_norm": 0.14435246296113152, | |
| "learning_rate": 4.977573769773162e-05, | |
| "loss": 0.093, | |
| "num_input_tokens_seen": 161662848, | |
| "step": 430, | |
| "train_runtime": 49847.5614, | |
| "train_tokens_per_second": 3243.145 | |
| }, | |
| { | |
| "epoch": 0.42115454435435073, | |
| "grad_norm": 0.14601625994718231, | |
| "learning_rate": 4.975652750115143e-05, | |
| "loss": 0.0953, | |
| "num_input_tokens_seen": 163510512, | |
| "step": 435, | |
| "train_runtime": 50144.9198, | |
| "train_tokens_per_second": 3260.759 | |
| }, | |
| { | |
| "epoch": 0.4259954011860099, | |
| "grad_norm": 0.13966377448924783, | |
| "learning_rate": 4.973653201233219e-05, | |
| "loss": 0.0925, | |
| "num_input_tokens_seen": 165482800, | |
| "step": 440, | |
| "train_runtime": 50546.5604, | |
| "train_tokens_per_second": 3273.869 | |
| }, | |
| { | |
| "epoch": 0.4308362580176691, | |
| "grad_norm": 0.1417296980128132, | |
| "learning_rate": 4.971575186554307e-05, | |
| "loss": 0.0893, | |
| "num_input_tokens_seen": 167426896, | |
| "step": 445, | |
| "train_runtime": 50915.2604, | |
| "train_tokens_per_second": 3288.344 | |
| }, | |
| { | |
| "epoch": 0.4356771148493283, | |
| "grad_norm": 0.13565426401676745, | |
| "learning_rate": 4.969418771994309e-05, | |
| "loss": 0.0943, | |
| "num_input_tokens_seen": 169233824, | |
| "step": 450, | |
| "train_runtime": 51185.0637, | |
| "train_tokens_per_second": 3306.313 | |
| }, | |
| { | |
| "epoch": 0.44051797168098755, | |
| "grad_norm": 0.1454259863391591, | |
| "learning_rate": 4.967184025956015e-05, | |
| "loss": 0.0938, | |
| "num_input_tokens_seen": 171055232, | |
| "step": 455, | |
| "train_runtime": 51458.1503, | |
| "train_tokens_per_second": 3324.162 | |
| }, | |
| { | |
| "epoch": 0.44535882851264674, | |
| "grad_norm": 0.13730748863216408, | |
| "learning_rate": 4.9648710193269424e-05, | |
| "loss": 0.0915, | |
| "num_input_tokens_seen": 172893920, | |
| "step": 460, | |
| "train_runtime": 51744.3108, | |
| "train_tokens_per_second": 3341.313 | |
| }, | |
| { | |
| "epoch": 0.45019968534430593, | |
| "grad_norm": 0.12243741977778125, | |
| "learning_rate": 4.962479825477079e-05, | |
| "loss": 0.0849, | |
| "num_input_tokens_seen": 174782016, | |
| "step": 465, | |
| "train_runtime": 52060.903, | |
| "train_tokens_per_second": 3357.261 | |
| }, | |
| { | |
| "epoch": 0.4550405421759651, | |
| "grad_norm": 0.11854640411216295, | |
| "learning_rate": 4.9600105202565605e-05, | |
| "loss": 0.0886, | |
| "num_input_tokens_seen": 176671136, | |
| "step": 470, | |
| "train_runtime": 52381.9286, | |
| "train_tokens_per_second": 3372.75 | |
| }, | |
| { | |
| "epoch": 0.45988139900762437, | |
| "grad_norm": 0.14968248763796943, | |
| "learning_rate": 4.9574631819932646e-05, | |
| "loss": 0.0924, | |
| "num_input_tokens_seen": 178524848, | |
| "step": 475, | |
| "train_runtime": 52697.5978, | |
| "train_tokens_per_second": 3387.723 | |
| }, | |
| { | |
| "epoch": 0.46472225583928356, | |
| "grad_norm": 0.1435093288248635, | |
| "learning_rate": 4.9548378914903225e-05, | |
| "loss": 0.0872, | |
| "num_input_tokens_seen": 180401664, | |
| "step": 480, | |
| "train_runtime": 53014.3662, | |
| "train_tokens_per_second": 3402.883 | |
| }, | |
| { | |
| "epoch": 0.46956311267094275, | |
| "grad_norm": 0.15828157632384718, | |
| "learning_rate": 4.95213473202356e-05, | |
| "loss": 0.0948, | |
| "num_input_tokens_seen": 182234608, | |
| "step": 485, | |
| "train_runtime": 53294.1919, | |
| "train_tokens_per_second": 3419.408 | |
| }, | |
| { | |
| "epoch": 0.47440396950260194, | |
| "grad_norm": 0.11560281909563656, | |
| "learning_rate": 4.9493537893388544e-05, | |
| "loss": 0.0884, | |
| "num_input_tokens_seen": 184157296, | |
| "step": 490, | |
| "train_runtime": 53645.7615, | |
| "train_tokens_per_second": 3432.84 | |
| }, | |
| { | |
| "epoch": 0.4792448263342612, | |
| "grad_norm": 0.14531956629532314, | |
| "learning_rate": 4.9464951516494116e-05, | |
| "loss": 0.0916, | |
| "num_input_tokens_seen": 185978048, | |
| "step": 495, | |
| "train_runtime": 53924.698, | |
| "train_tokens_per_second": 3448.847 | |
| }, | |
| { | |
| "epoch": 0.4840856831659204, | |
| "grad_norm": 0.11828670163218845, | |
| "learning_rate": 4.943558909632975e-05, | |
| "loss": 0.0877, | |
| "num_input_tokens_seen": 187867312, | |
| "step": 500, | |
| "train_runtime": 54232.0578, | |
| "train_tokens_per_second": 3464.138 | |
| }, | |
| { | |
| "epoch": 0.48892653999757957, | |
| "grad_norm": 0.1315673057494712, | |
| "learning_rate": 4.94054515642894e-05, | |
| "loss": 0.0962, | |
| "num_input_tokens_seen": 189760400, | |
| "step": 505, | |
| "train_runtime": 54567.8241, | |
| "train_tokens_per_second": 3477.515 | |
| }, | |
| { | |
| "epoch": 0.49376739682923876, | |
| "grad_norm": 0.13676145087673666, | |
| "learning_rate": 4.937453987635408e-05, | |
| "loss": 0.084, | |
| "num_input_tokens_seen": 191773264, | |
| "step": 510, | |
| "train_runtime": 54988.129, | |
| "train_tokens_per_second": 3487.539 | |
| }, | |
| { | |
| "epoch": 0.498608253660898, | |
| "grad_norm": 0.1274509635019909, | |
| "learning_rate": 4.934285501306152e-05, | |
| "loss": 0.0868, | |
| "num_input_tokens_seen": 193750176, | |
| "step": 515, | |
| "train_runtime": 55385.4065, | |
| "train_tokens_per_second": 3498.217 | |
| }, | |
| { | |
| "epoch": 0.5034491104925571, | |
| "grad_norm": 0.12982283309243445, | |
| "learning_rate": 4.931039797947498e-05, | |
| "loss": 0.0895, | |
| "num_input_tokens_seen": 195664256, | |
| "step": 520, | |
| "train_runtime": 55687.4295, | |
| "train_tokens_per_second": 3513.616 | |
| }, | |
| { | |
| "epoch": 0.5082899673242164, | |
| "grad_norm": 0.13281239797421768, | |
| "learning_rate": 4.9277169805151496e-05, | |
| "loss": 0.0875, | |
| "num_input_tokens_seen": 197577536, | |
| "step": 525, | |
| "train_runtime": 56037.147, | |
| "train_tokens_per_second": 3525.831 | |
| }, | |
| { | |
| "epoch": 0.5131308241558756, | |
| "grad_norm": 0.127865250498207, | |
| "learning_rate": 4.924317154410915e-05, | |
| "loss": 0.0907, | |
| "num_input_tokens_seen": 199441664, | |
| "step": 530, | |
| "train_runtime": 56386.1137, | |
| "train_tokens_per_second": 3537.071 | |
| }, | |
| { | |
| "epoch": 0.5179716809875348, | |
| "grad_norm": 0.12535903140665275, | |
| "learning_rate": 4.9208404274793615e-05, | |
| "loss": 0.0851, | |
| "num_input_tokens_seen": 201395760, | |
| "step": 535, | |
| "train_runtime": 56725.1327, | |
| "train_tokens_per_second": 3550.38 | |
| }, | |
| { | |
| "epoch": 0.522812537819194, | |
| "grad_norm": 0.11973273742173074, | |
| "learning_rate": 4.917286910004402e-05, | |
| "loss": 0.0859, | |
| "num_input_tokens_seen": 203289168, | |
| "step": 540, | |
| "train_runtime": 57048.2676, | |
| "train_tokens_per_second": 3563.459 | |
| }, | |
| { | |
| "epoch": 0.5276533946508531, | |
| "grad_norm": 0.13617614252406723, | |
| "learning_rate": 4.9136567147057886e-05, | |
| "loss": 0.0932, | |
| "num_input_tokens_seen": 205086960, | |
| "step": 545, | |
| "train_runtime": 57320.4179, | |
| "train_tokens_per_second": 3577.904 | |
| }, | |
| { | |
| "epoch": 0.5324942514825124, | |
| "grad_norm": 0.12228835012992155, | |
| "learning_rate": 4.9099499567355465e-05, | |
| "loss": 0.0877, | |
| "num_input_tokens_seen": 207038560, | |
| "step": 550, | |
| "train_runtime": 57676.6952, | |
| "train_tokens_per_second": 3589.64 | |
| }, | |
| { | |
| "epoch": 0.5373351083141716, | |
| "grad_norm": 0.11616581415998756, | |
| "learning_rate": 4.9061667536743096e-05, | |
| "loss": 0.0849, | |
| "num_input_tokens_seen": 208994656, | |
| "step": 555, | |
| "train_runtime": 58017.1366, | |
| "train_tokens_per_second": 3602.292 | |
| }, | |
| { | |
| "epoch": 0.5421759651458308, | |
| "grad_norm": 0.1227132194885445, | |
| "learning_rate": 4.9023072255276e-05, | |
| "loss": 0.0924, | |
| "num_input_tokens_seen": 210860592, | |
| "step": 560, | |
| "train_runtime": 58355.6811, | |
| "train_tokens_per_second": 3613.369 | |
| }, | |
| { | |
| "epoch": 0.54701682197749, | |
| "grad_norm": 0.1331754681110147, | |
| "learning_rate": 4.89837149472202e-05, | |
| "loss": 0.0852, | |
| "num_input_tokens_seen": 212822400, | |
| "step": 565, | |
| "train_runtime": 58727.1386, | |
| "train_tokens_per_second": 3623.919 | |
| }, | |
| { | |
| "epoch": 0.5518576788091493, | |
| "grad_norm": 0.11555719706146601, | |
| "learning_rate": 4.894359686101363e-05, | |
| "loss": 0.0894, | |
| "num_input_tokens_seen": 214750096, | |
| "step": 570, | |
| "train_runtime": 59082.0703, | |
| "train_tokens_per_second": 3634.776 | |
| }, | |
| { | |
| "epoch": 0.5566985356408084, | |
| "grad_norm": 0.12553710069358454, | |
| "learning_rate": 4.890271926922661e-05, | |
| "loss": 0.0859, | |
| "num_input_tokens_seen": 216744256, | |
| "step": 575, | |
| "train_runtime": 59467.6278, | |
| "train_tokens_per_second": 3644.744 | |
| }, | |
| { | |
| "epoch": 0.5615393924724676, | |
| "grad_norm": 0.12296197236193043, | |
| "learning_rate": 4.886108346852142e-05, | |
| "loss": 0.0843, | |
| "num_input_tokens_seen": 218618416, | |
| "step": 580, | |
| "train_runtime": 59760.4082, | |
| "train_tokens_per_second": 3658.248 | |
| }, | |
| { | |
| "epoch": 0.5663802493041268, | |
| "grad_norm": 0.13282536365892234, | |
| "learning_rate": 4.881869077961119e-05, | |
| "loss": 0.0881, | |
| "num_input_tokens_seen": 220473312, | |
| "step": 585, | |
| "train_runtime": 60072.1334, | |
| "train_tokens_per_second": 3670.143 | |
| }, | |
| { | |
| "epoch": 0.571221106135786, | |
| "grad_norm": 0.1192404447399879, | |
| "learning_rate": 4.8775542547218035e-05, | |
| "loss": 0.0838, | |
| "num_input_tokens_seen": 222434400, | |
| "step": 590, | |
| "train_runtime": 60432.6507, | |
| "train_tokens_per_second": 3680.699 | |
| }, | |
| { | |
| "epoch": 0.5760619629674453, | |
| "grad_norm": 0.12137471825767925, | |
| "learning_rate": 4.873164014003035e-05, | |
| "loss": 0.0823, | |
| "num_input_tokens_seen": 224344864, | |
| "step": 595, | |
| "train_runtime": 60761.8507, | |
| "train_tokens_per_second": 3692.199 | |
| }, | |
| { | |
| "epoch": 0.5809028197991044, | |
| "grad_norm": 0.10630144681377657, | |
| "learning_rate": 4.868698495065942e-05, | |
| "loss": 0.0908, | |
| "num_input_tokens_seen": 226325424, | |
| "step": 600, | |
| "train_runtime": 61173.0901, | |
| "train_tokens_per_second": 3699.755 | |
| }, | |
| { | |
| "epoch": 0.5857436766307637, | |
| "grad_norm": 0.12903663659157497, | |
| "learning_rate": 4.8641578395595244e-05, | |
| "loss": 0.0841, | |
| "num_input_tokens_seen": 228223680, | |
| "step": 605, | |
| "train_runtime": 61472.6009, | |
| "train_tokens_per_second": 3712.608 | |
| }, | |
| { | |
| "epoch": 0.5905845334624229, | |
| "grad_norm": 0.1120445766713908, | |
| "learning_rate": 4.8595421915161606e-05, | |
| "loss": 0.09, | |
| "num_input_tokens_seen": 230051616, | |
| "step": 610, | |
| "train_runtime": 61773.5624, | |
| "train_tokens_per_second": 3724.111 | |
| }, | |
| { | |
| "epoch": 0.595425390294082, | |
| "grad_norm": 0.1403828596714754, | |
| "learning_rate": 4.854851697347038e-05, | |
| "loss": 0.0953, | |
| "num_input_tokens_seen": 231784640, | |
| "step": 615, | |
| "train_runtime": 62017.5435, | |
| "train_tokens_per_second": 3737.404 | |
| }, | |
| { | |
| "epoch": 0.6002662471257413, | |
| "grad_norm": 0.12026744008912688, | |
| "learning_rate": 4.8500865058375084e-05, | |
| "loss": 0.0818, | |
| "num_input_tokens_seen": 233624880, | |
| "step": 620, | |
| "train_runtime": 62301.1537, | |
| "train_tokens_per_second": 3749.929 | |
| }, | |
| { | |
| "epoch": 0.6051071039574004, | |
| "grad_norm": 0.13228231302304957, | |
| "learning_rate": 4.845246768142371e-05, | |
| "loss": 0.0919, | |
| "num_input_tokens_seen": 235580064, | |
| "step": 625, | |
| "train_runtime": 62644.2313, | |
| "train_tokens_per_second": 3760.603 | |
| }, | |
| { | |
| "epoch": 0.6099479607890597, | |
| "grad_norm": 0.11162603952629033, | |
| "learning_rate": 4.840332637781072e-05, | |
| "loss": 0.0806, | |
| "num_input_tokens_seen": 237579456, | |
| "step": 630, | |
| "train_runtime": 63013.8952, | |
| "train_tokens_per_second": 3770.271 | |
| }, | |
| { | |
| "epoch": 0.6147888176207189, | |
| "grad_norm": 0.13202350019993933, | |
| "learning_rate": 4.835344270632844e-05, | |
| "loss": 0.0894, | |
| "num_input_tokens_seen": 239379872, | |
| "step": 635, | |
| "train_runtime": 63306.9343, | |
| "train_tokens_per_second": 3781.258 | |
| }, | |
| { | |
| "epoch": 0.619629674452378, | |
| "grad_norm": 0.1262730134357675, | |
| "learning_rate": 4.830281824931751e-05, | |
| "loss": 0.0917, | |
| "num_input_tokens_seen": 241343936, | |
| "step": 640, | |
| "train_runtime": 63660.7557, | |
| "train_tokens_per_second": 3791.094 | |
| }, | |
| { | |
| "epoch": 0.6244705312840373, | |
| "grad_norm": 0.12514465647043127, | |
| "learning_rate": 4.825145461261677e-05, | |
| "loss": 0.0949, | |
| "num_input_tokens_seen": 243126480, | |
| "step": 645, | |
| "train_runtime": 63956.927, | |
| "train_tokens_per_second": 3801.41 | |
| }, | |
| { | |
| "epoch": 0.6293113881156964, | |
| "grad_norm": 0.11879787426434715, | |
| "learning_rate": 4.8199353425512276e-05, | |
| "loss": 0.0836, | |
| "num_input_tokens_seen": 245036080, | |
| "step": 650, | |
| "train_runtime": 64282.9428, | |
| "train_tokens_per_second": 3811.837 | |
| }, | |
| { | |
| "epoch": 0.6341522449473557, | |
| "grad_norm": 0.12807215290275195, | |
| "learning_rate": 4.814651634068565e-05, | |
| "loss": 0.0919, | |
| "num_input_tokens_seen": 246836544, | |
| "step": 655, | |
| "train_runtime": 64554.9622, | |
| "train_tokens_per_second": 3823.665 | |
| }, | |
| { | |
| "epoch": 0.6389931017790149, | |
| "grad_norm": 0.13318265083798486, | |
| "learning_rate": 4.809294503416164e-05, | |
| "loss": 0.0885, | |
| "num_input_tokens_seen": 248715200, | |
| "step": 660, | |
| "train_runtime": 64862.1325, | |
| "train_tokens_per_second": 3834.521 | |
| }, | |
| { | |
| "epoch": 0.643833958610674, | |
| "grad_norm": 0.11706198751336716, | |
| "learning_rate": 4.8038641205254945e-05, | |
| "loss": 0.0845, | |
| "num_input_tokens_seen": 250584112, | |
| "step": 665, | |
| "train_runtime": 65162.2083, | |
| "train_tokens_per_second": 3845.544 | |
| }, | |
| { | |
| "epoch": 0.6486748154423333, | |
| "grad_norm": 0.10849730614741822, | |
| "learning_rate": 4.7983606576516335e-05, | |
| "loss": 0.0834, | |
| "num_input_tokens_seen": 252446000, | |
| "step": 670, | |
| "train_runtime": 65447.5256, | |
| "train_tokens_per_second": 3857.228 | |
| }, | |
| { | |
| "epoch": 0.6535156722739925, | |
| "grad_norm": 0.11644681119826476, | |
| "learning_rate": 4.792784289367799e-05, | |
| "loss": 0.0889, | |
| "num_input_tokens_seen": 254336512, | |
| "step": 675, | |
| "train_runtime": 65807.5992, | |
| "train_tokens_per_second": 3864.85 | |
| }, | |
| { | |
| "epoch": 0.6583565291056517, | |
| "grad_norm": 0.12648798160840266, | |
| "learning_rate": 4.787135192559814e-05, | |
| "loss": 0.081, | |
| "num_input_tokens_seen": 256248240, | |
| "step": 680, | |
| "train_runtime": 66118.3408, | |
| "train_tokens_per_second": 3875.6 | |
| }, | |
| { | |
| "epoch": 0.6631973859373109, | |
| "grad_norm": 0.13179948151989662, | |
| "learning_rate": 4.781413546420494e-05, | |
| "loss": 0.0925, | |
| "num_input_tokens_seen": 258018928, | |
| "step": 685, | |
| "train_runtime": 66397.3901, | |
| "train_tokens_per_second": 3885.98 | |
| }, | |
| { | |
| "epoch": 0.6680382427689701, | |
| "grad_norm": 0.11355476378512105, | |
| "learning_rate": 4.775619532443965e-05, | |
| "loss": 0.081, | |
| "num_input_tokens_seen": 260040928, | |
| "step": 690, | |
| "train_runtime": 66781.9966, | |
| "train_tokens_per_second": 3893.878 | |
| }, | |
| { | |
| "epoch": 0.6728790996006293, | |
| "grad_norm": 0.1102920346549403, | |
| "learning_rate": 4.7697533344199054e-05, | |
| "loss": 0.081, | |
| "num_input_tokens_seen": 261986240, | |
| "step": 695, | |
| "train_runtime": 67112.4268, | |
| "train_tokens_per_second": 3903.692 | |
| }, | |
| { | |
| "epoch": 0.6777199564322886, | |
| "grad_norm": 0.11923784097224148, | |
| "learning_rate": 4.763815138427713e-05, | |
| "loss": 0.0834, | |
| "num_input_tokens_seen": 263808880, | |
| "step": 700, | |
| "train_runtime": 67389.9808, | |
| "train_tokens_per_second": 3914.66 | |
| }, | |
| { | |
| "epoch": 0.6825608132639477, | |
| "grad_norm": 0.13058866115819348, | |
| "learning_rate": 4.757805132830609e-05, | |
| "loss": 0.0858, | |
| "num_input_tokens_seen": 265662032, | |
| "step": 705, | |
| "train_runtime": 67694.3239, | |
| "train_tokens_per_second": 3924.436 | |
| }, | |
| { | |
| "epoch": 0.6874016700956069, | |
| "grad_norm": 0.10903443202471683, | |
| "learning_rate": 4.751723508269659e-05, | |
| "loss": 0.084, | |
| "num_input_tokens_seen": 267501344, | |
| "step": 710, | |
| "train_runtime": 68025.3342, | |
| "train_tokens_per_second": 3932.378 | |
| }, | |
| { | |
| "epoch": 0.6922425269272662, | |
| "grad_norm": 0.1065038057537401, | |
| "learning_rate": 4.745570457657722e-05, | |
| "loss": 0.0843, | |
| "num_input_tokens_seen": 269394176, | |
| "step": 715, | |
| "train_runtime": 68343.6923, | |
| "train_tokens_per_second": 3941.756 | |
| }, | |
| { | |
| "epoch": 0.6970833837589253, | |
| "grad_norm": 0.11228653074537234, | |
| "learning_rate": 4.7393461761733395e-05, | |
| "loss": 0.0911, | |
| "num_input_tokens_seen": 271288208, | |
| "step": 720, | |
| "train_runtime": 68705.6867, | |
| "train_tokens_per_second": 3948.555 | |
| }, | |
| { | |
| "epoch": 0.7019242405905846, | |
| "grad_norm": 0.10423741587634047, | |
| "learning_rate": 4.733050861254538e-05, | |
| "loss": 0.0759, | |
| "num_input_tokens_seen": 273149952, | |
| "step": 725, | |
| "train_runtime": 69004.9126, | |
| "train_tokens_per_second": 3958.413 | |
| }, | |
| { | |
| "epoch": 0.7067650974222437, | |
| "grad_norm": 0.12067320290718164, | |
| "learning_rate": 4.7266847125925686e-05, | |
| "loss": 0.0874, | |
| "num_input_tokens_seen": 275017888, | |
| "step": 730, | |
| "train_runtime": 69310.7545, | |
| "train_tokens_per_second": 3967.896 | |
| }, | |
| { | |
| "epoch": 0.7116059542539029, | |
| "grad_norm": 0.126476130474256, | |
| "learning_rate": 4.720247932125572e-05, | |
| "loss": 0.0877, | |
| "num_input_tokens_seen": 276821728, | |
| "step": 735, | |
| "train_runtime": 69607.9909, | |
| "train_tokens_per_second": 3976.867 | |
| }, | |
| { | |
| "epoch": 0.7164468110855622, | |
| "grad_norm": 0.14675215045590576, | |
| "learning_rate": 4.713740724032173e-05, | |
| "loss": 0.0854, | |
| "num_input_tokens_seen": 278648688, | |
| "step": 740, | |
| "train_runtime": 69890.2265, | |
| "train_tokens_per_second": 3986.948 | |
| }, | |
| { | |
| "epoch": 0.7212876679172213, | |
| "grad_norm": 0.10393986794688335, | |
| "learning_rate": 4.7071632947250056e-05, | |
| "loss": 0.0856, | |
| "num_input_tokens_seen": 280573744, | |
| "step": 745, | |
| "train_runtime": 70220.8361, | |
| "train_tokens_per_second": 3995.591 | |
| }, | |
| { | |
| "epoch": 0.7261285247488806, | |
| "grad_norm": 0.12245429208403327, | |
| "learning_rate": 4.700515852844161e-05, | |
| "loss": 0.088, | |
| "num_input_tokens_seen": 282386080, | |
| "step": 750, | |
| "train_runtime": 70532.5051, | |
| "train_tokens_per_second": 4003.63 | |
| }, | |
| { | |
| "epoch": 0.7309693815805397, | |
| "grad_norm": 0.15490079892082556, | |
| "learning_rate": 4.6937986092505734e-05, | |
| "loss": 0.0853, | |
| "num_input_tokens_seen": 284163392, | |
| "step": 755, | |
| "train_runtime": 70777.4141, | |
| "train_tokens_per_second": 4014.888 | |
| }, | |
| { | |
| "epoch": 0.735810238412199, | |
| "grad_norm": 0.1355824935484372, | |
| "learning_rate": 4.687011777019331e-05, | |
| "loss": 0.0897, | |
| "num_input_tokens_seen": 285979472, | |
| "step": 760, | |
| "train_runtime": 71065.8783, | |
| "train_tokens_per_second": 4024.146 | |
| }, | |
| { | |
| "epoch": 0.7406510952438582, | |
| "grad_norm": 0.10890370807480093, | |
| "learning_rate": 4.680155571432915e-05, | |
| "loss": 0.0866, | |
| "num_input_tokens_seen": 287803904, | |
| "step": 765, | |
| "train_runtime": 71342.8364, | |
| "train_tokens_per_second": 4034.097 | |
| }, | |
| { | |
| "epoch": 0.7454919520755173, | |
| "grad_norm": 0.10818940985882454, | |
| "learning_rate": 4.673230209974372e-05, | |
| "loss": 0.084, | |
| "num_input_tokens_seen": 289770544, | |
| "step": 770, | |
| "train_runtime": 71692.4983, | |
| "train_tokens_per_second": 4041.853 | |
| }, | |
| { | |
| "epoch": 0.7503328089071766, | |
| "grad_norm": 0.11393295664454559, | |
| "learning_rate": 4.666235912320416e-05, | |
| "loss": 0.0847, | |
| "num_input_tokens_seen": 291639680, | |
| "step": 775, | |
| "train_runtime": 71991.759, | |
| "train_tokens_per_second": 4051.015 | |
| }, | |
| { | |
| "epoch": 0.7551736657388358, | |
| "grad_norm": 0.10791548067564018, | |
| "learning_rate": 4.6591729003344604e-05, | |
| "loss": 0.0855, | |
| "num_input_tokens_seen": 293493632, | |
| "step": 780, | |
| "train_runtime": 72290.0262, | |
| "train_tokens_per_second": 4059.946 | |
| }, | |
| { | |
| "epoch": 0.760014522570495, | |
| "grad_norm": 0.12476206555821662, | |
| "learning_rate": 4.652041398059577e-05, | |
| "loss": 0.0874, | |
| "num_input_tokens_seen": 295416672, | |
| "step": 785, | |
| "train_runtime": 72613.7692, | |
| "train_tokens_per_second": 4068.329 | |
| }, | |
| { | |
| "epoch": 0.7648553794021542, | |
| "grad_norm": 0.11581041520648833, | |
| "learning_rate": 4.644841631711393e-05, | |
| "loss": 0.0917, | |
| "num_input_tokens_seen": 297230848, | |
| "step": 790, | |
| "train_runtime": 72898.9964, | |
| "train_tokens_per_second": 4077.297 | |
| }, | |
| { | |
| "epoch": 0.7696962362338133, | |
| "grad_norm": 0.1207889618919906, | |
| "learning_rate": 4.637573829670913e-05, | |
| "loss": 0.0862, | |
| "num_input_tokens_seen": 299220432, | |
| "step": 795, | |
| "train_runtime": 73281.0367, | |
| "train_tokens_per_second": 4083.19 | |
| }, | |
| { | |
| "epoch": 0.7745370930654726, | |
| "grad_norm": 0.13046576053931733, | |
| "learning_rate": 4.630238222477277e-05, | |
| "loss": 0.0876, | |
| "num_input_tokens_seen": 301103984, | |
| "step": 800, | |
| "train_runtime": 73600.0892, | |
| "train_tokens_per_second": 4091.082 | |
| }, | |
| { | |
| "epoch": 0.7793779498971318, | |
| "grad_norm": 0.11722404450994561, | |
| "learning_rate": 4.622835042820445e-05, | |
| "loss": 0.0832, | |
| "num_input_tokens_seen": 303004240, | |
| "step": 805, | |
| "train_runtime": 73903.033, | |
| "train_tokens_per_second": 4100.024 | |
| }, | |
| { | |
| "epoch": 0.784218806728791, | |
| "grad_norm": 0.11528092874791906, | |
| "learning_rate": 4.615364525533817e-05, | |
| "loss": 0.0875, | |
| "num_input_tokens_seen": 304776752, | |
| "step": 810, | |
| "train_runtime": 74158.7012, | |
| "train_tokens_per_second": 4109.791 | |
| }, | |
| { | |
| "epoch": 0.7890596635604502, | |
| "grad_norm": 0.1118105371174961, | |
| "learning_rate": 4.6078269075867844e-05, | |
| "loss": 0.0838, | |
| "num_input_tokens_seen": 306564704, | |
| "step": 815, | |
| "train_runtime": 74444.0338, | |
| "train_tokens_per_second": 4118.056 | |
| }, | |
| { | |
| "epoch": 0.7939005203921095, | |
| "grad_norm": 0.12405349561980368, | |
| "learning_rate": 4.600222428077212e-05, | |
| "loss": 0.0831, | |
| "num_input_tokens_seen": 308443424, | |
| "step": 820, | |
| "train_runtime": 74777.6015, | |
| "train_tokens_per_second": 4124.81 | |
| }, | |
| { | |
| "epoch": 0.7987413772237686, | |
| "grad_norm": 0.10166638613764127, | |
| "learning_rate": 4.592551328223854e-05, | |
| "loss": 0.0861, | |
| "num_input_tokens_seen": 310326496, | |
| "step": 825, | |
| "train_runtime": 75075.0708, | |
| "train_tokens_per_second": 4133.549 | |
| }, | |
| { | |
| "epoch": 0.8035822340554278, | |
| "grad_norm": 0.1304318839178686, | |
| "learning_rate": 4.584813851358705e-05, | |
| "loss": 0.0868, | |
| "num_input_tokens_seen": 312324656, | |
| "step": 830, | |
| "train_runtime": 75496.3515, | |
| "train_tokens_per_second": 4136.95 | |
| }, | |
| { | |
| "epoch": 0.808423090887087, | |
| "grad_norm": 0.11074066557269138, | |
| "learning_rate": 4.577010242919277e-05, | |
| "loss": 0.0823, | |
| "num_input_tokens_seen": 314264240, | |
| "step": 835, | |
| "train_runtime": 75859.0005, | |
| "train_tokens_per_second": 4142.742 | |
| }, | |
| { | |
| "epoch": 0.8132639477187462, | |
| "grad_norm": 0.1163799358603999, | |
| "learning_rate": 4.569140750440817e-05, | |
| "loss": 0.0823, | |
| "num_input_tokens_seen": 316091424, | |
| "step": 840, | |
| "train_runtime": 76138.2624, | |
| "train_tokens_per_second": 4151.545 | |
| }, | |
| { | |
| "epoch": 0.8181048045504055, | |
| "grad_norm": 0.11515834663049564, | |
| "learning_rate": 4.561205623548453e-05, | |
| "loss": 0.0816, | |
| "num_input_tokens_seen": 318022304, | |
| "step": 845, | |
| "train_runtime": 76454.3449, | |
| "train_tokens_per_second": 4159.637 | |
| }, | |
| { | |
| "epoch": 0.8229456613820646, | |
| "grad_norm": 0.10167337175748853, | |
| "learning_rate": 4.5532051139492784e-05, | |
| "loss": 0.0814, | |
| "num_input_tokens_seen": 319889920, | |
| "step": 850, | |
| "train_runtime": 76782.5683, | |
| "train_tokens_per_second": 4166.179 | |
| }, | |
| { | |
| "epoch": 0.8277865182137238, | |
| "grad_norm": 0.10009892012288468, | |
| "learning_rate": 4.545139475424366e-05, | |
| "loss": 0.0855, | |
| "num_input_tokens_seen": 321805040, | |
| "step": 855, | |
| "train_runtime": 77138.4208, | |
| "train_tokens_per_second": 4171.787 | |
| }, | |
| { | |
| "epoch": 0.832627375045383, | |
| "grad_norm": 0.10222131699800209, | |
| "learning_rate": 4.537008963820717e-05, | |
| "loss": 0.0859, | |
| "num_input_tokens_seen": 323627168, | |
| "step": 860, | |
| "train_runtime": 77426.607, | |
| "train_tokens_per_second": 4179.793 | |
| }, | |
| { | |
| "epoch": 0.8374682318770422, | |
| "grad_norm": 0.10643170835147328, | |
| "learning_rate": 4.5288138370431464e-05, | |
| "loss": 0.0866, | |
| "num_input_tokens_seen": 325450784, | |
| "step": 865, | |
| "train_runtime": 77714.3766, | |
| "train_tokens_per_second": 4187.781 | |
| }, | |
| { | |
| "epoch": 0.8423090887087015, | |
| "grad_norm": 0.1002014887379627, | |
| "learning_rate": 4.520554355046105e-05, | |
| "loss": 0.0794, | |
| "num_input_tokens_seen": 327331824, | |
| "step": 870, | |
| "train_runtime": 78022.0251, | |
| "train_tokens_per_second": 4195.377 | |
| }, | |
| { | |
| "epoch": 0.8471499455403606, | |
| "grad_norm": 0.12980276451699957, | |
| "learning_rate": 4.512230779825427e-05, | |
| "loss": 0.0853, | |
| "num_input_tokens_seen": 329167072, | |
| "step": 875, | |
| "train_runtime": 78294.26, | |
| "train_tokens_per_second": 4204.23 | |
| }, | |
| { | |
| "epoch": 0.8519908023720199, | |
| "grad_norm": 0.12209635825164061, | |
| "learning_rate": 4.503843375410024e-05, | |
| "loss": 0.0862, | |
| "num_input_tokens_seen": 331011792, | |
| "step": 880, | |
| "train_runtime": 78589.3442, | |
| "train_tokens_per_second": 4211.917 | |
| }, | |
| { | |
| "epoch": 0.8568316592036791, | |
| "grad_norm": 0.12814219418396117, | |
| "learning_rate": 4.49539240785351e-05, | |
| "loss": 0.0862, | |
| "num_input_tokens_seen": 332759152, | |
| "step": 885, | |
| "train_runtime": 78840.0264, | |
| "train_tokens_per_second": 4220.688 | |
| }, | |
| { | |
| "epoch": 0.8616725160353382, | |
| "grad_norm": 0.10182567961465826, | |
| "learning_rate": 4.4868781452257604e-05, | |
| "loss": 0.0854, | |
| "num_input_tokens_seen": 334548544, | |
| "step": 890, | |
| "train_runtime": 79107.638, | |
| "train_tokens_per_second": 4229.03 | |
| }, | |
| { | |
| "epoch": 0.8665133728669975, | |
| "grad_norm": 0.1059374934901486, | |
| "learning_rate": 4.478300857604407e-05, | |
| "loss": 0.081, | |
| "num_input_tokens_seen": 336480736, | |
| "step": 895, | |
| "train_runtime": 79448.3723, | |
| "train_tokens_per_second": 4235.212 | |
| }, | |
| { | |
| "epoch": 0.8713542296986566, | |
| "grad_norm": 0.11153870028757291, | |
| "learning_rate": 4.469660817066277e-05, | |
| "loss": 0.0799, | |
| "num_input_tokens_seen": 338427280, | |
| "step": 900, | |
| "train_runtime": 79789.1685, | |
| "train_tokens_per_second": 4241.519 | |
| }, | |
| { | |
| "epoch": 0.8761950865303159, | |
| "grad_norm": 0.13350987905341574, | |
| "learning_rate": 4.460958297678756e-05, | |
| "loss": 0.0828, | |
| "num_input_tokens_seen": 340350320, | |
| "step": 905, | |
| "train_runtime": 80140.7635, | |
| "train_tokens_per_second": 4246.906 | |
| }, | |
| { | |
| "epoch": 0.8810359433619751, | |
| "grad_norm": 0.10558711020322371, | |
| "learning_rate": 4.452193575491096e-05, | |
| "loss": 0.0815, | |
| "num_input_tokens_seen": 342226288, | |
| "step": 910, | |
| "train_runtime": 80448.6307, | |
| "train_tokens_per_second": 4253.973 | |
| }, | |
| { | |
| "epoch": 0.8858768001936342, | |
| "grad_norm": 0.1139445337081212, | |
| "learning_rate": 4.443366928525663e-05, | |
| "loss": 0.0813, | |
| "num_input_tokens_seen": 344202544, | |
| "step": 915, | |
| "train_runtime": 80804.3894, | |
| "train_tokens_per_second": 4259.701 | |
| }, | |
| { | |
| "epoch": 0.8907176570252935, | |
| "grad_norm": 0.12328404863523318, | |
| "learning_rate": 4.4344786367691125e-05, | |
| "loss": 0.0889, | |
| "num_input_tokens_seen": 346094384, | |
| "step": 920, | |
| "train_runtime": 81120.3268, | |
| "train_tokens_per_second": 4266.432 | |
| }, | |
| { | |
| "epoch": 0.8955585138569527, | |
| "grad_norm": 0.0973559450919178, | |
| "learning_rate": 4.425528982163512e-05, | |
| "loss": 0.0806, | |
| "num_input_tokens_seen": 348009840, | |
| "step": 925, | |
| "train_runtime": 81458.3407, | |
| "train_tokens_per_second": 4272.243 | |
| }, | |
| { | |
| "epoch": 0.9003993706886119, | |
| "grad_norm": 0.09888756400918433, | |
| "learning_rate": 4.416518248597395e-05, | |
| "loss": 0.0816, | |
| "num_input_tokens_seen": 349923616, | |
| "step": 930, | |
| "train_runtime": 81791.2966, | |
| "train_tokens_per_second": 4278.25 | |
| }, | |
| { | |
| "epoch": 0.9052402275202711, | |
| "grad_norm": 0.10273344580681865, | |
| "learning_rate": 4.4074467218967594e-05, | |
| "loss": 0.0808, | |
| "num_input_tokens_seen": 351982800, | |
| "step": 935, | |
| "train_runtime": 82224.6329, | |
| "train_tokens_per_second": 4280.746 | |
| }, | |
| { | |
| "epoch": 0.9100810843519302, | |
| "grad_norm": 0.1137919757065847, | |
| "learning_rate": 4.398314689815995e-05, | |
| "loss": 0.086, | |
| "num_input_tokens_seen": 353803568, | |
| "step": 940, | |
| "train_runtime": 82523.3997, | |
| "train_tokens_per_second": 4287.312 | |
| }, | |
| { | |
| "epoch": 0.9149219411835895, | |
| "grad_norm": 0.1129658121707594, | |
| "learning_rate": 4.389122442028762e-05, | |
| "loss": 0.0792, | |
| "num_input_tokens_seen": 355770400, | |
| "step": 945, | |
| "train_runtime": 82863.0664, | |
| "train_tokens_per_second": 4293.474 | |
| }, | |
| { | |
| "epoch": 0.9197627980152487, | |
| "grad_norm": 0.12295404647246533, | |
| "learning_rate": 4.379870270118801e-05, | |
| "loss": 0.0789, | |
| "num_input_tokens_seen": 357722704, | |
| "step": 950, | |
| "train_runtime": 83211.7587, | |
| "train_tokens_per_second": 4298.944 | |
| }, | |
| { | |
| "epoch": 0.9246036548469079, | |
| "grad_norm": 0.10702291365375034, | |
| "learning_rate": 4.370558467570678e-05, | |
| "loss": 0.0784, | |
| "num_input_tokens_seen": 359625392, | |
| "step": 955, | |
| "train_runtime": 83503.0123, | |
| "train_tokens_per_second": 4306.736 | |
| }, | |
| { | |
| "epoch": 0.9294445116785671, | |
| "grad_norm": 0.10671020858504487, | |
| "learning_rate": 4.361187329760483e-05, | |
| "loss": 0.0812, | |
| "num_input_tokens_seen": 361478880, | |
| "step": 960, | |
| "train_runtime": 83796.3829, | |
| "train_tokens_per_second": 4313.777 | |
| }, | |
| { | |
| "epoch": 0.9342853685102263, | |
| "grad_norm": 0.10376407616031118, | |
| "learning_rate": 4.351757153946456e-05, | |
| "loss": 0.0831, | |
| "num_input_tokens_seen": 363401552, | |
| "step": 965, | |
| "train_runtime": 84125.1709, | |
| "train_tokens_per_second": 4319.772 | |
| }, | |
| { | |
| "epoch": 0.9391262253418855, | |
| "grad_norm": 0.09840753061510307, | |
| "learning_rate": 4.3422682392595594e-05, | |
| "loss": 0.077, | |
| "num_input_tokens_seen": 365256960, | |
| "step": 970, | |
| "train_runtime": 84396.4737, | |
| "train_tokens_per_second": 4327.87 | |
| }, | |
| { | |
| "epoch": 0.9439670821735447, | |
| "grad_norm": 0.09244245286343136, | |
| "learning_rate": 4.332720886693987e-05, | |
| "loss": 0.0781, | |
| "num_input_tokens_seen": 367167264, | |
| "step": 975, | |
| "train_runtime": 84737.6756, | |
| "train_tokens_per_second": 4332.987 | |
| }, | |
| { | |
| "epoch": 0.9488079390052039, | |
| "grad_norm": 0.10909448781256045, | |
| "learning_rate": 4.32311539909762e-05, | |
| "loss": 0.084, | |
| "num_input_tokens_seen": 368976208, | |
| "step": 980, | |
| "train_runtime": 85107.1009, | |
| "train_tokens_per_second": 4335.434 | |
| }, | |
| { | |
| "epoch": 0.9536487958368631, | |
| "grad_norm": 0.10797216762579882, | |
| "learning_rate": 4.313452081162416e-05, | |
| "loss": 0.0829, | |
| "num_input_tokens_seen": 370862208, | |
| "step": 985, | |
| "train_runtime": 85937.0527, | |
| "train_tokens_per_second": 4315.51 | |
| }, | |
| { | |
| "epoch": 0.9584896526685224, | |
| "grad_norm": 0.09730637257808095, | |
| "learning_rate": 4.303731239414749e-05, | |
| "loss": 0.0859, | |
| "num_input_tokens_seen": 372719648, | |
| "step": 990, | |
| "train_runtime": 86738.8585, | |
| "train_tokens_per_second": 4297.032 | |
| }, | |
| { | |
| "epoch": 0.9633305095001815, | |
| "grad_norm": 0.10271769317718264, | |
| "learning_rate": 4.2939531822056815e-05, | |
| "loss": 0.0869, | |
| "num_input_tokens_seen": 374624944, | |
| "step": 995, | |
| "train_runtime": 87558.6912, | |
| "train_tokens_per_second": 4278.558 | |
| }, | |
| { | |
| "epoch": 0.9681713663318408, | |
| "grad_norm": 0.10573584364906888, | |
| "learning_rate": 4.284118219701187e-05, | |
| "loss": 0.0794, | |
| "num_input_tokens_seen": 376525888, | |
| "step": 1000, | |
| "train_runtime": 88323.819, | |
| "train_tokens_per_second": 4263.016 | |
| }, | |
| { | |
| "epoch": 0.9730122231634999, | |
| "grad_norm": 0.10710167799127884, | |
| "learning_rate": 4.2742266638723096e-05, | |
| "loss": 0.0804, | |
| "num_input_tokens_seen": 378537840, | |
| "step": 1005, | |
| "train_runtime": 89245.7806, | |
| "train_tokens_per_second": 4241.521 | |
| }, | |
| { | |
| "epoch": 0.9778530799951591, | |
| "grad_norm": 0.11197233290927573, | |
| "learning_rate": 4.264278828485267e-05, | |
| "loss": 0.0845, | |
| "num_input_tokens_seen": 380397552, | |
| "step": 1010, | |
| "train_runtime": 90080.3151, | |
| "train_tokens_per_second": 4222.871 | |
| }, | |
| { | |
| "epoch": 0.9826939368268184, | |
| "grad_norm": 0.11297920761471489, | |
| "learning_rate": 4.254275029091501e-05, | |
| "loss": 0.089, | |
| "num_input_tokens_seen": 382191440, | |
| "step": 1015, | |
| "train_runtime": 90923.6166, | |
| "train_tokens_per_second": 4203.434 | |
| }, | |
| { | |
| "epoch": 0.9875347936584775, | |
| "grad_norm": 0.11866337547317915, | |
| "learning_rate": 4.2442155830176655e-05, | |
| "loss": 0.0887, | |
| "num_input_tokens_seen": 384056752, | |
| "step": 1020, | |
| "train_runtime": 91754.463, | |
| "train_tokens_per_second": 4185.701 | |
| }, | |
| { | |
| "epoch": 0.9923756504901368, | |
| "grad_norm": 0.10420016956278523, | |
| "learning_rate": 4.2341008093555604e-05, | |
| "loss": 0.0833, | |
| "num_input_tokens_seen": 385974032, | |
| "step": 1025, | |
| "train_runtime": 92603.2825, | |
| "train_tokens_per_second": 4168.038 | |
| }, | |
| { | |
| "epoch": 0.997216507321796, | |
| "grad_norm": 0.11976582014616471, | |
| "learning_rate": 4.22393102895201e-05, | |
| "loss": 0.0807, | |
| "num_input_tokens_seen": 387884720, | |
| "step": 1030, | |
| "train_runtime": 93427.3132, | |
| "train_tokens_per_second": 4151.727 | |
| }, | |
| { | |
| "epoch": 1.0019363427326637, | |
| "grad_norm": 0.12104509350691006, | |
| "learning_rate": 4.213706564398688e-05, | |
| "loss": 0.0727, | |
| "num_input_tokens_seen": 389685952, | |
| "step": 1035, | |
| "train_runtime": 94255.5336, | |
| "train_tokens_per_second": 4134.356 | |
| }, | |
| { | |
| "epoch": 1.0067771995643229, | |
| "grad_norm": 0.10790601727390471, | |
| "learning_rate": 4.203427740021884e-05, | |
| "loss": 0.0713, | |
| "num_input_tokens_seen": 391624192, | |
| "step": 1040, | |
| "train_runtime": 95169.1567, | |
| "train_tokens_per_second": 4115.033 | |
| }, | |
| { | |
| "epoch": 1.011618056395982, | |
| "grad_norm": 0.09855811615223968, | |
| "learning_rate": 4.1930948818722104e-05, | |
| "loss": 0.0671, | |
| "num_input_tokens_seen": 393660384, | |
| "step": 1045, | |
| "train_runtime": 96151.6752, | |
| "train_tokens_per_second": 4094.16 | |
| }, | |
| { | |
| "epoch": 1.0164589132276414, | |
| "grad_norm": 0.10511029272951843, | |
| "learning_rate": 4.182708317714267e-05, | |
| "loss": 0.0686, | |
| "num_input_tokens_seen": 395529008, | |
| "step": 1050, | |
| "train_runtime": 96960.8327, | |
| "train_tokens_per_second": 4079.266 | |
| }, | |
| { | |
| "epoch": 1.0212997700593005, | |
| "grad_norm": 0.10834149804451805, | |
| "learning_rate": 4.172268377016241e-05, | |
| "loss": 0.0684, | |
| "num_input_tokens_seen": 397347360, | |
| "step": 1055, | |
| "train_runtime": 97712.5511, | |
| "train_tokens_per_second": 4066.493 | |
| }, | |
| { | |
| "epoch": 1.0261406268909596, | |
| "grad_norm": 0.09606298518108944, | |
| "learning_rate": 4.161775390939454e-05, | |
| "loss": 0.0664, | |
| "num_input_tokens_seen": 399294608, | |
| "step": 1060, | |
| "train_runtime": 98616.012, | |
| "train_tokens_per_second": 4048.984 | |
| }, | |
| { | |
| "epoch": 1.030981483722619, | |
| "grad_norm": 0.10742642762254552, | |
| "learning_rate": 4.151229692327863e-05, | |
| "loss": 0.0657, | |
| "num_input_tokens_seen": 401232320, | |
| "step": 1065, | |
| "train_runtime": 99525.5766, | |
| "train_tokens_per_second": 4031.449 | |
| }, | |
| { | |
| "epoch": 1.0358223405542781, | |
| "grad_norm": 0.099355578010755, | |
| "learning_rate": 4.1406316156974965e-05, | |
| "loss": 0.0668, | |
| "num_input_tokens_seen": 403156576, | |
| "step": 1070, | |
| "train_runtime": 100492.0641, | |
| "train_tokens_per_second": 4011.825 | |
| }, | |
| { | |
| "epoch": 1.0406631973859373, | |
| "grad_norm": 0.09750969581285164, | |
| "learning_rate": 4.1299814972258466e-05, | |
| "loss": 0.0685, | |
| "num_input_tokens_seen": 404973808, | |
| "step": 1075, | |
| "train_runtime": 101227.6948, | |
| "train_tokens_per_second": 4000.623 | |
| }, | |
| { | |
| "epoch": 1.0455040542175964, | |
| "grad_norm": 0.10948901174240346, | |
| "learning_rate": 4.1192796747412046e-05, | |
| "loss": 0.0739, | |
| "num_input_tokens_seen": 406856480, | |
| "step": 1080, | |
| "train_runtime": 102069.5624, | |
| "train_tokens_per_second": 3986.071 | |
| }, | |
| { | |
| "epoch": 1.0503449110492558, | |
| "grad_norm": 0.10315111502114589, | |
| "learning_rate": 4.108526487711944e-05, | |
| "loss": 0.0743, | |
| "num_input_tokens_seen": 408659088, | |
| "step": 1085, | |
| "train_runtime": 102905.3681, | |
| "train_tokens_per_second": 3971.213 | |
| }, | |
| { | |
| "epoch": 1.055185767880915, | |
| "grad_norm": 0.12136499885985239, | |
| "learning_rate": 4.0977222772357545e-05, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 410533120, | |
| "step": 1090, | |
| "train_runtime": 103694.499, | |
| "train_tokens_per_second": 3959.064 | |
| }, | |
| { | |
| "epoch": 1.060026624712574, | |
| "grad_norm": 0.12011224246251123, | |
| "learning_rate": 4.0868673860288196e-05, | |
| "loss": 0.0723, | |
| "num_input_tokens_seen": 412377808, | |
| "step": 1095, | |
| "train_runtime": 104483.7209, | |
| "train_tokens_per_second": 3946.814 | |
| }, | |
| { | |
| "epoch": 1.0648674815442334, | |
| "grad_norm": 0.11631933894778271, | |
| "learning_rate": 4.075962158414948e-05, | |
| "loss": 0.0659, | |
| "num_input_tokens_seen": 414390304, | |
| "step": 1100, | |
| "train_runtime": 105480.5472, | |
| "train_tokens_per_second": 3928.595 | |
| }, | |
| { | |
| "epoch": 1.0697083383758925, | |
| "grad_norm": 0.10080944032903205, | |
| "learning_rate": 4.0650069403146484e-05, | |
| "loss": 0.0671, | |
| "num_input_tokens_seen": 416413616, | |
| "step": 1105, | |
| "train_runtime": 106510.3272, | |
| "train_tokens_per_second": 3909.608 | |
| }, | |
| { | |
| "epoch": 1.0745491952075517, | |
| "grad_norm": 0.09617342271272442, | |
| "learning_rate": 4.0540020792341605e-05, | |
| "loss": 0.0723, | |
| "num_input_tokens_seen": 418267408, | |
| "step": 1110, | |
| "train_runtime": 107384.6318, | |
| "train_tokens_per_second": 3895.04 | |
| }, | |
| { | |
| "epoch": 1.079390052039211, | |
| "grad_norm": 0.09814369288730117, | |
| "learning_rate": 4.0429479242544285e-05, | |
| "loss": 0.0676, | |
| "num_input_tokens_seen": 420122000, | |
| "step": 1115, | |
| "train_runtime": 108179.5693, | |
| "train_tokens_per_second": 3883.561 | |
| }, | |
| { | |
| "epoch": 1.0842309088708701, | |
| "grad_norm": 0.09566981500512636, | |
| "learning_rate": 4.031844826020028e-05, | |
| "loss": 0.0673, | |
| "num_input_tokens_seen": 421979504, | |
| "step": 1120, | |
| "train_runtime": 108984.7112, | |
| "train_tokens_per_second": 3871.915 | |
| }, | |
| { | |
| "epoch": 1.0890717657025293, | |
| "grad_norm": 0.11481624591628409, | |
| "learning_rate": 4.020693136728047e-05, | |
| "loss": 0.0716, | |
| "num_input_tokens_seen": 423852128, | |
| "step": 1125, | |
| "train_runtime": 109849.6222, | |
| "train_tokens_per_second": 3858.476 | |
| }, | |
| { | |
| "epoch": 1.0939126225341886, | |
| "grad_norm": 0.11408237955687323, | |
| "learning_rate": 4.0094932101169116e-05, | |
| "loss": 0.066, | |
| "num_input_tokens_seen": 425865744, | |
| "step": 1130, | |
| "train_runtime": 110829.9796, | |
| "train_tokens_per_second": 3842.514 | |
| }, | |
| { | |
| "epoch": 1.0987534793658478, | |
| "grad_norm": 0.09876683127617192, | |
| "learning_rate": 3.9982454014551626e-05, | |
| "loss": 0.0671, | |
| "num_input_tokens_seen": 427825520, | |
| "step": 1135, | |
| "train_runtime": 111678.4669, | |
| "train_tokens_per_second": 3830.868 | |
| }, | |
| { | |
| "epoch": 1.103594336197507, | |
| "grad_norm": 0.08998178840723907, | |
| "learning_rate": 3.9869500675301925e-05, | |
| "loss": 0.0659, | |
| "num_input_tokens_seen": 429650464, | |
| "step": 1140, | |
| "train_runtime": 112452.9235, | |
| "train_tokens_per_second": 3820.714 | |
| }, | |
| { | |
| "epoch": 1.1084351930291663, | |
| "grad_norm": 0.09112012767906727, | |
| "learning_rate": 3.975607566636921e-05, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 431560992, | |
| "step": 1145, | |
| "train_runtime": 113300.5965, | |
| "train_tokens_per_second": 3808.991 | |
| }, | |
| { | |
| "epoch": 1.1132760498608254, | |
| "grad_norm": 0.09193978880262148, | |
| "learning_rate": 3.964218258566436e-05, | |
| "loss": 0.0679, | |
| "num_input_tokens_seen": 433452176, | |
| "step": 1150, | |
| "train_runtime": 114121.3844, | |
| "train_tokens_per_second": 3798.168 | |
| }, | |
| { | |
| "epoch": 1.1181169066924845, | |
| "grad_norm": 0.10253244394594298, | |
| "learning_rate": 3.952782504594574e-05, | |
| "loss": 0.0683, | |
| "num_input_tokens_seen": 435371936, | |
| "step": 1155, | |
| "train_runtime": 114932.6591, | |
| "train_tokens_per_second": 3788.061 | |
| }, | |
| { | |
| "epoch": 1.1229577635241437, | |
| "grad_norm": 0.11604949971136329, | |
| "learning_rate": 3.9413006674704684e-05, | |
| "loss": 0.0696, | |
| "num_input_tokens_seen": 437191424, | |
| "step": 1160, | |
| "train_runtime": 115724.1081, | |
| "train_tokens_per_second": 3777.877 | |
| }, | |
| { | |
| "epoch": 1.127798620355803, | |
| "grad_norm": 0.09600367492582994, | |
| "learning_rate": 3.929773111405034e-05, | |
| "loss": 0.0693, | |
| "num_input_tokens_seen": 439045440, | |
| "step": 1165, | |
| "train_runtime": 116553.6206, | |
| "train_tokens_per_second": 3766.897 | |
| }, | |
| { | |
| "epoch": 1.1326394771874622, | |
| "grad_norm": 0.10626811630147584, | |
| "learning_rate": 3.9182002020594235e-05, | |
| "loss": 0.0705, | |
| "num_input_tokens_seen": 440950928, | |
| "step": 1170, | |
| "train_runtime": 117462.4396, | |
| "train_tokens_per_second": 3753.974 | |
| }, | |
| { | |
| "epoch": 1.1374803340191213, | |
| "grad_norm": 0.09282379898506365, | |
| "learning_rate": 3.906582306533418e-05, | |
| "loss": 0.0638, | |
| "num_input_tokens_seen": 442914960, | |
| "step": 1175, | |
| "train_runtime": 118455.494, | |
| "train_tokens_per_second": 3739.083 | |
| }, | |
| { | |
| "epoch": 1.1423211908507807, | |
| "grad_norm": 0.09929776152503765, | |
| "learning_rate": 3.8949197933537916e-05, | |
| "loss": 0.0689, | |
| "num_input_tokens_seen": 444832176, | |
| "step": 1180, | |
| "train_runtime": 119337.8677, | |
| "train_tokens_per_second": 3727.502 | |
| }, | |
| { | |
| "epoch": 1.1471620476824398, | |
| "grad_norm": 0.1011962511269963, | |
| "learning_rate": 3.883213032462617e-05, | |
| "loss": 0.0682, | |
| "num_input_tokens_seen": 446780448, | |
| "step": 1185, | |
| "train_runtime": 120254.4758, | |
| "train_tokens_per_second": 3715.292 | |
| }, | |
| { | |
| "epoch": 1.152002904514099, | |
| "grad_norm": 0.09174519283988156, | |
| "learning_rate": 3.871462395205531e-05, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 448550288, | |
| "step": 1190, | |
| "train_runtime": 121004.0348, | |
| "train_tokens_per_second": 3706.904 | |
| }, | |
| { | |
| "epoch": 1.1568437613457583, | |
| "grad_norm": 0.10621267015218695, | |
| "learning_rate": 3.8596682543199546e-05, | |
| "loss": 0.068, | |
| "num_input_tokens_seen": 450450928, | |
| "step": 1195, | |
| "train_runtime": 121845.7134, | |
| "train_tokens_per_second": 3696.896 | |
| }, | |
| { | |
| "epoch": 1.1616846181774174, | |
| "grad_norm": 0.09664344676790702, | |
| "learning_rate": 3.847830983923273e-05, | |
| "loss": 0.0687, | |
| "num_input_tokens_seen": 452419728, | |
| "step": 1200, | |
| "train_runtime": 122865.555, | |
| "train_tokens_per_second": 3682.234 | |
| }, | |
| { | |
| "epoch": 1.1665254750090766, | |
| "grad_norm": 0.09223910440789163, | |
| "learning_rate": 3.835950959500963e-05, | |
| "loss": 0.0719, | |
| "num_input_tokens_seen": 454259472, | |
| "step": 1205, | |
| "train_runtime": 123677.075, | |
| "train_tokens_per_second": 3672.948 | |
| }, | |
| { | |
| "epoch": 1.1713663318407357, | |
| "grad_norm": 0.10736751066204356, | |
| "learning_rate": 3.8240285578946904e-05, | |
| "loss": 0.069, | |
| "num_input_tokens_seen": 456165264, | |
| "step": 1210, | |
| "train_runtime": 124627.8308, | |
| "train_tokens_per_second": 3660.22 | |
| }, | |
| { | |
| "epoch": 1.176207188672395, | |
| "grad_norm": 0.09115327170920039, | |
| "learning_rate": 3.812064157290345e-05, | |
| "loss": 0.0666, | |
| "num_input_tokens_seen": 458044704, | |
| "step": 1215, | |
| "train_runtime": 125470.063, | |
| "train_tokens_per_second": 3650.629 | |
| }, | |
| { | |
| "epoch": 1.1810480455040542, | |
| "grad_norm": 0.10235191270929159, | |
| "learning_rate": 3.800058137206053e-05, | |
| "loss": 0.0666, | |
| "num_input_tokens_seen": 459916000, | |
| "step": 1220, | |
| "train_runtime": 126329.2794, | |
| "train_tokens_per_second": 3640.613 | |
| }, | |
| { | |
| "epoch": 1.1858889023357135, | |
| "grad_norm": 0.11706669148345339, | |
| "learning_rate": 3.788010878480139e-05, | |
| "loss": 0.0721, | |
| "num_input_tokens_seen": 461786304, | |
| "step": 1225, | |
| "train_runtime": 127251.258, | |
| "train_tokens_per_second": 3628.933 | |
| }, | |
| { | |
| "epoch": 1.1907297591673727, | |
| "grad_norm": 0.09895227308048286, | |
| "learning_rate": 3.775922763259038e-05, | |
| "loss": 0.0661, | |
| "num_input_tokens_seen": 463676192, | |
| "step": 1230, | |
| "train_runtime": 128161.1571, | |
| "train_tokens_per_second": 3617.915 | |
| }, | |
| { | |
| "epoch": 1.1955706159990318, | |
| "grad_norm": 0.11484504551320268, | |
| "learning_rate": 3.763794174985181e-05, | |
| "loss": 0.0705, | |
| "num_input_tokens_seen": 465602880, | |
| "step": 1235, | |
| "train_runtime": 129003.0382, | |
| "train_tokens_per_second": 3609.24 | |
| }, | |
| { | |
| "epoch": 1.200411472830691, | |
| "grad_norm": 0.08994605167832181, | |
| "learning_rate": 3.7516254983848277e-05, | |
| "loss": 0.0698, | |
| "num_input_tokens_seen": 467493856, | |
| "step": 1240, | |
| "train_runtime": 129744.0548, | |
| "train_tokens_per_second": 3603.201 | |
| }, | |
| { | |
| "epoch": 1.2052523296623503, | |
| "grad_norm": 0.10300174734565927, | |
| "learning_rate": 3.7394171194558646e-05, | |
| "loss": 0.0693, | |
| "num_input_tokens_seen": 469496768, | |
| "step": 1245, | |
| "train_runtime": 130459.977, | |
| "train_tokens_per_second": 3598.78 | |
| }, | |
| { | |
| "epoch": 1.2100931864940094, | |
| "grad_norm": 0.10675389319087088, | |
| "learning_rate": 3.727169425455562e-05, | |
| "loss": 0.0693, | |
| "num_input_tokens_seen": 471449056, | |
| "step": 1250, | |
| "train_runtime": 131010.3565, | |
| "train_tokens_per_second": 3598.563 | |
| }, | |
| { | |
| "epoch": 1.2149340433256686, | |
| "grad_norm": 0.09921256550808967, | |
| "learning_rate": 3.7148828048882857e-05, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 473264848, | |
| "step": 1255, | |
| "train_runtime": 131606.2869, | |
| "train_tokens_per_second": 3596.066 | |
| }, | |
| { | |
| "epoch": 1.219774900157328, | |
| "grad_norm": 0.10462373380428001, | |
| "learning_rate": 3.702557647493177e-05, | |
| "loss": 0.069, | |
| "num_input_tokens_seen": 475144208, | |
| "step": 1260, | |
| "train_runtime": 132107.4645, | |
| "train_tokens_per_second": 3596.649 | |
| }, | |
| { | |
| "epoch": 1.224615756988987, | |
| "grad_norm": 0.10415516508324393, | |
| "learning_rate": 3.690194344231789e-05, | |
| "loss": 0.0671, | |
| "num_input_tokens_seen": 477118864, | |
| "step": 1265, | |
| "train_runtime": 132532.0609, | |
| "train_tokens_per_second": 3600.026 | |
| }, | |
| { | |
| "epoch": 1.2294566138206462, | |
| "grad_norm": 0.10859351584240985, | |
| "learning_rate": 3.677793287275687e-05, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 478984544, | |
| "step": 1270, | |
| "train_runtime": 132834.8843, | |
| "train_tokens_per_second": 3605.864 | |
| }, | |
| { | |
| "epoch": 1.2342974706523056, | |
| "grad_norm": 0.10820393055282612, | |
| "learning_rate": 3.665354869994003e-05, | |
| "loss": 0.073, | |
| "num_input_tokens_seen": 480862384, | |
| "step": 1275, | |
| "train_runtime": 133150.7075, | |
| "train_tokens_per_second": 3611.414 | |
| }, | |
| { | |
| "epoch": 1.2391383274839647, | |
| "grad_norm": 0.09874866443912496, | |
| "learning_rate": 3.652879486940965e-05, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 482657712, | |
| "step": 1280, | |
| "train_runtime": 133461.2075, | |
| "train_tokens_per_second": 3616.464 | |
| }, | |
| { | |
| "epoch": 1.2439791843156238, | |
| "grad_norm": 0.1063684888653233, | |
| "learning_rate": 3.640367533843376e-05, | |
| "loss": 0.0694, | |
| "num_input_tokens_seen": 484527216, | |
| "step": 1285, | |
| "train_runtime": 133797.6333, | |
| "train_tokens_per_second": 3621.344 | |
| }, | |
| { | |
| "epoch": 1.2488200411472832, | |
| "grad_norm": 0.11664187966473348, | |
| "learning_rate": 3.6278194075880625e-05, | |
| "loss": 0.0664, | |
| "num_input_tokens_seen": 486424320, | |
| "step": 1290, | |
| "train_runtime": 134098.3231, | |
| "train_tokens_per_second": 3627.371 | |
| }, | |
| { | |
| "epoch": 1.2536608979789423, | |
| "grad_norm": 0.08949284897230625, | |
| "learning_rate": 3.615235506209288e-05, | |
| "loss": 0.0686, | |
| "num_input_tokens_seen": 488454832, | |
| "step": 1295, | |
| "train_runtime": 134472.7384, | |
| "train_tokens_per_second": 3632.371 | |
| }, | |
| { | |
| "epoch": 1.2585017548106014, | |
| "grad_norm": 0.08869637799926437, | |
| "learning_rate": 3.602616228876123e-05, | |
| "loss": 0.0616, | |
| "num_input_tokens_seen": 490406800, | |
| "step": 1300, | |
| "train_runtime": 134802.021, | |
| "train_tokens_per_second": 3637.978 | |
| }, | |
| { | |
| "epoch": 1.2633426116422606, | |
| "grad_norm": 0.1058762928159983, | |
| "learning_rate": 3.589961975879787e-05, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 492319664, | |
| "step": 1305, | |
| "train_runtime": 135130.0473, | |
| "train_tokens_per_second": 3643.303 | |
| }, | |
| { | |
| "epoch": 1.26818346847392, | |
| "grad_norm": 0.10187344379977778, | |
| "learning_rate": 3.577273148620946e-05, | |
| "loss": 0.0693, | |
| "num_input_tokens_seen": 494216816, | |
| "step": 1310, | |
| "train_runtime": 135461.9383, | |
| "train_tokens_per_second": 3648.381 | |
| }, | |
| { | |
| "epoch": 1.273024325305579, | |
| "grad_norm": 0.10989990396916802, | |
| "learning_rate": 3.564550149596985e-05, | |
| "loss": 0.0699, | |
| "num_input_tokens_seen": 496046624, | |
| "step": 1315, | |
| "train_runtime": 135771.2575, | |
| "train_tokens_per_second": 3653.547 | |
| }, | |
| { | |
| "epoch": 1.2778651821372382, | |
| "grad_norm": 0.10349545274745546, | |
| "learning_rate": 3.5517933823892384e-05, | |
| "loss": 0.0755, | |
| "num_input_tokens_seen": 497876768, | |
| "step": 1320, | |
| "train_runtime": 136087.2124, | |
| "train_tokens_per_second": 3658.513 | |
| }, | |
| { | |
| "epoch": 1.2827060389688976, | |
| "grad_norm": 0.09370049601860225, | |
| "learning_rate": 3.539003251650188e-05, | |
| "loss": 0.0786, | |
| "num_input_tokens_seen": 499699664, | |
| "step": 1325, | |
| "train_runtime": 136409.5926, | |
| "train_tokens_per_second": 3663.23 | |
| }, | |
| { | |
| "epoch": 1.2875468958005567, | |
| "grad_norm": 0.09622928750516618, | |
| "learning_rate": 3.526180163090627e-05, | |
| "loss": 0.0683, | |
| "num_input_tokens_seen": 501536176, | |
| "step": 1330, | |
| "train_runtime": 136698.4884, | |
| "train_tokens_per_second": 3668.923 | |
| }, | |
| { | |
| "epoch": 1.2923877526322158, | |
| "grad_norm": 0.09942000608374545, | |
| "learning_rate": 3.5133245234667883e-05, | |
| "loss": 0.0688, | |
| "num_input_tokens_seen": 503368832, | |
| "step": 1335, | |
| "train_runtime": 137002.8399, | |
| "train_tokens_per_second": 3674.149 | |
| }, | |
| { | |
| "epoch": 1.297228609463875, | |
| "grad_norm": 0.10312231028530282, | |
| "learning_rate": 3.50043674056745e-05, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 505269776, | |
| "step": 1340, | |
| "train_runtime": 137326.8406, | |
| "train_tokens_per_second": 3679.323 | |
| }, | |
| { | |
| "epoch": 1.3020694662955343, | |
| "grad_norm": 0.10706010123105417, | |
| "learning_rate": 3.48751722320099e-05, | |
| "loss": 0.0646, | |
| "num_input_tokens_seen": 507187600, | |
| "step": 1345, | |
| "train_runtime": 137656.0465, | |
| "train_tokens_per_second": 3684.456 | |
| }, | |
| { | |
| "epoch": 1.3069103231271935, | |
| "grad_norm": 0.11190774683646115, | |
| "learning_rate": 3.4745663811824234e-05, | |
| "loss": 0.0701, | |
| "num_input_tokens_seen": 509000368, | |
| "step": 1350, | |
| "train_runtime": 137932.7138, | |
| "train_tokens_per_second": 3690.208 | |
| }, | |
| { | |
| "epoch": 1.3117511799588528, | |
| "grad_norm": 0.09559260030218834, | |
| "learning_rate": 3.461584625320407e-05, | |
| "loss": 0.0672, | |
| "num_input_tokens_seen": 510812240, | |
| "step": 1355, | |
| "train_runtime": 138211.6321, | |
| "train_tokens_per_second": 3695.87 | |
| }, | |
| { | |
| "epoch": 1.316592036790512, | |
| "grad_norm": 0.10592839240696805, | |
| "learning_rate": 3.448572367404197e-05, | |
| "loss": 0.0753, | |
| "num_input_tokens_seen": 512638608, | |
| "step": 1360, | |
| "train_runtime": 138535.165, | |
| "train_tokens_per_second": 3700.422 | |
| }, | |
| { | |
| "epoch": 1.321432893622171, | |
| "grad_norm": 0.10547925559174358, | |
| "learning_rate": 3.435530020190598e-05, | |
| "loss": 0.0666, | |
| "num_input_tokens_seen": 514458464, | |
| "step": 1365, | |
| "train_runtime": 138805.6716, | |
| "train_tokens_per_second": 3706.322 | |
| }, | |
| { | |
| "epoch": 1.3262737504538302, | |
| "grad_norm": 0.10826688240565788, | |
| "learning_rate": 3.422457997390865e-05, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 516280880, | |
| "step": 1370, | |
| "train_runtime": 139079.93, | |
| "train_tokens_per_second": 3712.116 | |
| }, | |
| { | |
| "epoch": 1.3311146072854896, | |
| "grad_norm": 0.10770450004766066, | |
| "learning_rate": 3.4093567136575794e-05, | |
| "loss": 0.0726, | |
| "num_input_tokens_seen": 518154288, | |
| "step": 1375, | |
| "train_runtime": 139387.3129, | |
| "train_tokens_per_second": 3717.371 | |
| }, | |
| { | |
| "epoch": 1.3359554641171487, | |
| "grad_norm": 0.1142302841713288, | |
| "learning_rate": 3.396226584571499e-05, | |
| "loss": 0.0747, | |
| "num_input_tokens_seen": 519986144, | |
| "step": 1380, | |
| "train_runtime": 139690.3762, | |
| "train_tokens_per_second": 3722.419 | |
| }, | |
| { | |
| "epoch": 1.340796320948808, | |
| "grad_norm": 0.1002246744241518, | |
| "learning_rate": 3.383068026628371e-05, | |
| "loss": 0.07, | |
| "num_input_tokens_seen": 521862240, | |
| "step": 1385, | |
| "train_runtime": 139987.7519, | |
| "train_tokens_per_second": 3727.914 | |
| }, | |
| { | |
| "epoch": 1.3456371777804672, | |
| "grad_norm": 0.09964010260687982, | |
| "learning_rate": 3.3698814572257284e-05, | |
| "loss": 0.0665, | |
| "num_input_tokens_seen": 523858656, | |
| "step": 1390, | |
| "train_runtime": 140338.2763, | |
| "train_tokens_per_second": 3732.828 | |
| }, | |
| { | |
| "epoch": 1.3504780346121263, | |
| "grad_norm": 0.09090502017139336, | |
| "learning_rate": 3.356667294649639e-05, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 525774560, | |
| "step": 1395, | |
| "train_runtime": 140671.1613, | |
| "train_tokens_per_second": 3737.614 | |
| }, | |
| { | |
| "epoch": 1.3553188914437855, | |
| "grad_norm": 0.10302633457655123, | |
| "learning_rate": 3.343425958061447e-05, | |
| "loss": 0.0662, | |
| "num_input_tokens_seen": 527681344, | |
| "step": 1400, | |
| "train_runtime": 140994.6669, | |
| "train_tokens_per_second": 3742.562 | |
| }, | |
| { | |
| "epoch": 1.3601597482754448, | |
| "grad_norm": 0.09470780975547861, | |
| "learning_rate": 3.330157867484472e-05, | |
| "loss": 0.0717, | |
| "num_input_tokens_seen": 529406736, | |
| "step": 1405, | |
| "train_runtime": 141236.0937, | |
| "train_tokens_per_second": 3748.381 | |
| }, | |
| { | |
| "epoch": 1.365000605107104, | |
| "grad_norm": 0.08585499478944604, | |
| "learning_rate": 3.3168634437906865e-05, | |
| "loss": 0.0662, | |
| "num_input_tokens_seen": 531305872, | |
| "step": 1410, | |
| "train_runtime": 141547.5703, | |
| "train_tokens_per_second": 3753.55 | |
| }, | |
| { | |
| "epoch": 1.369841461938763, | |
| "grad_norm": 0.10732846763853142, | |
| "learning_rate": 3.303543108687364e-05, | |
| "loss": 0.0715, | |
| "num_input_tokens_seen": 533037392, | |
| "step": 1415, | |
| "train_runtime": 141806.9679, | |
| "train_tokens_per_second": 3758.894 | |
| }, | |
| { | |
| "epoch": 1.3746823187704225, | |
| "grad_norm": 0.09819540974635672, | |
| "learning_rate": 3.290197284703707e-05, | |
| "loss": 0.0687, | |
| "num_input_tokens_seen": 535085280, | |
| "step": 1420, | |
| "train_runtime": 142243.4525, | |
| "train_tokens_per_second": 3761.757 | |
| }, | |
| { | |
| "epoch": 1.3795231756020816, | |
| "grad_norm": 0.09528337700443731, | |
| "learning_rate": 3.276826395177438e-05, | |
| "loss": 0.0728, | |
| "num_input_tokens_seen": 536966576, | |
| "step": 1425, | |
| "train_runtime": 142532.1226, | |
| "train_tokens_per_second": 3767.337 | |
| }, | |
| { | |
| "epoch": 1.3843640324337407, | |
| "grad_norm": 0.09402247417532383, | |
| "learning_rate": 3.263430864241376e-05, | |
| "loss": 0.068, | |
| "num_input_tokens_seen": 538829216, | |
| "step": 1430, | |
| "train_runtime": 142855.1544, | |
| "train_tokens_per_second": 3771.857 | |
| }, | |
| { | |
| "epoch": 1.3892048892653999, | |
| "grad_norm": 0.09890632658294073, | |
| "learning_rate": 3.250011116809978e-05, | |
| "loss": 0.0678, | |
| "num_input_tokens_seen": 540632144, | |
| "step": 1435, | |
| "train_runtime": 143161.7888, | |
| "train_tokens_per_second": 3776.372 | |
| }, | |
| { | |
| "epoch": 1.3940457460970592, | |
| "grad_norm": 0.08589478003550063, | |
| "learning_rate": 3.236567578565867e-05, | |
| "loss": 0.0693, | |
| "num_input_tokens_seen": 542432288, | |
| "step": 1440, | |
| "train_runtime": 143420.8223, | |
| "train_tokens_per_second": 3782.103 | |
| }, | |
| { | |
| "epoch": 1.3988866029287184, | |
| "grad_norm": 0.09609704465838043, | |
| "learning_rate": 3.223100675946321e-05, | |
| "loss": 0.0655, | |
| "num_input_tokens_seen": 544403440, | |
| "step": 1445, | |
| "train_runtime": 143809.3419, | |
| "train_tokens_per_second": 3785.592 | |
| }, | |
| { | |
| "epoch": 1.4037274597603777, | |
| "grad_norm": 0.08644550430034957, | |
| "learning_rate": 3.209610836129755e-05, | |
| "loss": 0.0657, | |
| "num_input_tokens_seen": 546192304, | |
| "step": 1450, | |
| "train_runtime": 144057.4772, | |
| "train_tokens_per_second": 3791.489 | |
| }, | |
| { | |
| "epoch": 1.4085683165920369, | |
| "grad_norm": 0.10119133528568032, | |
| "learning_rate": 3.1960984870221596e-05, | |
| "loss": 0.065, | |
| "num_input_tokens_seen": 548030912, | |
| "step": 1455, | |
| "train_runtime": 144334.6546, | |
| "train_tokens_per_second": 3796.946 | |
| }, | |
| { | |
| "epoch": 1.413409173423696, | |
| "grad_norm": 0.09755652421098708, | |
| "learning_rate": 3.1825640572435394e-05, | |
| "loss": 0.067, | |
| "num_input_tokens_seen": 549968368, | |
| "step": 1460, | |
| "train_runtime": 144647.7235, | |
| "train_tokens_per_second": 3802.123 | |
| }, | |
| { | |
| "epoch": 1.4182500302553551, | |
| "grad_norm": 0.09202887102207528, | |
| "learning_rate": 3.169007976114311e-05, | |
| "loss": 0.0633, | |
| "num_input_tokens_seen": 551997376, | |
| "step": 1465, | |
| "train_runtime": 145066.4375, | |
| "train_tokens_per_second": 3805.135 | |
| }, | |
| { | |
| "epoch": 1.4230908870870145, | |
| "grad_norm": 0.09893125478101567, | |
| "learning_rate": 3.155430673641681e-05, | |
| "loss": 0.069, | |
| "num_input_tokens_seen": 553815552, | |
| "step": 1470, | |
| "train_runtime": 145350.8539, | |
| "train_tokens_per_second": 3810.198 | |
| }, | |
| { | |
| "epoch": 1.4279317439186736, | |
| "grad_norm": 0.11231020588498852, | |
| "learning_rate": 3.1418325805060126e-05, | |
| "loss": 0.0713, | |
| "num_input_tokens_seen": 555719392, | |
| "step": 1475, | |
| "train_runtime": 145699.1273, | |
| "train_tokens_per_second": 3814.157 | |
| }, | |
| { | |
| "epoch": 1.4327726007503327, | |
| "grad_norm": 0.10568451565662428, | |
| "learning_rate": 3.1282141280471645e-05, | |
| "loss": 0.069, | |
| "num_input_tokens_seen": 557504016, | |
| "step": 1480, | |
| "train_runtime": 145974.2182, | |
| "train_tokens_per_second": 3819.195 | |
| }, | |
| { | |
| "epoch": 1.437613457581992, | |
| "grad_norm": 0.09645253967773308, | |
| "learning_rate": 3.114575748250801e-05, | |
| "loss": 0.0678, | |
| "num_input_tokens_seen": 559452496, | |
| "step": 1485, | |
| "train_runtime": 146328.064, | |
| "train_tokens_per_second": 3823.275 | |
| }, | |
| { | |
| "epoch": 1.4424543144136512, | |
| "grad_norm": 0.09105344674720453, | |
| "learning_rate": 3.100917873734696e-05, | |
| "loss": 0.0648, | |
| "num_input_tokens_seen": 561337136, | |
| "step": 1490, | |
| "train_runtime": 146656.3116, | |
| "train_tokens_per_second": 3827.569 | |
| }, | |
| { | |
| "epoch": 1.4472951712453104, | |
| "grad_norm": 0.09122782894664445, | |
| "learning_rate": 3.087240937735008e-05, | |
| "loss": 0.0682, | |
| "num_input_tokens_seen": 563237008, | |
| "step": 1495, | |
| "train_runtime": 146953.3037, | |
| "train_tokens_per_second": 3832.762 | |
| }, | |
| { | |
| "epoch": 1.4521360280769695, | |
| "grad_norm": 0.10094808912146574, | |
| "learning_rate": 3.073545374092535e-05, | |
| "loss": 0.07, | |
| "num_input_tokens_seen": 565056704, | |
| "step": 1500, | |
| "train_runtime": 147252.9238, | |
| "train_tokens_per_second": 3837.321 | |
| }, | |
| { | |
| "epoch": 1.4569768849086289, | |
| "grad_norm": 0.09898552497334528, | |
| "learning_rate": 3.05983161723896e-05, | |
| "loss": 0.0653, | |
| "num_input_tokens_seen": 566984576, | |
| "step": 1505, | |
| "train_runtime": 147575.0281, | |
| "train_tokens_per_second": 3842.009 | |
| }, | |
| { | |
| "epoch": 1.461817741740288, | |
| "grad_norm": 0.0893045303914291, | |
| "learning_rate": 3.046100102183061e-05, | |
| "loss": 0.0664, | |
| "num_input_tokens_seen": 568931872, | |
| "step": 1510, | |
| "train_runtime": 147890.4027, | |
| "train_tokens_per_second": 3846.983 | |
| }, | |
| { | |
| "epoch": 1.4666585985719474, | |
| "grad_norm": 0.07794027254932458, | |
| "learning_rate": 3.0323512644969194e-05, | |
| "loss": 0.0644, | |
| "num_input_tokens_seen": 570766304, | |
| "step": 1515, | |
| "train_runtime": 148153.9685, | |
| "train_tokens_per_second": 3852.521 | |
| }, | |
| { | |
| "epoch": 1.4714994554036065, | |
| "grad_norm": 0.09387987802719616, | |
| "learning_rate": 3.0185855403021013e-05, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 572692384, | |
| "step": 1520, | |
| "train_runtime": 148490.2566, | |
| "train_tokens_per_second": 3856.767 | |
| }, | |
| { | |
| "epoch": 1.4763403122352656, | |
| "grad_norm": 0.10537208908005448, | |
| "learning_rate": 3.0048033662558222e-05, | |
| "loss": 0.0731, | |
| "num_input_tokens_seen": 574478256, | |
| "step": 1525, | |
| "train_runtime": 148778.7811, | |
| "train_tokens_per_second": 3861.292 | |
| }, | |
| { | |
| "epoch": 1.4811811690669248, | |
| "grad_norm": 0.09520737006467728, | |
| "learning_rate": 2.9910051795370974e-05, | |
| "loss": 0.068, | |
| "num_input_tokens_seen": 576337936, | |
| "step": 1530, | |
| "train_runtime": 149086.1454, | |
| "train_tokens_per_second": 3865.805 | |
| }, | |
| { | |
| "epoch": 1.4860220258985841, | |
| "grad_norm": 0.09478631735802089, | |
| "learning_rate": 2.977191417832874e-05, | |
| "loss": 0.0663, | |
| "num_input_tokens_seen": 578104656, | |
| "step": 1535, | |
| "train_runtime": 149330.6478, | |
| "train_tokens_per_second": 3871.306 | |
| }, | |
| { | |
| "epoch": 1.4908628827302433, | |
| "grad_norm": 0.10196404573493925, | |
| "learning_rate": 2.9633625193241475e-05, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 579956016, | |
| "step": 1540, | |
| "train_runtime": 149621.4808, | |
| "train_tokens_per_second": 3876.155 | |
| }, | |
| { | |
| "epoch": 1.4957037395619024, | |
| "grad_norm": 0.10883093122161902, | |
| "learning_rate": 2.9495189226720615e-05, | |
| "loss": 0.0694, | |
| "num_input_tokens_seen": 581873728, | |
| "step": 1545, | |
| "train_runtime": 149975.2694, | |
| "train_tokens_per_second": 3879.798 | |
| }, | |
| { | |
| "epoch": 1.5005445963935617, | |
| "grad_norm": 0.09938067661414723, | |
| "learning_rate": 2.935661067003994e-05, | |
| "loss": 0.0689, | |
| "num_input_tokens_seen": 583722192, | |
| "step": 1550, | |
| "train_runtime": 150285.3755, | |
| "train_tokens_per_second": 3884.092 | |
| }, | |
| { | |
| "epoch": 1.5053854532252209, | |
| "grad_norm": 0.10557220766786837, | |
| "learning_rate": 2.9217893918996285e-05, | |
| "loss": 0.0711, | |
| "num_input_tokens_seen": 585531280, | |
| "step": 1555, | |
| "train_runtime": 150574.9739, | |
| "train_tokens_per_second": 3888.636 | |
| }, | |
| { | |
| "epoch": 1.51022631005688, | |
| "grad_norm": 0.09792652092850067, | |
| "learning_rate": 2.9079043373770088e-05, | |
| "loss": 0.0657, | |
| "num_input_tokens_seen": 587465120, | |
| "step": 1560, | |
| "train_runtime": 150916.7039, | |
| "train_tokens_per_second": 3892.645 | |
| }, | |
| { | |
| "epoch": 1.5150671668885392, | |
| "grad_norm": 0.10432359138697463, | |
| "learning_rate": 2.8940063438785808e-05, | |
| "loss": 0.065, | |
| "num_input_tokens_seen": 589312272, | |
| "step": 1565, | |
| "train_runtime": 151233.2823, | |
| "train_tokens_per_second": 3896.71 | |
| }, | |
| { | |
| "epoch": 1.5199080237201985, | |
| "grad_norm": 0.10464650554060877, | |
| "learning_rate": 2.8800958522572246e-05, | |
| "loss": 0.0669, | |
| "num_input_tokens_seen": 591224496, | |
| "step": 1570, | |
| "train_runtime": 151542.6129, | |
| "train_tokens_per_second": 3901.375 | |
| }, | |
| { | |
| "epoch": 1.5247488805518576, | |
| "grad_norm": 0.08982894126025302, | |
| "learning_rate": 2.866173303762268e-05, | |
| "loss": 0.0703, | |
| "num_input_tokens_seen": 593094000, | |
| "step": 1575, | |
| "train_runtime": 151870.8088, | |
| "train_tokens_per_second": 3905.253 | |
| }, | |
| { | |
| "epoch": 1.529589737383517, | |
| "grad_norm": 0.09170883604725243, | |
| "learning_rate": 2.8522391400254887e-05, | |
| "loss": 0.0678, | |
| "num_input_tokens_seen": 595038832, | |
| "step": 1580, | |
| "train_runtime": 152202.7319, | |
| "train_tokens_per_second": 3909.515 | |
| }, | |
| { | |
| "epoch": 1.5344305942151761, | |
| "grad_norm": 0.10514358953867606, | |
| "learning_rate": 2.8382938030471112e-05, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 596824960, | |
| "step": 1585, | |
| "train_runtime": 152452.6889, | |
| "train_tokens_per_second": 3914.821 | |
| }, | |
| { | |
| "epoch": 1.5392714510468353, | |
| "grad_norm": 0.10505102469174953, | |
| "learning_rate": 2.8243377351817755e-05, | |
| "loss": 0.0689, | |
| "num_input_tokens_seen": 598705200, | |
| "step": 1590, | |
| "train_runtime": 152787.4796, | |
| "train_tokens_per_second": 3918.549 | |
| }, | |
| { | |
| "epoch": 1.5441123078784944, | |
| "grad_norm": 0.09749265072972622, | |
| "learning_rate": 2.8103713791245178e-05, | |
| "loss": 0.0612, | |
| "num_input_tokens_seen": 600600592, | |
| "step": 1595, | |
| "train_runtime": 153095.0077, | |
| "train_tokens_per_second": 3923.058 | |
| }, | |
| { | |
| "epoch": 1.5489531647101535, | |
| "grad_norm": 0.09875360040532327, | |
| "learning_rate": 2.7963951778967197e-05, | |
| "loss": 0.0681, | |
| "num_input_tokens_seen": 602389504, | |
| "step": 1600, | |
| "train_runtime": 153336.2956, | |
| "train_tokens_per_second": 3928.551 | |
| }, | |
| { | |
| "epoch": 1.553794021541813, | |
| "grad_norm": 0.0943003245484352, | |
| "learning_rate": 2.7824095748320568e-05, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 604104288, | |
| "step": 1605, | |
| "train_runtime": 153582.7315, | |
| "train_tokens_per_second": 3933.413 | |
| }, | |
| { | |
| "epoch": 1.5586348783734723, | |
| "grad_norm": 0.08913383289373572, | |
| "learning_rate": 2.7684150135624376e-05, | |
| "loss": 0.0701, | |
| "num_input_tokens_seen": 605951504, | |
| "step": 1610, | |
| "train_runtime": 153880.8942, | |
| "train_tokens_per_second": 3937.796 | |
| }, | |
| { | |
| "epoch": 1.5634757352051314, | |
| "grad_norm": 0.09806492578910929, | |
| "learning_rate": 2.7544119380039314e-05, | |
| "loss": 0.0661, | |
| "num_input_tokens_seen": 607990208, | |
| "step": 1615, | |
| "train_runtime": 154299.517, | |
| "train_tokens_per_second": 3940.325 | |
| }, | |
| { | |
| "epoch": 1.5683165920367905, | |
| "grad_norm": 0.09468491378509171, | |
| "learning_rate": 2.740400792342685e-05, | |
| "loss": 0.0699, | |
| "num_input_tokens_seen": 609909232, | |
| "step": 1620, | |
| "train_runtime": 154619.4097, | |
| "train_tokens_per_second": 3944.584 | |
| }, | |
| { | |
| "epoch": 1.5731574488684497, | |
| "grad_norm": 0.11209084242527069, | |
| "learning_rate": 2.726382021020833e-05, | |
| "loss": 0.0718, | |
| "num_input_tokens_seen": 611736000, | |
| "step": 1625, | |
| "train_runtime": 154922.3873, | |
| "train_tokens_per_second": 3948.661 | |
| }, | |
| { | |
| "epoch": 1.5779983057001088, | |
| "grad_norm": 0.08630941935012056, | |
| "learning_rate": 2.7123560687224013e-05, | |
| "loss": 0.0665, | |
| "num_input_tokens_seen": 613559536, | |
| "step": 1630, | |
| "train_runtime": 155208.4781, | |
| "train_tokens_per_second": 3953.132 | |
| }, | |
| { | |
| "epoch": 1.5828391625317682, | |
| "grad_norm": 0.0933989919648124, | |
| "learning_rate": 2.6983233803592022e-05, | |
| "loss": 0.0664, | |
| "num_input_tokens_seen": 615391184, | |
| "step": 1635, | |
| "train_runtime": 155485.9117, | |
| "train_tokens_per_second": 3957.858 | |
| }, | |
| { | |
| "epoch": 1.5876800193634273, | |
| "grad_norm": 0.08973133951027497, | |
| "learning_rate": 2.6842844010567177e-05, | |
| "loss": 0.0734, | |
| "num_input_tokens_seen": 617305296, | |
| "step": 1640, | |
| "train_runtime": 155858.1926, | |
| "train_tokens_per_second": 3960.686 | |
| }, | |
| { | |
| "epoch": 1.5925208761950866, | |
| "grad_norm": 0.0933153070243224, | |
| "learning_rate": 2.6702395761399844e-05, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 619107456, | |
| "step": 1645, | |
| "train_runtime": 156128.9329, | |
| "train_tokens_per_second": 3965.36 | |
| }, | |
| { | |
| "epoch": 1.5973617330267458, | |
| "grad_norm": 0.10662169623290256, | |
| "learning_rate": 2.656189351119463e-05, | |
| "loss": 0.0705, | |
| "num_input_tokens_seen": 620950304, | |
| "step": 1650, | |
| "train_runtime": 156466.5875, | |
| "train_tokens_per_second": 3968.581 | |
| }, | |
| { | |
| "epoch": 1.602202589858405, | |
| "grad_norm": 0.10372114619240043, | |
| "learning_rate": 2.6421341716769112e-05, | |
| "loss": 0.0692, | |
| "num_input_tokens_seen": 622796352, | |
| "step": 1655, | |
| "train_runtime": 156767.9586, | |
| "train_tokens_per_second": 3972.727 | |
| }, | |
| { | |
| "epoch": 1.607043446690064, | |
| "grad_norm": 0.09132496975982439, | |
| "learning_rate": 2.6280744836512423e-05, | |
| "loss": 0.0666, | |
| "num_input_tokens_seen": 624698480, | |
| "step": 1660, | |
| "train_runtime": 157088.7012, | |
| "train_tokens_per_second": 3976.724 | |
| }, | |
| { | |
| "epoch": 1.6118843035217232, | |
| "grad_norm": 0.09728380085896536, | |
| "learning_rate": 2.6140107330243858e-05, | |
| "loss": 0.0696, | |
| "num_input_tokens_seen": 626526800, | |
| "step": 1665, | |
| "train_runtime": 157381.2207, | |
| "train_tokens_per_second": 3980.95 | |
| }, | |
| { | |
| "epoch": 1.6167251603533825, | |
| "grad_norm": 0.10083788994513473, | |
| "learning_rate": 2.599943365907138e-05, | |
| "loss": 0.0708, | |
| "num_input_tokens_seen": 628340752, | |
| "step": 1670, | |
| "train_runtime": 157653.9021, | |
| "train_tokens_per_second": 3985.571 | |
| }, | |
| { | |
| "epoch": 1.621566017185042, | |
| "grad_norm": 0.09206012606935819, | |
| "learning_rate": 2.5858728285250156e-05, | |
| "loss": 0.0628, | |
| "num_input_tokens_seen": 630294736, | |
| "step": 1675, | |
| "train_runtime": 157993.6687, | |
| "train_tokens_per_second": 3989.367 | |
| }, | |
| { | |
| "epoch": 1.626406874016701, | |
| "grad_norm": 0.11407194887653826, | |
| "learning_rate": 2.5717995672040934e-05, | |
| "loss": 0.0668, | |
| "num_input_tokens_seen": 632181360, | |
| "step": 1680, | |
| "train_runtime": 158306.6385, | |
| "train_tokens_per_second": 3993.398 | |
| }, | |
| { | |
| "epoch": 1.6312477308483602, | |
| "grad_norm": 0.10532893116463045, | |
| "learning_rate": 2.5577240283568547e-05, | |
| "loss": 0.0672, | |
| "num_input_tokens_seen": 634139120, | |
| "step": 1685, | |
| "train_runtime": 158659.1124, | |
| "train_tokens_per_second": 3996.865 | |
| }, | |
| { | |
| "epoch": 1.6360885876800193, | |
| "grad_norm": 0.09253705708958208, | |
| "learning_rate": 2.5436466584680257e-05, | |
| "loss": 0.065, | |
| "num_input_tokens_seen": 636061136, | |
| "step": 1690, | |
| "train_runtime": 158996.3128, | |
| "train_tokens_per_second": 4000.477 | |
| }, | |
| { | |
| "epoch": 1.6409294445116784, | |
| "grad_norm": 0.10270850201788718, | |
| "learning_rate": 2.529567904080416e-05, | |
| "loss": 0.0679, | |
| "num_input_tokens_seen": 637901488, | |
| "step": 1695, | |
| "train_runtime": 159300.0136, | |
| "train_tokens_per_second": 4004.403 | |
| }, | |
| { | |
| "epoch": 1.6457703013433378, | |
| "grad_norm": 0.1020950381213344, | |
| "learning_rate": 2.5154882117807503e-05, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 639783152, | |
| "step": 1700, | |
| "train_runtime": 159641.5796, | |
| "train_tokens_per_second": 4007.622 | |
| }, | |
| { | |
| "epoch": 1.650611158174997, | |
| "grad_norm": 0.10434335557369012, | |
| "learning_rate": 2.5014080281855062e-05, | |
| "loss": 0.0718, | |
| "num_input_tokens_seen": 641601136, | |
| "step": 1705, | |
| "train_runtime": 159948.6783, | |
| "train_tokens_per_second": 4011.294 | |
| }, | |
| { | |
| "epoch": 1.6554520150066563, | |
| "grad_norm": 0.09269120806836094, | |
| "learning_rate": 2.4873277999267443e-05, | |
| "loss": 0.0682, | |
| "num_input_tokens_seen": 643464448, | |
| "step": 1710, | |
| "train_runtime": 160228.7859, | |
| "train_tokens_per_second": 4015.91 | |
| }, | |
| { | |
| "epoch": 1.6602928718383154, | |
| "grad_norm": 0.09545551967743877, | |
| "learning_rate": 2.473247973637942e-05, | |
| "loss": 0.0668, | |
| "num_input_tokens_seen": 645403984, | |
| "step": 1715, | |
| "train_runtime": 160573.5465, | |
| "train_tokens_per_second": 4019.367 | |
| }, | |
| { | |
| "epoch": 1.6651337286699746, | |
| "grad_norm": 0.09245805131119629, | |
| "learning_rate": 2.459168995939827e-05, | |
| "loss": 0.063, | |
| "num_input_tokens_seen": 647359520, | |
| "step": 1720, | |
| "train_runtime": 160916.6879, | |
| "train_tokens_per_second": 4022.948 | |
| }, | |
| { | |
| "epoch": 1.6699745855016337, | |
| "grad_norm": 0.1130522027501556, | |
| "learning_rate": 2.4450913134262077e-05, | |
| "loss": 0.0698, | |
| "num_input_tokens_seen": 649221056, | |
| "step": 1725, | |
| "train_runtime": 161212.5554, | |
| "train_tokens_per_second": 4027.112 | |
| }, | |
| { | |
| "epoch": 1.674815442333293, | |
| "grad_norm": 0.1001979401813176, | |
| "learning_rate": 2.4310153726498107e-05, | |
| "loss": 0.0713, | |
| "num_input_tokens_seen": 651074128, | |
| "step": 1730, | |
| "train_runtime": 161567.7558, | |
| "train_tokens_per_second": 4029.728 | |
| }, | |
| { | |
| "epoch": 1.6796562991649522, | |
| "grad_norm": 0.09664199798231568, | |
| "learning_rate": 2.4169416201081102e-05, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 652978208, | |
| "step": 1735, | |
| "train_runtime": 161942.97, | |
| "train_tokens_per_second": 4032.149 | |
| }, | |
| { | |
| "epoch": 1.6844971559966115, | |
| "grad_norm": 0.10551245269293205, | |
| "learning_rate": 2.4028705022291728e-05, | |
| "loss": 0.0661, | |
| "num_input_tokens_seen": 654910336, | |
| "step": 1740, | |
| "train_runtime": 162300.8039, | |
| "train_tokens_per_second": 4035.164 | |
| }, | |
| { | |
| "epoch": 1.6893380128282707, | |
| "grad_norm": 0.0926086339529829, | |
| "learning_rate": 2.3888024653574865e-05, | |
| "loss": 0.0654, | |
| "num_input_tokens_seen": 656834320, | |
| "step": 1745, | |
| "train_runtime": 162632.724, | |
| "train_tokens_per_second": 4038.759 | |
| }, | |
| { | |
| "epoch": 1.6941788696599298, | |
| "grad_norm": 0.0997990172837255, | |
| "learning_rate": 2.374737955739814e-05, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 658744624, | |
| "step": 1750, | |
| "train_runtime": 162977.7639, | |
| "train_tokens_per_second": 4041.929 | |
| }, | |
| { | |
| "epoch": 1.699019726491589, | |
| "grad_norm": 0.09727233240178633, | |
| "learning_rate": 2.3606774195110256e-05, | |
| "loss": 0.0688, | |
| "num_input_tokens_seen": 660657152, | |
| "step": 1755, | |
| "train_runtime": 163311.2124, | |
| "train_tokens_per_second": 4045.388 | |
| }, | |
| { | |
| "epoch": 1.703860583323248, | |
| "grad_norm": 0.09186313556651109, | |
| "learning_rate": 2.346621302679957e-05, | |
| "loss": 0.0626, | |
| "num_input_tokens_seen": 662618160, | |
| "step": 1760, | |
| "train_runtime": 163650.2492, | |
| "train_tokens_per_second": 4048.99 | |
| }, | |
| { | |
| "epoch": 1.7087014401549074, | |
| "grad_norm": 0.09815622832004638, | |
| "learning_rate": 2.3325700511152572e-05, | |
| "loss": 0.0688, | |
| "num_input_tokens_seen": 664466176, | |
| "step": 1765, | |
| "train_runtime": 163980.8046, | |
| "train_tokens_per_second": 4052.097 | |
| }, | |
| { | |
| "epoch": 1.7135422969865666, | |
| "grad_norm": 0.09851131213295354, | |
| "learning_rate": 2.3185241105312435e-05, | |
| "loss": 0.0644, | |
| "num_input_tokens_seen": 666303008, | |
| "step": 1770, | |
| "train_runtime": 164258.4151, | |
| "train_tokens_per_second": 4056.431 | |
| }, | |
| { | |
| "epoch": 1.718383153818226, | |
| "grad_norm": 0.09650945128005722, | |
| "learning_rate": 2.3044839264737695e-05, | |
| "loss": 0.0654, | |
| "num_input_tokens_seen": 668249808, | |
| "step": 1775, | |
| "train_runtime": 164606.7007, | |
| "train_tokens_per_second": 4059.676 | |
| }, | |
| { | |
| "epoch": 1.723224010649885, | |
| "grad_norm": 0.07976228580722033, | |
| "learning_rate": 2.2904499443060852e-05, | |
| "loss": 0.0618, | |
| "num_input_tokens_seen": 670142528, | |
| "step": 1780, | |
| "train_runtime": 164916.446, | |
| "train_tokens_per_second": 4063.528 | |
| }, | |
| { | |
| "epoch": 1.7280648674815442, | |
| "grad_norm": 0.09316901052660637, | |
| "learning_rate": 2.2764226091947153e-05, | |
| "loss": 0.0705, | |
| "num_input_tokens_seen": 671892176, | |
| "step": 1785, | |
| "train_runtime": 165204.9592, | |
| "train_tokens_per_second": 4067.022 | |
| }, | |
| { | |
| "epoch": 1.7329057243132033, | |
| "grad_norm": 0.10554377156841822, | |
| "learning_rate": 2.2624023660953322e-05, | |
| "loss": 0.0694, | |
| "num_input_tokens_seen": 673790848, | |
| "step": 1790, | |
| "train_runtime": 165555.898, | |
| "train_tokens_per_second": 4069.869 | |
| }, | |
| { | |
| "epoch": 1.7377465811448627, | |
| "grad_norm": 0.1000010423520895, | |
| "learning_rate": 2.2483896597386506e-05, | |
| "loss": 0.0672, | |
| "num_input_tokens_seen": 675763984, | |
| "step": 1795, | |
| "train_runtime": 165926.0539, | |
| "train_tokens_per_second": 4072.682 | |
| }, | |
| { | |
| "epoch": 1.7425874379765218, | |
| "grad_norm": 0.09335668641339016, | |
| "learning_rate": 2.2343849346163092e-05, | |
| "loss": 0.0664, | |
| "num_input_tokens_seen": 677686256, | |
| "step": 1800, | |
| "train_runtime": 166242.0359, | |
| "train_tokens_per_second": 4076.504 | |
| }, | |
| { | |
| "epoch": 1.7474282948081812, | |
| "grad_norm": 0.09333896876761687, | |
| "learning_rate": 2.2203886349667826e-05, | |
| "loss": 0.067, | |
| "num_input_tokens_seen": 679548000, | |
| "step": 1805, | |
| "train_runtime": 166529.1496, | |
| "train_tokens_per_second": 4080.655 | |
| }, | |
| { | |
| "epoch": 1.7522691516398403, | |
| "grad_norm": 0.11018536147162415, | |
| "learning_rate": 2.2064012047612796e-05, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 681323824, | |
| "step": 1810, | |
| "train_runtime": 166764.5916, | |
| "train_tokens_per_second": 4085.542 | |
| }, | |
| { | |
| "epoch": 1.7571100084714995, | |
| "grad_norm": 0.09634536795886925, | |
| "learning_rate": 2.1924230876896684e-05, | |
| "loss": 0.0639, | |
| "num_input_tokens_seen": 683289152, | |
| "step": 1815, | |
| "train_runtime": 167115.3972, | |
| "train_tokens_per_second": 4088.726 | |
| }, | |
| { | |
| "epoch": 1.7619508653031586, | |
| "grad_norm": 0.079098965621037, | |
| "learning_rate": 2.1784547271463966e-05, | |
| "loss": 0.063, | |
| "num_input_tokens_seen": 685248960, | |
| "step": 1820, | |
| "train_runtime": 167461.2987, | |
| "train_tokens_per_second": 4091.984 | |
| }, | |
| { | |
| "epoch": 1.7667917221348177, | |
| "grad_norm": 0.09262520602448539, | |
| "learning_rate": 2.164496566216428e-05, | |
| "loss": 0.0658, | |
| "num_input_tokens_seen": 687075264, | |
| "step": 1825, | |
| "train_runtime": 167728.4786, | |
| "train_tokens_per_second": 4096.354 | |
| }, | |
| { | |
| "epoch": 1.771632578966477, | |
| "grad_norm": 0.08425246771738228, | |
| "learning_rate": 2.150549047661193e-05, | |
| "loss": 0.0706, | |
| "num_input_tokens_seen": 688930992, | |
| "step": 1830, | |
| "train_runtime": 168041.5415, | |
| "train_tokens_per_second": 4099.766 | |
| }, | |
| { | |
| "epoch": 1.7764734357981364, | |
| "grad_norm": 0.08261309626975141, | |
| "learning_rate": 2.136612613904533e-05, | |
| "loss": 0.0633, | |
| "num_input_tokens_seen": 690736016, | |
| "step": 1835, | |
| "train_runtime": 168299.5399, | |
| "train_tokens_per_second": 4104.206 | |
| }, | |
| { | |
| "epoch": 1.7813142926297956, | |
| "grad_norm": 0.08439774632959215, | |
| "learning_rate": 2.1226877070186782e-05, | |
| "loss": 0.068, | |
| "num_input_tokens_seen": 692670496, | |
| "step": 1840, | |
| "train_runtime": 168637.0439, | |
| "train_tokens_per_second": 4107.463 | |
| }, | |
| { | |
| "epoch": 1.7861551494614547, | |
| "grad_norm": 0.09065583044936652, | |
| "learning_rate": 2.108774768710215e-05, | |
| "loss": 0.0643, | |
| "num_input_tokens_seen": 694613552, | |
| "step": 1845, | |
| "train_runtime": 168980.2344, | |
| "train_tokens_per_second": 4110.62 | |
| }, | |
| { | |
| "epoch": 1.7909960062931138, | |
| "grad_norm": 0.09792211257284066, | |
| "learning_rate": 2.0948742403060838e-05, | |
| "loss": 0.0667, | |
| "num_input_tokens_seen": 696469216, | |
| "step": 1850, | |
| "train_runtime": 169280.592, | |
| "train_tokens_per_second": 4114.289 | |
| }, | |
| { | |
| "epoch": 1.795836863124773, | |
| "grad_norm": 0.09897529531743958, | |
| "learning_rate": 2.0809865627395705e-05, | |
| "loss": 0.0681, | |
| "num_input_tokens_seen": 698418624, | |
| "step": 1855, | |
| "train_runtime": 169653.3494, | |
| "train_tokens_per_second": 4116.739 | |
| }, | |
| { | |
| "epoch": 1.8006777199564323, | |
| "grad_norm": 0.09500857629505935, | |
| "learning_rate": 2.0671121765363288e-05, | |
| "loss": 0.0647, | |
| "num_input_tokens_seen": 700384480, | |
| "step": 1860, | |
| "train_runtime": 170043.8506, | |
| "train_tokens_per_second": 4118.846 | |
| }, | |
| { | |
| "epoch": 1.8055185767880915, | |
| "grad_norm": 0.10247249755147927, | |
| "learning_rate": 2.0532515218003985e-05, | |
| "loss": 0.0699, | |
| "num_input_tokens_seen": 702304704, | |
| "step": 1865, | |
| "train_runtime": 170404.8081, | |
| "train_tokens_per_second": 4121.39 | |
| }, | |
| { | |
| "epoch": 1.8103594336197508, | |
| "grad_norm": 0.09643799643651368, | |
| "learning_rate": 2.039405038200252e-05, | |
| "loss": 0.0691, | |
| "num_input_tokens_seen": 704195280, | |
| "step": 1870, | |
| "train_runtime": 170715.849, | |
| "train_tokens_per_second": 4124.955 | |
| }, | |
| { | |
| "epoch": 1.81520029045141, | |
| "grad_norm": 0.09857814992539433, | |
| "learning_rate": 2.0255731649548427e-05, | |
| "loss": 0.0656, | |
| "num_input_tokens_seen": 706059712, | |
| "step": 1875, | |
| "train_runtime": 171033.6245, | |
| "train_tokens_per_second": 4128.192 | |
| }, | |
| { | |
| "epoch": 1.820041147283069, | |
| "grad_norm": 0.09418823224621559, | |
| "learning_rate": 2.011756340819673e-05, | |
| "loss": 0.0649, | |
| "num_input_tokens_seen": 708049376, | |
| "step": 1880, | |
| "train_runtime": 171420.7357, | |
| "train_tokens_per_second": 4130.477 | |
| }, | |
| { | |
| "epoch": 1.8248820041147282, | |
| "grad_norm": 0.10339589373318703, | |
| "learning_rate": 1.9979550040728838e-05, | |
| "loss": 0.0677, | |
| "num_input_tokens_seen": 709957648, | |
| "step": 1885, | |
| "train_runtime": 171751.8988, | |
| "train_tokens_per_second": 4133.623 | |
| }, | |
| { | |
| "epoch": 1.8297228609463874, | |
| "grad_norm": 0.09532254902656445, | |
| "learning_rate": 1.9841695925013406e-05, | |
| "loss": 0.0666, | |
| "num_input_tokens_seen": 711775040, | |
| "step": 1890, | |
| "train_runtime": 172045.6813, | |
| "train_tokens_per_second": 4137.128 | |
| }, | |
| { | |
| "epoch": 1.8345637177780467, | |
| "grad_norm": 0.0880459196194006, | |
| "learning_rate": 1.9704005433867555e-05, | |
| "loss": 0.0652, | |
| "num_input_tokens_seen": 713667296, | |
| "step": 1895, | |
| "train_runtime": 172364.1363, | |
| "train_tokens_per_second": 4140.463 | |
| }, | |
| { | |
| "epoch": 1.839404574609706, | |
| "grad_norm": 0.08969514913899801, | |
| "learning_rate": 1.9566482934918135e-05, | |
| "loss": 0.0653, | |
| "num_input_tokens_seen": 715572784, | |
| "step": 1900, | |
| "train_runtime": 172680.2824, | |
| "train_tokens_per_second": 4143.917 | |
| }, | |
| { | |
| "epoch": 1.8442454314413652, | |
| "grad_norm": 0.0913565853305896, | |
| "learning_rate": 1.9429132790463172e-05, | |
| "loss": 0.0623, | |
| "num_input_tokens_seen": 717403184, | |
| "step": 1905, | |
| "train_runtime": 172952.4861, | |
| "train_tokens_per_second": 4147.978 | |
| }, | |
| { | |
| "epoch": 1.8490862882730243, | |
| "grad_norm": 0.07640087073283276, | |
| "learning_rate": 1.9291959357333495e-05, | |
| "loss": 0.0652, | |
| "num_input_tokens_seen": 719188672, | |
| "step": 1910, | |
| "train_runtime": 173218.7348, | |
| "train_tokens_per_second": 4151.91 | |
| }, | |
| { | |
| "epoch": 1.8539271451046835, | |
| "grad_norm": 0.08816630513451665, | |
| "learning_rate": 1.9154966986754568e-05, | |
| "loss": 0.0656, | |
| "num_input_tokens_seen": 721142368, | |
| "step": 1915, | |
| "train_runtime": 173569.3058, | |
| "train_tokens_per_second": 4154.781 | |
| }, | |
| { | |
| "epoch": 1.8587680019363426, | |
| "grad_norm": 0.09071657241594651, | |
| "learning_rate": 1.9018160024208406e-05, | |
| "loss": 0.0673, | |
| "num_input_tokens_seen": 722942560, | |
| "step": 1920, | |
| "train_runtime": 173839.4391, | |
| "train_tokens_per_second": 4158.68 | |
| }, | |
| { | |
| "epoch": 1.863608858768002, | |
| "grad_norm": 0.10455029456382117, | |
| "learning_rate": 1.8881542809295804e-05, | |
| "loss": 0.0659, | |
| "num_input_tokens_seen": 724852000, | |
| "step": 1925, | |
| "train_runtime": 174168.4044, | |
| "train_tokens_per_second": 4161.788 | |
| }, | |
| { | |
| "epoch": 1.868449715599661, | |
| "grad_norm": 0.08954830475655517, | |
| "learning_rate": 1.874511967559861e-05, | |
| "loss": 0.0635, | |
| "num_input_tokens_seen": 726715728, | |
| "step": 1930, | |
| "train_runtime": 174457.6742, | |
| "train_tokens_per_second": 4165.57 | |
| }, | |
| { | |
| "epoch": 1.8732905724313205, | |
| "grad_norm": 0.09323369420411323, | |
| "learning_rate": 1.860889495054231e-05, | |
| "loss": 0.0675, | |
| "num_input_tokens_seen": 728548704, | |
| "step": 1935, | |
| "train_runtime": 174741.255, | |
| "train_tokens_per_second": 4169.3 | |
| }, | |
| { | |
| "epoch": 1.8781314292629796, | |
| "grad_norm": 0.09793727462020756, | |
| "learning_rate": 1.8472872955258764e-05, | |
| "loss": 0.065, | |
| "num_input_tokens_seen": 730456800, | |
| "step": 1940, | |
| "train_runtime": 175068.9574, | |
| "train_tokens_per_second": 4172.395 | |
| }, | |
| { | |
| "epoch": 1.8829722860946387, | |
| "grad_norm": 0.09583230673463962, | |
| "learning_rate": 1.8337058004449087e-05, | |
| "loss": 0.066, | |
| "num_input_tokens_seen": 732296016, | |
| "step": 1945, | |
| "train_runtime": 175374.5228, | |
| "train_tokens_per_second": 4175.612 | |
| }, | |
| { | |
| "epoch": 1.8878131429262979, | |
| "grad_norm": 0.09835179058080876, | |
| "learning_rate": 1.8201454406246853e-05, | |
| "loss": 0.0651, | |
| "num_input_tokens_seen": 734259984, | |
| "step": 1950, | |
| "train_runtime": 175740.5312, | |
| "train_tokens_per_second": 4178.091 | |
| }, | |
| { | |
| "epoch": 1.892653999757957, | |
| "grad_norm": 0.09108637153342643, | |
| "learning_rate": 1.8066066462081365e-05, | |
| "loss": 0.0626, | |
| "num_input_tokens_seen": 736134608, | |
| "step": 1955, | |
| "train_runtime": 176053.0546, | |
| "train_tokens_per_second": 4181.323 | |
| }, | |
| { | |
| "epoch": 1.8974948565896164, | |
| "grad_norm": 0.0919055897190125, | |
| "learning_rate": 1.7930898466541278e-05, | |
| "loss": 0.0643, | |
| "num_input_tokens_seen": 737950480, | |
| "step": 1960, | |
| "train_runtime": 176349.4191, | |
| "train_tokens_per_second": 4184.593 | |
| }, | |
| { | |
| "epoch": 1.9023357134212757, | |
| "grad_norm": 0.090159941398429, | |
| "learning_rate": 1.779595470723831e-05, | |
| "loss": 0.0615, | |
| "num_input_tokens_seen": 739837152, | |
| "step": 1965, | |
| "train_runtime": 176648.1234, | |
| "train_tokens_per_second": 4188.197 | |
| }, | |
| { | |
| "epoch": 1.9071765702529349, | |
| "grad_norm": 0.09346294098357426, | |
| "learning_rate": 1.7661239464671307e-05, | |
| "loss": 0.0665, | |
| "num_input_tokens_seen": 741721280, | |
| "step": 1970, | |
| "train_runtime": 176965.8973, | |
| "train_tokens_per_second": 4191.323 | |
| }, | |
| { | |
| "epoch": 1.912017427084594, | |
| "grad_norm": 0.10241633630793719, | |
| "learning_rate": 1.7526757012090384e-05, | |
| "loss": 0.0651, | |
| "num_input_tokens_seen": 743570432, | |
| "step": 1975, | |
| "train_runtime": 177269.1725, | |
| "train_tokens_per_second": 4194.584 | |
| }, | |
| { | |
| "epoch": 1.9168582839162531, | |
| "grad_norm": 0.09173533340918434, | |
| "learning_rate": 1.7392511615361454e-05, | |
| "loss": 0.0651, | |
| "num_input_tokens_seen": 745332208, | |
| "step": 1980, | |
| "train_runtime": 177532.7109, | |
| "train_tokens_per_second": 4198.281 | |
| }, | |
| { | |
| "epoch": 1.9216991407479123, | |
| "grad_norm": 0.1008623059871893, | |
| "learning_rate": 1.7258507532830843e-05, | |
| "loss": 0.0648, | |
| "num_input_tokens_seen": 747317616, | |
| "step": 1985, | |
| "train_runtime": 177898.9302, | |
| "train_tokens_per_second": 4200.799 | |
| }, | |
| { | |
| "epoch": 1.9265399975795716, | |
| "grad_norm": 0.089928131700152, | |
| "learning_rate": 1.7124749015190245e-05, | |
| "loss": 0.0646, | |
| "num_input_tokens_seen": 749180048, | |
| "step": 1990, | |
| "train_runtime": 178198.867, | |
| "train_tokens_per_second": 4204.18 | |
| }, | |
| { | |
| "epoch": 1.9313808544112308, | |
| "grad_norm": 0.08994654227932986, | |
| "learning_rate": 1.699124030534191e-05, | |
| "loss": 0.0629, | |
| "num_input_tokens_seen": 751039536, | |
| "step": 1995, | |
| "train_runtime": 178494.6526, | |
| "train_tokens_per_second": 4207.63 | |
| }, | |
| { | |
| "epoch": 1.93622171124289, | |
| "grad_norm": 0.08906162646309361, | |
| "learning_rate": 1.6857985638263994e-05, | |
| "loss": 0.0669, | |
| "num_input_tokens_seen": 752911840, | |
| "step": 2000, | |
| "train_runtime": 178774.0756, | |
| "train_tokens_per_second": 4211.527 | |
| }, | |
| { | |
| "epoch": 1.9410625680745492, | |
| "grad_norm": 0.0871511140825133, | |
| "learning_rate": 1.6724989240876302e-05, | |
| "loss": 0.0633, | |
| "num_input_tokens_seen": 754784096, | |
| "step": 2005, | |
| "train_runtime": 179064.7161, | |
| "train_tokens_per_second": 4215.147 | |
| }, | |
| { | |
| "epoch": 1.9459034249062084, | |
| "grad_norm": 0.09886328312958423, | |
| "learning_rate": 1.6592255331906127e-05, | |
| "loss": 0.0631, | |
| "num_input_tokens_seen": 756649472, | |
| "step": 2010, | |
| "train_runtime": 179383.4531, | |
| "train_tokens_per_second": 4218.056 | |
| }, | |
| { | |
| "epoch": 1.9507442817378675, | |
| "grad_norm": 0.09426446935032833, | |
| "learning_rate": 1.64597881217545e-05, | |
| "loss": 0.0611, | |
| "num_input_tokens_seen": 758536336, | |
| "step": 2015, | |
| "train_runtime": 179682.0449, | |
| "train_tokens_per_second": 4221.548 | |
| }, | |
| { | |
| "epoch": 1.9555851385695266, | |
| "grad_norm": 0.09397155441601662, | |
| "learning_rate": 1.632759181236258e-05, | |
| "loss": 0.0653, | |
| "num_input_tokens_seen": 760377888, | |
| "step": 2020, | |
| "train_runtime": 179989.7183, | |
| "train_tokens_per_second": 4224.563 | |
| }, | |
| { | |
| "epoch": 1.960425995401186, | |
| "grad_norm": 0.10368768607789577, | |
| "learning_rate": 1.61956705970784e-05, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 762278528, | |
| "step": 2025, | |
| "train_runtime": 180338.7056, | |
| "train_tokens_per_second": 4226.927 | |
| }, | |
| { | |
| "epoch": 1.9652668522328454, | |
| "grad_norm": 0.0931039697726731, | |
| "learning_rate": 1.606402866052382e-05, | |
| "loss": 0.0666, | |
| "num_input_tokens_seen": 764061840, | |
| "step": 2030, | |
| "train_runtime": 180601.1349, | |
| "train_tokens_per_second": 4230.659 | |
| }, | |
| { | |
| "epoch": 1.9701077090645045, | |
| "grad_norm": 0.09153549712253775, | |
| "learning_rate": 1.593267017846185e-05, | |
| "loss": 0.0652, | |
| "num_input_tokens_seen": 765994112, | |
| "step": 2035, | |
| "train_runtime": 180938.3007, | |
| "train_tokens_per_second": 4233.455 | |
| }, | |
| { | |
| "epoch": 1.9749485658961636, | |
| "grad_norm": 0.0910008764482439, | |
| "learning_rate": 1.5801599317664106e-05, | |
| "loss": 0.0655, | |
| "num_input_tokens_seen": 767872592, | |
| "step": 2040, | |
| "train_runtime": 181245.9015, | |
| "train_tokens_per_second": 4236.634 | |
| }, | |
| { | |
| "epoch": 1.9797894227278228, | |
| "grad_norm": 0.09525002435832676, | |
| "learning_rate": 1.567082023577869e-05, | |
| "loss": 0.0643, | |
| "num_input_tokens_seen": 769751856, | |
| "step": 2045, | |
| "train_runtime": 181570.7397, | |
| "train_tokens_per_second": 4239.405 | |
| }, | |
| { | |
| "epoch": 1.984630279559482, | |
| "grad_norm": 0.08882860457825476, | |
| "learning_rate": 1.554033708119832e-05, | |
| "loss": 0.0614, | |
| "num_input_tokens_seen": 771662720, | |
| "step": 2050, | |
| "train_runtime": 181876.0901, | |
| "train_tokens_per_second": 4242.794 | |
| }, | |
| { | |
| "epoch": 1.9894711363911413, | |
| "grad_norm": 0.09856363331292063, | |
| "learning_rate": 1.5410153992928685e-05, | |
| "loss": 0.0637, | |
| "num_input_tokens_seen": 773536048, | |
| "step": 2055, | |
| "train_runtime": 182203.6654, | |
| "train_tokens_per_second": 4245.447 | |
| }, | |
| { | |
| "epoch": 1.9943119932228004, | |
| "grad_norm": 0.09210117173688946, | |
| "learning_rate": 1.528027510045723e-05, | |
| "loss": 0.0647, | |
| "num_input_tokens_seen": 775448176, | |
| "step": 2060, | |
| "train_runtime": 182525.7774, | |
| "train_tokens_per_second": 4248.431 | |
| }, | |
| { | |
| "epoch": 1.9991528500544598, | |
| "grad_norm": 0.09915830664554921, | |
| "learning_rate": 1.5150704523622066e-05, | |
| "loss": 0.0633, | |
| "num_input_tokens_seen": 777334320, | |
| "step": 2065, | |
| "train_runtime": 182819.8586, | |
| "train_tokens_per_second": 4251.914 | |
| }, | |
| { | |
| "epoch": 2.0038726854653275, | |
| "grad_norm": 0.08510468955619002, | |
| "learning_rate": 1.5021446372481408e-05, | |
| "loss": 0.0456, | |
| "num_input_tokens_seen": 779269280, | |
| "step": 2070, | |
| "train_runtime": 183203.0122, | |
| "train_tokens_per_second": 4253.583 | |
| }, | |
| { | |
| "epoch": 2.0087135422969866, | |
| "grad_norm": 0.11048348864635747, | |
| "learning_rate": 1.4892504747183078e-05, | |
| "loss": 0.0469, | |
| "num_input_tokens_seen": 781110080, | |
| "step": 2075, | |
| "train_runtime": 183493.0172, | |
| "train_tokens_per_second": 4256.893 | |
| }, | |
| { | |
| "epoch": 2.0135543991286458, | |
| "grad_norm": 0.08898938959054886, | |
| "learning_rate": 1.4763883737834538e-05, | |
| "loss": 0.0424, | |
| "num_input_tokens_seen": 783014480, | |
| "step": 2080, | |
| "train_runtime": 183815.8349, | |
| "train_tokens_per_second": 4259.777 | |
| }, | |
| { | |
| "epoch": 2.018395255960305, | |
| "grad_norm": 0.0919658947768258, | |
| "learning_rate": 1.4635587424373104e-05, | |
| "loss": 0.0432, | |
| "num_input_tokens_seen": 784918160, | |
| "step": 2085, | |
| "train_runtime": 184106.3305, | |
| "train_tokens_per_second": 4263.396 | |
| }, | |
| { | |
| "epoch": 2.023236112791964, | |
| "grad_norm": 0.09232021098074011, | |
| "learning_rate": 1.4507619876436548e-05, | |
| "loss": 0.0417, | |
| "num_input_tokens_seen": 786925872, | |
| "step": 2090, | |
| "train_runtime": 184467.9736, | |
| "train_tokens_per_second": 4265.921 | |
| }, | |
| { | |
| "epoch": 2.0280769696236236, | |
| "grad_norm": 0.09272252103462124, | |
| "learning_rate": 1.4379985153233983e-05, | |
| "loss": 0.0427, | |
| "num_input_tokens_seen": 788883568, | |
| "step": 2095, | |
| "train_runtime": 184855.3063, | |
| "train_tokens_per_second": 4267.573 | |
| }, | |
| { | |
| "epoch": 2.0329178264552827, | |
| "grad_norm": 0.08716923303447359, | |
| "learning_rate": 1.4252687303417125e-05, | |
| "loss": 0.0447, | |
| "num_input_tokens_seen": 790834704, | |
| "step": 2100, | |
| "train_runtime": 185222.062, | |
| "train_tokens_per_second": 4269.657 | |
| }, | |
| { | |
| "epoch": 2.037758683286942, | |
| "grad_norm": 0.09674916113938142, | |
| "learning_rate": 1.4125730364951867e-05, | |
| "loss": 0.044, | |
| "num_input_tokens_seen": 792644976, | |
| "step": 2105, | |
| "train_runtime": 185490.7671, | |
| "train_tokens_per_second": 4273.231 | |
| }, | |
| { | |
| "epoch": 2.042599540118601, | |
| "grad_norm": 0.1018255197888474, | |
| "learning_rate": 1.3999118364990172e-05, | |
| "loss": 0.0432, | |
| "num_input_tokens_seen": 794521776, | |
| "step": 2110, | |
| "train_runtime": 185814.5824, | |
| "train_tokens_per_second": 4275.885 | |
| }, | |
| { | |
| "epoch": 2.04744039695026, | |
| "grad_norm": 0.10641267828109918, | |
| "learning_rate": 1.387285531974235e-05, | |
| "loss": 0.0465, | |
| "num_input_tokens_seen": 796324144, | |
| "step": 2115, | |
| "train_runtime": 186093.9032, | |
| "train_tokens_per_second": 4279.152 | |
| }, | |
| { | |
| "epoch": 2.0522812537819193, | |
| "grad_norm": 0.08890413404478513, | |
| "learning_rate": 1.3746945234349651e-05, | |
| "loss": 0.042, | |
| "num_input_tokens_seen": 798176336, | |
| "step": 2120, | |
| "train_runtime": 186381.3552, | |
| "train_tokens_per_second": 4282.49 | |
| }, | |
| { | |
| "epoch": 2.0571221106135784, | |
| "grad_norm": 0.1007973258825116, | |
| "learning_rate": 1.3621392102757236e-05, | |
| "loss": 0.0454, | |
| "num_input_tokens_seen": 799981648, | |
| "step": 2125, | |
| "train_runtime": 186688.6329, | |
| "train_tokens_per_second": 4285.112 | |
| }, | |
| { | |
| "epoch": 2.061962967445238, | |
| "grad_norm": 0.11828945277047198, | |
| "learning_rate": 1.3496199907587426e-05, | |
| "loss": 0.0447, | |
| "num_input_tokens_seen": 801919920, | |
| "step": 2130, | |
| "train_runtime": 187027.4012, | |
| "train_tokens_per_second": 4287.714 | |
| }, | |
| { | |
| "epoch": 2.066803824276897, | |
| "grad_norm": 0.09697782214954971, | |
| "learning_rate": 1.337137262001349e-05, | |
| "loss": 0.0422, | |
| "num_input_tokens_seen": 803830048, | |
| "step": 2135, | |
| "train_runtime": 187371.5567, | |
| "train_tokens_per_second": 4290.032 | |
| }, | |
| { | |
| "epoch": 2.0716446811085563, | |
| "grad_norm": 0.08971480039994223, | |
| "learning_rate": 1.324691419963352e-05, | |
| "loss": 0.0414, | |
| "num_input_tokens_seen": 805707856, | |
| "step": 2140, | |
| "train_runtime": 187658.5794, | |
| "train_tokens_per_second": 4293.477 | |
| }, | |
| { | |
| "epoch": 2.0764855379402154, | |
| "grad_norm": 0.09701860651683053, | |
| "learning_rate": 1.3122828594345007e-05, | |
| "loss": 0.0445, | |
| "num_input_tokens_seen": 807627408, | |
| "step": 2145, | |
| "train_runtime": 187990.6441, | |
| "train_tokens_per_second": 4296.104 | |
| }, | |
| { | |
| "epoch": 2.0813263947718745, | |
| "grad_norm": 0.10086672081682901, | |
| "learning_rate": 1.2999119740219434e-05, | |
| "loss": 0.0462, | |
| "num_input_tokens_seen": 809504384, | |
| "step": 2150, | |
| "train_runtime": 188275.3974, | |
| "train_tokens_per_second": 4299.576 | |
| }, | |
| { | |
| "epoch": 2.0861672516035337, | |
| "grad_norm": 0.10444978322400048, | |
| "learning_rate": 1.2875791561377557e-05, | |
| "loss": 0.0458, | |
| "num_input_tokens_seen": 811379696, | |
| "step": 2155, | |
| "train_runtime": 188599.6081, | |
| "train_tokens_per_second": 4302.128 | |
| }, | |
| { | |
| "epoch": 2.091008108435193, | |
| "grad_norm": 0.10161748986408596, | |
| "learning_rate": 1.2752847969864857e-05, | |
| "loss": 0.0425, | |
| "num_input_tokens_seen": 813165728, | |
| "step": 2160, | |
| "train_runtime": 188873.2381, | |
| "train_tokens_per_second": 4305.352 | |
| }, | |
| { | |
| "epoch": 2.0958489652668524, | |
| "grad_norm": 0.09934783137965864, | |
| "learning_rate": 1.2630292865527483e-05, | |
| "loss": 0.0425, | |
| "num_input_tokens_seen": 815080720, | |
| "step": 2165, | |
| "train_runtime": 189177.9808, | |
| "train_tokens_per_second": 4308.539 | |
| }, | |
| { | |
| "epoch": 2.1006898220985115, | |
| "grad_norm": 0.09905949770742634, | |
| "learning_rate": 1.2508130135888518e-05, | |
| "loss": 0.0425, | |
| "num_input_tokens_seen": 817073936, | |
| "step": 2170, | |
| "train_runtime": 189630.3128, | |
| "train_tokens_per_second": 4308.773 | |
| }, | |
| { | |
| "epoch": 2.1055306789301707, | |
| "grad_norm": 0.09067487917664219, | |
| "learning_rate": 1.2386363656024691e-05, | |
| "loss": 0.0415, | |
| "num_input_tokens_seen": 818989136, | |
| "step": 2175, | |
| "train_runtime": 189949.6186, | |
| "train_tokens_per_second": 4311.612 | |
| }, | |
| { | |
| "epoch": 2.11037153576183, | |
| "grad_norm": 0.10406493878646128, | |
| "learning_rate": 1.226499728844345e-05, | |
| "loss": 0.0425, | |
| "num_input_tokens_seen": 820972848, | |
| "step": 2180, | |
| "train_runtime": 190334.3589, | |
| "train_tokens_per_second": 4313.319 | |
| }, | |
| { | |
| "epoch": 2.115212392593489, | |
| "grad_norm": 0.08540798243716224, | |
| "learning_rate": 1.2144034882960392e-05, | |
| "loss": 0.0398, | |
| "num_input_tokens_seen": 822903904, | |
| "step": 2185, | |
| "train_runtime": 190672.7343, | |
| "train_tokens_per_second": 4315.792 | |
| }, | |
| { | |
| "epoch": 2.120053249425148, | |
| "grad_norm": 0.11741293089028593, | |
| "learning_rate": 1.2023480276577267e-05, | |
| "loss": 0.0473, | |
| "num_input_tokens_seen": 824814944, | |
| "step": 2190, | |
| "train_runtime": 191027.2891, | |
| "train_tokens_per_second": 4317.786 | |
| }, | |
| { | |
| "epoch": 2.1248941062568076, | |
| "grad_norm": 0.09836368383563045, | |
| "learning_rate": 1.1903337293360101e-05, | |
| "loss": 0.042, | |
| "num_input_tokens_seen": 826652352, | |
| "step": 2195, | |
| "train_runtime": 191295.2085, | |
| "train_tokens_per_second": 4321.344 | |
| }, | |
| { | |
| "epoch": 2.1297349630884668, | |
| "grad_norm": 0.1091691815964545, | |
| "learning_rate": 1.1783609744318057e-05, | |
| "loss": 0.0419, | |
| "num_input_tokens_seen": 828587888, | |
| "step": 2200, | |
| "train_runtime": 191647.5762, | |
| "train_tokens_per_second": 4323.498 | |
| }, | |
| { | |
| "epoch": 2.134575819920126, | |
| "grad_norm": 0.0943679758427896, | |
| "learning_rate": 1.1664301427282418e-05, | |
| "loss": 0.0449, | |
| "num_input_tokens_seen": 830391184, | |
| "step": 2205, | |
| "train_runtime": 191919.3897, | |
| "train_tokens_per_second": 4326.771 | |
| }, | |
| { | |
| "epoch": 2.139416676751785, | |
| "grad_norm": 0.10612562433762897, | |
| "learning_rate": 1.154541612678619e-05, | |
| "loss": 0.0476, | |
| "num_input_tokens_seen": 832282752, | |
| "step": 2210, | |
| "train_runtime": 192247.4177, | |
| "train_tokens_per_second": 4329.227 | |
| }, | |
| { | |
| "epoch": 2.144257533583444, | |
| "grad_norm": 0.09834530066415154, | |
| "learning_rate": 1.1426957613944039e-05, | |
| "loss": 0.0457, | |
| "num_input_tokens_seen": 834136992, | |
| "step": 2215, | |
| "train_runtime": 192568.1333, | |
| "train_tokens_per_second": 4331.646 | |
| }, | |
| { | |
| "epoch": 2.1490983904151033, | |
| "grad_norm": 0.11010939625022646, | |
| "learning_rate": 1.1308929646332658e-05, | |
| "loss": 0.0417, | |
| "num_input_tokens_seen": 836052320, | |
| "step": 2220, | |
| "train_runtime": 192890.3051, | |
| "train_tokens_per_second": 4334.341 | |
| }, | |
| { | |
| "epoch": 2.153939247246763, | |
| "grad_norm": 0.0989548927712895, | |
| "learning_rate": 1.1191335967871582e-05, | |
| "loss": 0.0439, | |
| "num_input_tokens_seen": 837885568, | |
| "step": 2225, | |
| "train_runtime": 193169.477, | |
| "train_tokens_per_second": 4337.567 | |
| }, | |
| { | |
| "epoch": 2.158780104078422, | |
| "grad_norm": 0.10316511914427559, | |
| "learning_rate": 1.1074180308704429e-05, | |
| "loss": 0.0448, | |
| "num_input_tokens_seen": 839786048, | |
| "step": 2230, | |
| "train_runtime": 193482.1859, | |
| "train_tokens_per_second": 4340.379 | |
| }, | |
| { | |
| "epoch": 2.163620960910081, | |
| "grad_norm": 0.11447985050959848, | |
| "learning_rate": 1.0957466385080578e-05, | |
| "loss": 0.0462, | |
| "num_input_tokens_seen": 841636432, | |
| "step": 2235, | |
| "train_runtime": 193776.8505, | |
| "train_tokens_per_second": 4343.328 | |
| }, | |
| { | |
| "epoch": 2.1684618177417403, | |
| "grad_norm": 0.09641942816445281, | |
| "learning_rate": 1.0841197899237282e-05, | |
| "loss": 0.0435, | |
| "num_input_tokens_seen": 843588608, | |
| "step": 2240, | |
| "train_runtime": 194131.362, | |
| "train_tokens_per_second": 4345.452 | |
| }, | |
| { | |
| "epoch": 2.1733026745733994, | |
| "grad_norm": 0.08526832186648046, | |
| "learning_rate": 1.0725378539282246e-05, | |
| "loss": 0.0435, | |
| "num_input_tokens_seen": 845476688, | |
| "step": 2245, | |
| "train_runtime": 194465.4517, | |
| "train_tokens_per_second": 4347.696 | |
| }, | |
| { | |
| "epoch": 2.1781435314050586, | |
| "grad_norm": 0.09389266894701039, | |
| "learning_rate": 1.061001197907659e-05, | |
| "loss": 0.0426, | |
| "num_input_tokens_seen": 847341792, | |
| "step": 2250, | |
| "train_runtime": 194755.117, | |
| "train_tokens_per_second": 4350.806 | |
| }, | |
| { | |
| "epoch": 2.1829843882367177, | |
| "grad_norm": 0.1091155029328961, | |
| "learning_rate": 1.0495101878118408e-05, | |
| "loss": 0.0438, | |
| "num_input_tokens_seen": 849140496, | |
| "step": 2255, | |
| "train_runtime": 195018.4821, | |
| "train_tokens_per_second": 4354.154 | |
| }, | |
| { | |
| "epoch": 2.1878252450683773, | |
| "grad_norm": 0.10639940814249475, | |
| "learning_rate": 1.0380651881426567e-05, | |
| "loss": 0.0421, | |
| "num_input_tokens_seen": 850942480, | |
| "step": 2260, | |
| "train_runtime": 195260.0607, | |
| "train_tokens_per_second": 4357.996 | |
| }, | |
| { | |
| "epoch": 2.1926661019000364, | |
| "grad_norm": 0.10716301854067384, | |
| "learning_rate": 1.0266665619425181e-05, | |
| "loss": 0.0441, | |
| "num_input_tokens_seen": 852750800, | |
| "step": 2265, | |
| "train_runtime": 195553.3537, | |
| "train_tokens_per_second": 4360.707 | |
| }, | |
| { | |
| "epoch": 2.1975069587316955, | |
| "grad_norm": 0.10343872393389002, | |
| "learning_rate": 1.0153146707828404e-05, | |
| "loss": 0.0427, | |
| "num_input_tokens_seen": 854703456, | |
| "step": 2270, | |
| "train_runtime": 195905.746, | |
| "train_tokens_per_second": 4362.83 | |
| }, | |
| { | |
| "epoch": 2.2023478155633547, | |
| "grad_norm": 0.09659246514397754, | |
| "learning_rate": 1.004009874752575e-05, | |
| "loss": 0.0438, | |
| "num_input_tokens_seen": 856635792, | |
| "step": 2275, | |
| "train_runtime": 196280.3262, | |
| "train_tokens_per_second": 4364.349 | |
| }, | |
| { | |
| "epoch": 2.207188672395014, | |
| "grad_norm": 0.10837299321798222, | |
| "learning_rate": 9.927525324467866e-06, | |
| "loss": 0.0435, | |
| "num_input_tokens_seen": 858544176, | |
| "step": 2280, | |
| "train_runtime": 196631.0128, | |
| "train_tokens_per_second": 4366.27 | |
| }, | |
| { | |
| "epoch": 2.212029529226673, | |
| "grad_norm": 0.0935449758044754, | |
| "learning_rate": 9.81543000955279e-06, | |
| "loss": 0.0412, | |
| "num_input_tokens_seen": 860409424, | |
| "step": 2285, | |
| "train_runtime": 196926.666, | |
| "train_tokens_per_second": 4369.187 | |
| }, | |
| { | |
| "epoch": 2.2168703860583325, | |
| "grad_norm": 0.08817931437627177, | |
| "learning_rate": 9.703816358512674e-06, | |
| "loss": 0.0415, | |
| "num_input_tokens_seen": 862320048, | |
| "step": 2290, | |
| "train_runtime": 197238.6013, | |
| "train_tokens_per_second": 4371.964 | |
| }, | |
| { | |
| "epoch": 2.2217112428899917, | |
| "grad_norm": 0.0980673558112737, | |
| "learning_rate": 9.592687911801001e-06, | |
| "loss": 0.0442, | |
| "num_input_tokens_seen": 864243216, | |
| "step": 2295, | |
| "train_runtime": 197594.875, | |
| "train_tokens_per_second": 4373.814 | |
| }, | |
| { | |
| "epoch": 2.226552099721651, | |
| "grad_norm": 0.10853728697423164, | |
| "learning_rate": 9.482048194480275e-06, | |
| "loss": 0.0425, | |
| "num_input_tokens_seen": 866168848, | |
| "step": 2300, | |
| "train_runtime": 197933.6478, | |
| "train_tokens_per_second": 4376.057 | |
| }, | |
| { | |
| "epoch": 2.23139295655331, | |
| "grad_norm": 0.11490821008247167, | |
| "learning_rate": 9.371900716110183e-06, | |
| "loss": 0.0431, | |
| "num_input_tokens_seen": 868041296, | |
| "step": 2305, | |
| "train_runtime": 198239.5933, | |
| "train_tokens_per_second": 4378.748 | |
| }, | |
| { | |
| "epoch": 2.236233813384969, | |
| "grad_norm": 0.08947053295422262, | |
| "learning_rate": 9.262248970636334e-06, | |
| "loss": 0.046, | |
| "num_input_tokens_seen": 869878384, | |
| "step": 2310, | |
| "train_runtime": 198532.6139, | |
| "train_tokens_per_second": 4381.539 | |
| }, | |
| { | |
| "epoch": 2.241074670216628, | |
| "grad_norm": 0.10485732709538129, | |
| "learning_rate": 9.153096436279327e-06, | |
| "loss": 0.042, | |
| "num_input_tokens_seen": 871688688, | |
| "step": 2315, | |
| "train_runtime": 198817.926, | |
| "train_tokens_per_second": 4384.357 | |
| }, | |
| { | |
| "epoch": 2.2459155270482873, | |
| "grad_norm": 0.11044109934649306, | |
| "learning_rate": 9.044446575424548e-06, | |
| "loss": 0.0431, | |
| "num_input_tokens_seen": 873728640, | |
| "step": 2320, | |
| "train_runtime": 199228.9285, | |
| "train_tokens_per_second": 4385.551 | |
| }, | |
| { | |
| "epoch": 2.250756383879947, | |
| "grad_norm": 0.09950408440238417, | |
| "learning_rate": 8.936302834512206e-06, | |
| "loss": 0.0439, | |
| "num_input_tokens_seen": 875601696, | |
| "step": 2325, | |
| "train_runtime": 199550.3994, | |
| "train_tokens_per_second": 4387.872 | |
| }, | |
| { | |
| "epoch": 2.255597240711606, | |
| "grad_norm": 0.09167335947382782, | |
| "learning_rate": 8.828668643928112e-06, | |
| "loss": 0.0419, | |
| "num_input_tokens_seen": 877559760, | |
| "step": 2330, | |
| "train_runtime": 199908.7951, | |
| "train_tokens_per_second": 4389.801 | |
| }, | |
| { | |
| "epoch": 2.260438097543265, | |
| "grad_norm": 0.10598202575372626, | |
| "learning_rate": 8.721547417894816e-06, | |
| "loss": 0.0432, | |
| "num_input_tokens_seen": 879392576, | |
| "step": 2335, | |
| "train_runtime": 200216.2117, | |
| "train_tokens_per_second": 4392.215 | |
| }, | |
| { | |
| "epoch": 2.2652789543749243, | |
| "grad_norm": 0.10543835687376972, | |
| "learning_rate": 8.614942554363326e-06, | |
| "loss": 0.0453, | |
| "num_input_tokens_seen": 881180800, | |
| "step": 2340, | |
| "train_runtime": 200501.3334, | |
| "train_tokens_per_second": 4394.887 | |
| }, | |
| { | |
| "epoch": 2.2701198112065835, | |
| "grad_norm": 0.10496418814685284, | |
| "learning_rate": 8.508857434905304e-06, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 883048928, | |
| "step": 2345, | |
| "train_runtime": 200822.2237, | |
| "train_tokens_per_second": 4397.167 | |
| }, | |
| { | |
| "epoch": 2.2749606680382426, | |
| "grad_norm": 0.09577382338976835, | |
| "learning_rate": 8.40329542460582e-06, | |
| "loss": 0.0424, | |
| "num_input_tokens_seen": 885000208, | |
| "step": 2350, | |
| "train_runtime": 201157.0354, | |
| "train_tokens_per_second": 4399.549 | |
| }, | |
| { | |
| "epoch": 2.279801524869902, | |
| "grad_norm": 0.10452053840305, | |
| "learning_rate": 8.298259871956596e-06, | |
| "loss": 0.0432, | |
| "num_input_tokens_seen": 886939040, | |
| "step": 2355, | |
| "train_runtime": 201495.1483, | |
| "train_tokens_per_second": 4401.789 | |
| }, | |
| { | |
| "epoch": 2.2846423817015613, | |
| "grad_norm": 0.08543950174023542, | |
| "learning_rate": 8.193754108749793e-06, | |
| "loss": 0.0408, | |
| "num_input_tokens_seen": 888957616, | |
| "step": 2360, | |
| "train_runtime": 201854.2624, | |
| "train_tokens_per_second": 4403.958 | |
| }, | |
| { | |
| "epoch": 2.2894832385332204, | |
| "grad_norm": 0.10273943575259807, | |
| "learning_rate": 8.08978144997234e-06, | |
| "loss": 0.0427, | |
| "num_input_tokens_seen": 890755008, | |
| "step": 2365, | |
| "train_runtime": 202132.5142, | |
| "train_tokens_per_second": 4406.787 | |
| }, | |
| { | |
| "epoch": 2.2943240953648796, | |
| "grad_norm": 0.10320774278040343, | |
| "learning_rate": 7.986345193700726e-06, | |
| "loss": 0.0414, | |
| "num_input_tokens_seen": 892675136, | |
| "step": 2370, | |
| "train_runtime": 202472.4309, | |
| "train_tokens_per_second": 4408.873 | |
| }, | |
| { | |
| "epoch": 2.2991649521965387, | |
| "grad_norm": 0.10693768647967408, | |
| "learning_rate": 7.883448620996489e-06, | |
| "loss": 0.0427, | |
| "num_input_tokens_seen": 894455920, | |
| "step": 2375, | |
| "train_runtime": 202743.8126, | |
| "train_tokens_per_second": 4411.754 | |
| }, | |
| { | |
| "epoch": 2.304005809028198, | |
| "grad_norm": 0.10326302338915108, | |
| "learning_rate": 7.781094995802007e-06, | |
| "loss": 0.0485, | |
| "num_input_tokens_seen": 896339664, | |
| "step": 2380, | |
| "train_runtime": 203093.6325, | |
| "train_tokens_per_second": 4413.431 | |
| }, | |
| { | |
| "epoch": 2.308846665859857, | |
| "grad_norm": 0.1056092406773901, | |
| "learning_rate": 7.679287564837074e-06, | |
| "loss": 0.0417, | |
| "num_input_tokens_seen": 898166624, | |
| "step": 2385, | |
| "train_runtime": 203393.0523, | |
| "train_tokens_per_second": 4415.916 | |
| }, | |
| { | |
| "epoch": 2.3136875226915166, | |
| "grad_norm": 0.10998323658142867, | |
| "learning_rate": 7.57802955749585e-06, | |
| "loss": 0.0408, | |
| "num_input_tokens_seen": 900181312, | |
| "step": 2390, | |
| "train_runtime": 203758.3229, | |
| "train_tokens_per_second": 4417.887 | |
| }, | |
| { | |
| "epoch": 2.3185283795231757, | |
| "grad_norm": 0.1005967471563839, | |
| "learning_rate": 7.4773241857444415e-06, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 902067280, | |
| "step": 2395, | |
| "train_runtime": 204090.2987, | |
| "train_tokens_per_second": 4419.942 | |
| }, | |
| { | |
| "epoch": 2.323369236354835, | |
| "grad_norm": 0.09602599680101273, | |
| "learning_rate": 7.3771746440190195e-06, | |
| "loss": 0.0417, | |
| "num_input_tokens_seen": 903961952, | |
| "step": 2400, | |
| "train_runtime": 204390.7435, | |
| "train_tokens_per_second": 4422.715 | |
| }, | |
| { | |
| "epoch": 2.328210093186494, | |
| "grad_norm": 0.10546331216246121, | |
| "learning_rate": 7.277584109124483e-06, | |
| "loss": 0.0439, | |
| "num_input_tokens_seen": 905764752, | |
| "step": 2405, | |
| "train_runtime": 204689.5804, | |
| "train_tokens_per_second": 4425.065 | |
| }, | |
| { | |
| "epoch": 2.333050950018153, | |
| "grad_norm": 0.10566890437995724, | |
| "learning_rate": 7.178555740133694e-06, | |
| "loss": 0.0437, | |
| "num_input_tokens_seen": 907639040, | |
| "step": 2410, | |
| "train_runtime": 205007.1634, | |
| "train_tokens_per_second": 4427.353 | |
| }, | |
| { | |
| "epoch": 2.3378918068498122, | |
| "grad_norm": 0.1078878711996976, | |
| "learning_rate": 7.080092678287262e-06, | |
| "loss": 0.0429, | |
| "num_input_tokens_seen": 909548736, | |
| "step": 2415, | |
| "train_runtime": 205363.2901, | |
| "train_tokens_per_second": 4428.974 | |
| }, | |
| { | |
| "epoch": 2.3427326636814714, | |
| "grad_norm": 0.10362678547369009, | |
| "learning_rate": 6.982198046893912e-06, | |
| "loss": 0.0403, | |
| "num_input_tokens_seen": 911369184, | |
| "step": 2420, | |
| "train_runtime": 205601.0346, | |
| "train_tokens_per_second": 4432.707 | |
| }, | |
| { | |
| "epoch": 2.347573520513131, | |
| "grad_norm": 0.1116147652899066, | |
| "learning_rate": 6.884874951231407e-06, | |
| "loss": 0.0444, | |
| "num_input_tokens_seen": 913234896, | |
| "step": 2425, | |
| "train_runtime": 205896.2768, | |
| "train_tokens_per_second": 4435.412 | |
| }, | |
| { | |
| "epoch": 2.35241437734479, | |
| "grad_norm": 0.11521220794776303, | |
| "learning_rate": 6.788126478448046e-06, | |
| "loss": 0.045, | |
| "num_input_tokens_seen": 915148176, | |
| "step": 2430, | |
| "train_runtime": 206245.7823, | |
| "train_tokens_per_second": 4437.173 | |
| }, | |
| { | |
| "epoch": 2.357255234176449, | |
| "grad_norm": 0.09281196214081752, | |
| "learning_rate": 6.691955697464722e-06, | |
| "loss": 0.0438, | |
| "num_input_tokens_seen": 917092976, | |
| "step": 2435, | |
| "train_runtime": 206645.7743, | |
| "train_tokens_per_second": 4437.995 | |
| }, | |
| { | |
| "epoch": 2.3620960910081084, | |
| "grad_norm": 0.100214030668592, | |
| "learning_rate": 6.596365658877612e-06, | |
| "loss": 0.0424, | |
| "num_input_tokens_seen": 919066720, | |
| "step": 2440, | |
| "train_runtime": 207017.7612, | |
| "train_tokens_per_second": 4439.555 | |
| }, | |
| { | |
| "epoch": 2.3669369478397675, | |
| "grad_norm": 0.10760036279429956, | |
| "learning_rate": 6.501359394861378e-06, | |
| "loss": 0.0417, | |
| "num_input_tokens_seen": 920862256, | |
| "step": 2445, | |
| "train_runtime": 207284.7764, | |
| "train_tokens_per_second": 4442.498 | |
| }, | |
| { | |
| "epoch": 2.371777804671427, | |
| "grad_norm": 0.10509350266248424, | |
| "learning_rate": 6.4069399190729965e-06, | |
| "loss": 0.045, | |
| "num_input_tokens_seen": 922629344, | |
| "step": 2450, | |
| "train_runtime": 207527.687, | |
| "train_tokens_per_second": 4445.813 | |
| }, | |
| { | |
| "epoch": 2.376618661503086, | |
| "grad_norm": 0.10714050286215131, | |
| "learning_rate": 6.3131102265561595e-06, | |
| "loss": 0.0428, | |
| "num_input_tokens_seen": 924459968, | |
| "step": 2455, | |
| "train_runtime": 207811.3596, | |
| "train_tokens_per_second": 4448.554 | |
| }, | |
| { | |
| "epoch": 2.3814595183347453, | |
| "grad_norm": 0.09927356415259088, | |
| "learning_rate": 6.219873293646275e-06, | |
| "loss": 0.0434, | |
| "num_input_tokens_seen": 926282784, | |
| "step": 2460, | |
| "train_runtime": 208098.2228, | |
| "train_tokens_per_second": 4451.181 | |
| }, | |
| { | |
| "epoch": 2.3863003751664045, | |
| "grad_norm": 0.10624426038475755, | |
| "learning_rate": 6.127232077876044e-06, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 928104928, | |
| "step": 2465, | |
| "train_runtime": 208396.5232, | |
| "train_tokens_per_second": 4453.553 | |
| }, | |
| { | |
| "epoch": 2.3911412319980636, | |
| "grad_norm": 0.10065956898085184, | |
| "learning_rate": 6.0351895178816675e-06, | |
| "loss": 0.0414, | |
| "num_input_tokens_seen": 930034976, | |
| "step": 2470, | |
| "train_runtime": 208753.2157, | |
| "train_tokens_per_second": 4455.189 | |
| }, | |
| { | |
| "epoch": 2.3959820888297227, | |
| "grad_norm": 0.08919790357784825, | |
| "learning_rate": 5.943748533309609e-06, | |
| "loss": 0.0403, | |
| "num_input_tokens_seen": 931869680, | |
| "step": 2475, | |
| "train_runtime": 209030.5157, | |
| "train_tokens_per_second": 4458.056 | |
| }, | |
| { | |
| "epoch": 2.400822945661382, | |
| "grad_norm": 0.09926050843218681, | |
| "learning_rate": 5.8529120247239945e-06, | |
| "loss": 0.0426, | |
| "num_input_tokens_seen": 933822272, | |
| "step": 2480, | |
| "train_runtime": 209411.1645, | |
| "train_tokens_per_second": 4459.276 | |
| }, | |
| { | |
| "epoch": 2.4056638024930415, | |
| "grad_norm": 0.09286276749293895, | |
| "learning_rate": 5.762682873514605e-06, | |
| "loss": 0.041, | |
| "num_input_tokens_seen": 935677664, | |
| "step": 2485, | |
| "train_runtime": 209681.396, | |
| "train_tokens_per_second": 4462.378 | |
| }, | |
| { | |
| "epoch": 2.4105046593247006, | |
| "grad_norm": 0.11296254298586997, | |
| "learning_rate": 5.673063941805451e-06, | |
| "loss": 0.0445, | |
| "num_input_tokens_seen": 937557696, | |
| "step": 2490, | |
| "train_runtime": 209997.2312, | |
| "train_tokens_per_second": 4464.619 | |
| }, | |
| { | |
| "epoch": 2.4153455161563597, | |
| "grad_norm": 0.08754210776797587, | |
| "learning_rate": 5.584058072364032e-06, | |
| "loss": 0.0403, | |
| "num_input_tokens_seen": 939447456, | |
| "step": 2495, | |
| "train_runtime": 210255.8593, | |
| "train_tokens_per_second": 4468.115 | |
| }, | |
| { | |
| "epoch": 2.420186372988019, | |
| "grad_norm": 0.10698743548503842, | |
| "learning_rate": 5.495668088511128e-06, | |
| "loss": 0.0413, | |
| "num_input_tokens_seen": 941372928, | |
| "step": 2500, | |
| "train_runtime": 210635.05, | |
| "train_tokens_per_second": 4469.213 | |
| }, | |
| { | |
| "epoch": 2.425027229819678, | |
| "grad_norm": 0.11191046124395111, | |
| "learning_rate": 5.407896794031245e-06, | |
| "loss": 0.0435, | |
| "num_input_tokens_seen": 943222032, | |
| "step": 2505, | |
| "train_runtime": 210947.9822, | |
| "train_tokens_per_second": 4471.349 | |
| }, | |
| { | |
| "epoch": 2.429868086651337, | |
| "grad_norm": 0.11147955526705726, | |
| "learning_rate": 5.3207469730836875e-06, | |
| "loss": 0.0424, | |
| "num_input_tokens_seen": 945141472, | |
| "step": 2510, | |
| "train_runtime": 211288.3155, | |
| "train_tokens_per_second": 4473.231 | |
| }, | |
| { | |
| "epoch": 2.4347089434829963, | |
| "grad_norm": 0.11269801669995572, | |
| "learning_rate": 5.2342213901142415e-06, | |
| "loss": 0.042, | |
| "num_input_tokens_seen": 946942384, | |
| "step": 2515, | |
| "train_runtime": 211559.8316, | |
| "train_tokens_per_second": 4476.003 | |
| }, | |
| { | |
| "epoch": 2.439549800314656, | |
| "grad_norm": 0.10414158057001394, | |
| "learning_rate": 5.1483227897674736e-06, | |
| "loss": 0.0424, | |
| "num_input_tokens_seen": 948797600, | |
| "step": 2520, | |
| "train_runtime": 211859.0494, | |
| "train_tokens_per_second": 4478.438 | |
| }, | |
| { | |
| "epoch": 2.444390657146315, | |
| "grad_norm": 0.10225241274686434, | |
| "learning_rate": 5.0630538967996824e-06, | |
| "loss": 0.044, | |
| "num_input_tokens_seen": 950738128, | |
| "step": 2525, | |
| "train_runtime": 212239.9612, | |
| "train_tokens_per_second": 4479.543 | |
| }, | |
| { | |
| "epoch": 2.449231513977974, | |
| "grad_norm": 0.09486493161848354, | |
| "learning_rate": 4.978417415992459e-06, | |
| "loss": 0.0414, | |
| "num_input_tokens_seen": 952683104, | |
| "step": 2530, | |
| "train_runtime": 212560.4677, | |
| "train_tokens_per_second": 4481.939 | |
| }, | |
| { | |
| "epoch": 2.4540723708096333, | |
| "grad_norm": 0.10225202491694556, | |
| "learning_rate": 4.8944160320668925e-06, | |
| "loss": 0.044, | |
| "num_input_tokens_seen": 954565248, | |
| "step": 2535, | |
| "train_runtime": 212864.2481, | |
| "train_tokens_per_second": 4484.385 | |
| }, | |
| { | |
| "epoch": 2.4589132276412924, | |
| "grad_norm": 0.10555017743814563, | |
| "learning_rate": 4.811052409598415e-06, | |
| "loss": 0.0435, | |
| "num_input_tokens_seen": 956458752, | |
| "step": 2540, | |
| "train_runtime": 213206.0267, | |
| "train_tokens_per_second": 4486.077 | |
| }, | |
| { | |
| "epoch": 2.4637540844729515, | |
| "grad_norm": 0.10832795415710876, | |
| "learning_rate": 4.728329192932277e-06, | |
| "loss": 0.0434, | |
| "num_input_tokens_seen": 958360512, | |
| "step": 2545, | |
| "train_runtime": 213541.3209, | |
| "train_tokens_per_second": 4487.939 | |
| }, | |
| { | |
| "epoch": 2.468594941304611, | |
| "grad_norm": 0.10095120179330958, | |
| "learning_rate": 4.64624900609964e-06, | |
| "loss": 0.0433, | |
| "num_input_tokens_seen": 960237872, | |
| "step": 2550, | |
| "train_runtime": 213826.9979, | |
| "train_tokens_per_second": 4490.723 | |
| }, | |
| { | |
| "epoch": 2.4734357981362702, | |
| "grad_norm": 0.10772165766963647, | |
| "learning_rate": 4.56481445273439e-06, | |
| "loss": 0.0413, | |
| "num_input_tokens_seen": 962247008, | |
| "step": 2555, | |
| "train_runtime": 214215.3446, | |
| "train_tokens_per_second": 4491.961 | |
| }, | |
| { | |
| "epoch": 2.4782766549679294, | |
| "grad_norm": 0.09806092608971834, | |
| "learning_rate": 4.4840281159905155e-06, | |
| "loss": 0.0414, | |
| "num_input_tokens_seen": 964170272, | |
| "step": 2560, | |
| "train_runtime": 214550.1729, | |
| "train_tokens_per_second": 4493.915 | |
| }, | |
| { | |
| "epoch": 2.4831175117995885, | |
| "grad_norm": 0.09220012235718683, | |
| "learning_rate": 4.403892558460177e-06, | |
| "loss": 0.0412, | |
| "num_input_tokens_seen": 966120000, | |
| "step": 2565, | |
| "train_runtime": 214929.4897, | |
| "train_tokens_per_second": 4495.056 | |
| }, | |
| { | |
| "epoch": 2.4879583686312476, | |
| "grad_norm": 0.09514831510019058, | |
| "learning_rate": 4.324410322092415e-06, | |
| "loss": 0.0412, | |
| "num_input_tokens_seen": 967960784, | |
| "step": 2570, | |
| "train_runtime": 215212.0348, | |
| "train_tokens_per_second": 4497.708 | |
| }, | |
| { | |
| "epoch": 2.4927992254629068, | |
| "grad_norm": 0.08465241248211482, | |
| "learning_rate": 4.2455839281125256e-06, | |
| "loss": 0.0417, | |
| "num_input_tokens_seen": 969844080, | |
| "step": 2575, | |
| "train_runtime": 215514.8865, | |
| "train_tokens_per_second": 4500.126 | |
| }, | |
| { | |
| "epoch": 2.4976400822945664, | |
| "grad_norm": 0.09900416312304733, | |
| "learning_rate": 4.167415876942085e-06, | |
| "loss": 0.0417, | |
| "num_input_tokens_seen": 971694160, | |
| "step": 2580, | |
| "train_runtime": 215818.3338, | |
| "train_tokens_per_second": 4502.371 | |
| }, | |
| { | |
| "epoch": 2.5024809391262255, | |
| "grad_norm": 0.09456247404799463, | |
| "learning_rate": 4.089908648119625e-06, | |
| "loss": 0.0425, | |
| "num_input_tokens_seen": 973545824, | |
| "step": 2585, | |
| "train_runtime": 216108.9629, | |
| "train_tokens_per_second": 4504.884 | |
| }, | |
| { | |
| "epoch": 2.5073217959578846, | |
| "grad_norm": 0.1006524579480961, | |
| "learning_rate": 4.013064700221991e-06, | |
| "loss": 0.0411, | |
| "num_input_tokens_seen": 975457776, | |
| "step": 2590, | |
| "train_runtime": 216449.4006, | |
| "train_tokens_per_second": 4506.632 | |
| }, | |
| { | |
| "epoch": 2.5121626527895438, | |
| "grad_norm": 0.09861997049355122, | |
| "learning_rate": 3.936886470786347e-06, | |
| "loss": 0.0405, | |
| "num_input_tokens_seen": 977366416, | |
| "step": 2595, | |
| "train_runtime": 216770.5711, | |
| "train_tokens_per_second": 4508.76 | |
| }, | |
| { | |
| "epoch": 2.517003509621203, | |
| "grad_norm": 0.09885870437167427, | |
| "learning_rate": 3.8613763762328695e-06, | |
| "loss": 0.0432, | |
| "num_input_tokens_seen": 979260288, | |
| "step": 2600, | |
| "train_runtime": 217072.79, | |
| "train_tokens_per_second": 4511.207 | |
| }, | |
| { | |
| "epoch": 2.521844366452862, | |
| "grad_norm": 0.1044111303405238, | |
| "learning_rate": 3.7865368117880688e-06, | |
| "loss": 0.0441, | |
| "num_input_tokens_seen": 981164192, | |
| "step": 2605, | |
| "train_runtime": 217397.5953, | |
| "train_tokens_per_second": 4513.225 | |
| }, | |
| { | |
| "epoch": 2.526685223284521, | |
| "grad_norm": 0.09689580727353043, | |
| "learning_rate": 3.712370151408842e-06, | |
| "loss": 0.041, | |
| "num_input_tokens_seen": 983019680, | |
| "step": 2610, | |
| "train_runtime": 217701.7754, | |
| "train_tokens_per_second": 4515.442 | |
| }, | |
| { | |
| "epoch": 2.5315260801161807, | |
| "grad_norm": 0.07930860025277806, | |
| "learning_rate": 3.6388787477071557e-06, | |
| "loss": 0.0382, | |
| "num_input_tokens_seen": 984946480, | |
| "step": 2615, | |
| "train_runtime": 218039.1448, | |
| "train_tokens_per_second": 4517.292 | |
| }, | |
| { | |
| "epoch": 2.53636693694784, | |
| "grad_norm": 0.09701585395532773, | |
| "learning_rate": 3.5660649318754153e-06, | |
| "loss": 0.0431, | |
| "num_input_tokens_seen": 986820704, | |
| "step": 2620, | |
| "train_runtime": 218364.0414, | |
| "train_tokens_per_second": 4519.154 | |
| }, | |
| { | |
| "epoch": 2.541207793779499, | |
| "grad_norm": 0.08951915315467092, | |
| "learning_rate": 3.493931013612528e-06, | |
| "loss": 0.0404, | |
| "num_input_tokens_seen": 988683712, | |
| "step": 2625, | |
| "train_runtime": 218662.1125, | |
| "train_tokens_per_second": 4521.514 | |
| }, | |
| { | |
| "epoch": 2.546048650611158, | |
| "grad_norm": 0.10946918657325026, | |
| "learning_rate": 3.4224792810506335e-06, | |
| "loss": 0.0412, | |
| "num_input_tokens_seen": 990613776, | |
| "step": 2630, | |
| "train_runtime": 219031.7427, | |
| "train_tokens_per_second": 4522.695 | |
| }, | |
| { | |
| "epoch": 2.5508895074428173, | |
| "grad_norm": 0.09243649296411362, | |
| "learning_rate": 3.3517120006825137e-06, | |
| "loss": 0.0421, | |
| "num_input_tokens_seen": 992516752, | |
| "step": 2635, | |
| "train_runtime": 219368.5843, | |
| "train_tokens_per_second": 4524.425 | |
| }, | |
| { | |
| "epoch": 2.5557303642744764, | |
| "grad_norm": 0.11072560597436613, | |
| "learning_rate": 3.2816314172897183e-06, | |
| "loss": 0.0433, | |
| "num_input_tokens_seen": 994379968, | |
| "step": 2640, | |
| "train_runtime": 219685.0843, | |
| "train_tokens_per_second": 4526.388 | |
| }, | |
| { | |
| "epoch": 2.5605712211061356, | |
| "grad_norm": 0.1058803495743547, | |
| "learning_rate": 3.2122397538713432e-06, | |
| "loss": 0.0413, | |
| "num_input_tokens_seen": 996372960, | |
| "step": 2645, | |
| "train_runtime": 220112.659, | |
| "train_tokens_per_second": 4526.65 | |
| }, | |
| { | |
| "epoch": 2.565412077937795, | |
| "grad_norm": 0.09586921377974332, | |
| "learning_rate": 3.143539211573518e-06, | |
| "loss": 0.0445, | |
| "num_input_tokens_seen": 998147504, | |
| "step": 2650, | |
| "train_runtime": 220369.4226, | |
| "train_tokens_per_second": 4529.428 | |
| }, | |
| { | |
| "epoch": 2.5702529347694543, | |
| "grad_norm": 0.100735105625526, | |
| "learning_rate": 3.075531969619594e-06, | |
| "loss": 0.0402, | |
| "num_input_tokens_seen": 1000010032, | |
| "step": 2655, | |
| "train_runtime": 220683.995, | |
| "train_tokens_per_second": 4531.412 | |
| }, | |
| { | |
| "epoch": 2.5750937916011134, | |
| "grad_norm": 0.08157917824449015, | |
| "learning_rate": 3.00822018524099e-06, | |
| "loss": 0.0394, | |
| "num_input_tokens_seen": 1001911504, | |
| "step": 2660, | |
| "train_runtime": 220992.259, | |
| "train_tokens_per_second": 4533.695 | |
| }, | |
| { | |
| "epoch": 2.5799346484327725, | |
| "grad_norm": 0.10671166825123399, | |
| "learning_rate": 2.9416059936088176e-06, | |
| "loss": 0.0405, | |
| "num_input_tokens_seen": 1003915536, | |
| "step": 2665, | |
| "train_runtime": 221370.367, | |
| "train_tokens_per_second": 4535.004 | |
| }, | |
| { | |
| "epoch": 2.5847755052644317, | |
| "grad_norm": 0.10485178372092739, | |
| "learning_rate": 2.8756915077660816e-06, | |
| "loss": 0.042, | |
| "num_input_tokens_seen": 1005786320, | |
| "step": 2670, | |
| "train_runtime": 221680.2811, | |
| "train_tokens_per_second": 4537.103 | |
| }, | |
| { | |
| "epoch": 2.5896163620960913, | |
| "grad_norm": 0.11239886407003709, | |
| "learning_rate": 2.810478818560719e-06, | |
| "loss": 0.046, | |
| "num_input_tokens_seen": 1007566272, | |
| "step": 2675, | |
| "train_runtime": 221965.9319, | |
| "train_tokens_per_second": 4539.283 | |
| }, | |
| { | |
| "epoch": 2.59445721892775, | |
| "grad_norm": 0.10703312505536274, | |
| "learning_rate": 2.745969994579231e-06, | |
| "loss": 0.0429, | |
| "num_input_tokens_seen": 1009450528, | |
| "step": 2680, | |
| "train_runtime": 222294.4882, | |
| "train_tokens_per_second": 4541.051 | |
| }, | |
| { | |
| "epoch": 2.5992980757594095, | |
| "grad_norm": 0.09294201066454781, | |
| "learning_rate": 2.6821670820810883e-06, | |
| "loss": 0.0413, | |
| "num_input_tokens_seen": 1011300880, | |
| "step": 2685, | |
| "train_runtime": 222609.6014, | |
| "train_tokens_per_second": 4542.935 | |
| }, | |
| { | |
| "epoch": 2.6041389325910687, | |
| "grad_norm": 0.09973495575855759, | |
| "learning_rate": 2.6190721049338145e-06, | |
| "loss": 0.0406, | |
| "num_input_tokens_seen": 1013140688, | |
| "step": 2690, | |
| "train_runtime": 222887.9, | |
| "train_tokens_per_second": 4545.517 | |
| }, | |
| { | |
| "epoch": 2.608979789422728, | |
| "grad_norm": 0.100229670298609, | |
| "learning_rate": 2.5566870645487906e-06, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 1015019296, | |
| "step": 2695, | |
| "train_runtime": 223209.5025, | |
| "train_tokens_per_second": 4547.384 | |
| }, | |
| { | |
| "epoch": 2.613820646254387, | |
| "grad_norm": 0.08644793584438773, | |
| "learning_rate": 2.495013939817767e-06, | |
| "loss": 0.0395, | |
| "num_input_tokens_seen": 1016892176, | |
| "step": 2700, | |
| "train_runtime": 223525.4721, | |
| "train_tokens_per_second": 4549.335 | |
| }, | |
| { | |
| "epoch": 2.618661503086046, | |
| "grad_norm": 0.09650950829215955, | |
| "learning_rate": 2.434054687050091e-06, | |
| "loss": 0.0405, | |
| "num_input_tokens_seen": 1018797600, | |
| "step": 2705, | |
| "train_runtime": 223827.1085, | |
| "train_tokens_per_second": 4551.717 | |
| }, | |
| { | |
| "epoch": 2.6235023599177056, | |
| "grad_norm": 0.12423581499512551, | |
| "learning_rate": 2.3738112399106644e-06, | |
| "loss": 0.0454, | |
| "num_input_tokens_seen": 1020718416, | |
| "step": 2710, | |
| "train_runtime": 224190.0696, | |
| "train_tokens_per_second": 4552.915 | |
| }, | |
| { | |
| "epoch": 2.6283432167493648, | |
| "grad_norm": 0.10989337129875472, | |
| "learning_rate": 2.314285509358566e-06, | |
| "loss": 0.0428, | |
| "num_input_tokens_seen": 1022524336, | |
| "step": 2715, | |
| "train_runtime": 224455.0174, | |
| "train_tokens_per_second": 4555.587 | |
| }, | |
| { | |
| "epoch": 2.633184073581024, | |
| "grad_norm": 0.09039073713194058, | |
| "learning_rate": 2.255479383586509e-06, | |
| "loss": 0.0406, | |
| "num_input_tokens_seen": 1024388192, | |
| "step": 2720, | |
| "train_runtime": 224743.9654, | |
| "train_tokens_per_second": 4558.023 | |
| }, | |
| { | |
| "epoch": 2.638024930412683, | |
| "grad_norm": 0.08953330757222618, | |
| "learning_rate": 2.197394727960861e-06, | |
| "loss": 0.0408, | |
| "num_input_tokens_seen": 1026219264, | |
| "step": 2725, | |
| "train_runtime": 225055.269, | |
| "train_tokens_per_second": 4559.854 | |
| }, | |
| { | |
| "epoch": 2.642865787244342, | |
| "grad_norm": 0.08090268583905104, | |
| "learning_rate": 2.1400333849625338e-06, | |
| "loss": 0.0392, | |
| "num_input_tokens_seen": 1028260624, | |
| "step": 2730, | |
| "train_runtime": 225415.8787, | |
| "train_tokens_per_second": 4561.616 | |
| }, | |
| { | |
| "epoch": 2.6477066440760013, | |
| "grad_norm": 0.10367619160660342, | |
| "learning_rate": 2.083397174128518e-06, | |
| "loss": 0.0415, | |
| "num_input_tokens_seen": 1030159584, | |
| "step": 2735, | |
| "train_runtime": 225741.8649, | |
| "train_tokens_per_second": 4563.441 | |
| }, | |
| { | |
| "epoch": 2.6525475009076604, | |
| "grad_norm": 0.11074351346114625, | |
| "learning_rate": 2.027487891994162e-06, | |
| "loss": 0.0432, | |
| "num_input_tokens_seen": 1032081920, | |
| "step": 2740, | |
| "train_runtime": 226092.6663, | |
| "train_tokens_per_second": 4564.862 | |
| }, | |
| { | |
| "epoch": 2.65738835773932, | |
| "grad_norm": 0.10039057936660997, | |
| "learning_rate": 1.9723073120361924e-06, | |
| "loss": 0.0425, | |
| "num_input_tokens_seen": 1033931904, | |
| "step": 2745, | |
| "train_runtime": 226382.3218, | |
| "train_tokens_per_second": 4567.194 | |
| }, | |
| { | |
| "epoch": 2.662229214570979, | |
| "grad_norm": 0.09363138188924082, | |
| "learning_rate": 1.9178571846164532e-06, | |
| "loss": 0.0397, | |
| "num_input_tokens_seen": 1035806768, | |
| "step": 2750, | |
| "train_runtime": 226704.6785, | |
| "train_tokens_per_second": 4568.97 | |
| }, | |
| { | |
| "epoch": 2.6670700714026383, | |
| "grad_norm": 0.08714334312836239, | |
| "learning_rate": 1.8641392369263933e-06, | |
| "loss": 0.04, | |
| "num_input_tokens_seen": 1037779856, | |
| "step": 2755, | |
| "train_runtime": 227072.2692, | |
| "train_tokens_per_second": 4570.262 | |
| }, | |
| { | |
| "epoch": 2.6719109282342974, | |
| "grad_norm": 0.10402712457251752, | |
| "learning_rate": 1.8111551729322662e-06, | |
| "loss": 0.0439, | |
| "num_input_tokens_seen": 1039606192, | |
| "step": 2760, | |
| "train_runtime": 227397.5471, | |
| "train_tokens_per_second": 4571.756 | |
| }, | |
| { | |
| "epoch": 2.6767517850659566, | |
| "grad_norm": 0.10242769460180877, | |
| "learning_rate": 1.7589066733210814e-06, | |
| "loss": 0.0473, | |
| "num_input_tokens_seen": 1041352016, | |
| "step": 2765, | |
| "train_runtime": 227680.2847, | |
| "train_tokens_per_second": 4573.747 | |
| }, | |
| { | |
| "epoch": 2.681592641897616, | |
| "grad_norm": 0.10096005023224232, | |
| "learning_rate": 1.7073953954472949e-06, | |
| "loss": 0.0432, | |
| "num_input_tokens_seen": 1043209584, | |
| "step": 2770, | |
| "train_runtime": 228004.7054, | |
| "train_tokens_per_second": 4575.386 | |
| }, | |
| { | |
| "epoch": 2.686433498729275, | |
| "grad_norm": 0.10430771290514242, | |
| "learning_rate": 1.6566229732802501e-06, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 1045070400, | |
| "step": 2775, | |
| "train_runtime": 228328.0951, | |
| "train_tokens_per_second": 4577.056 | |
| }, | |
| { | |
| "epoch": 2.6912743555609344, | |
| "grad_norm": 0.09976539169225217, | |
| "learning_rate": 1.6065910173523101e-06, | |
| "loss": 0.0404, | |
| "num_input_tokens_seen": 1047027216, | |
| "step": 2780, | |
| "train_runtime": 228649.8742, | |
| "train_tokens_per_second": 4579.173 | |
| }, | |
| { | |
| "epoch": 2.6961152123925936, | |
| "grad_norm": 0.09965543945612194, | |
| "learning_rate": 1.5573011147078236e-06, | |
| "loss": 0.0407, | |
| "num_input_tokens_seen": 1048933616, | |
| "step": 2785, | |
| "train_runtime": 228968.4155, | |
| "train_tokens_per_second": 4581.128 | |
| }, | |
| { | |
| "epoch": 2.7009560692242527, | |
| "grad_norm": 0.09982142335711996, | |
| "learning_rate": 1.5087548288527291e-06, | |
| "loss": 0.0416, | |
| "num_input_tokens_seen": 1050730672, | |
| "step": 2790, | |
| "train_runtime": 229238.8178, | |
| "train_tokens_per_second": 4583.563 | |
| }, | |
| { | |
| "epoch": 2.705796926055912, | |
| "grad_norm": 0.10720846842326023, | |
| "learning_rate": 1.4609536997049977e-06, | |
| "loss": 0.0415, | |
| "num_input_tokens_seen": 1052607296, | |
| "step": 2795, | |
| "train_runtime": 229546.0728, | |
| "train_tokens_per_second": 4585.604 | |
| }, | |
| { | |
| "epoch": 2.710637782887571, | |
| "grad_norm": 0.1048290688754556, | |
| "learning_rate": 1.4138992435457688e-06, | |
| "loss": 0.041, | |
| "num_input_tokens_seen": 1054530832, | |
| "step": 2800, | |
| "train_runtime": 229875.6229, | |
| "train_tokens_per_second": 4587.397 | |
| }, | |
| { | |
| "epoch": 2.7154786397192305, | |
| "grad_norm": 0.10487087810225432, | |
| "learning_rate": 1.3675929529712555e-06, | |
| "loss": 0.0404, | |
| "num_input_tokens_seen": 1056509856, | |
| "step": 2805, | |
| "train_runtime": 230229.7535, | |
| "train_tokens_per_second": 4588.937 | |
| }, | |
| { | |
| "epoch": 2.7203194965508897, | |
| "grad_norm": 0.10984325530051703, | |
| "learning_rate": 1.3220362968454026e-06, | |
| "loss": 0.041, | |
| "num_input_tokens_seen": 1058388320, | |
| "step": 2810, | |
| "train_runtime": 230583.9386, | |
| "train_tokens_per_second": 4590.035 | |
| }, | |
| { | |
| "epoch": 2.725160353382549, | |
| "grad_norm": 0.09575719645821536, | |
| "learning_rate": 1.277230720253289e-06, | |
| "loss": 0.0416, | |
| "num_input_tokens_seen": 1060208912, | |
| "step": 2815, | |
| "train_runtime": 230862.7987, | |
| "train_tokens_per_second": 4592.377 | |
| }, | |
| { | |
| "epoch": 2.730001210214208, | |
| "grad_norm": 0.0952849105139362, | |
| "learning_rate": 1.2331776444552939e-06, | |
| "loss": 0.042, | |
| "num_input_tokens_seen": 1062178720, | |
| "step": 2820, | |
| "train_runtime": 231234.4358, | |
| "train_tokens_per_second": 4593.514 | |
| }, | |
| { | |
| "epoch": 2.734842067045867, | |
| "grad_norm": 0.09720163766984548, | |
| "learning_rate": 1.1898784668419927e-06, | |
| "loss": 0.0409, | |
| "num_input_tokens_seen": 1064115760, | |
| "step": 2825, | |
| "train_runtime": 231566.8629, | |
| "train_tokens_per_second": 4595.285 | |
| }, | |
| { | |
| "epoch": 2.739682923877526, | |
| "grad_norm": 0.10713711730304852, | |
| "learning_rate": 1.1473345608898789e-06, | |
| "loss": 0.0433, | |
| "num_input_tokens_seen": 1065971408, | |
| "step": 2830, | |
| "train_runtime": 231877.52, | |
| "train_tokens_per_second": 4597.131 | |
| }, | |
| { | |
| "epoch": 2.7445237807091853, | |
| "grad_norm": 0.09631173848061805, | |
| "learning_rate": 1.10554727611773e-06, | |
| "loss": 0.0433, | |
| "num_input_tokens_seen": 1067891024, | |
| "step": 2835, | |
| "train_runtime": 232238.3471, | |
| "train_tokens_per_second": 4598.254 | |
| }, | |
| { | |
| "epoch": 2.749364637540845, | |
| "grad_norm": 0.10739207946404603, | |
| "learning_rate": 1.0645179380438657e-06, | |
| "loss": 0.0433, | |
| "num_input_tokens_seen": 1069744096, | |
| "step": 2840, | |
| "train_runtime": 232579.3058, | |
| "train_tokens_per_second": 4599.481 | |
| }, | |
| { | |
| "epoch": 2.754205494372504, | |
| "grad_norm": 0.10233938837342275, | |
| "learning_rate": 1.0242478481440498e-06, | |
| "loss": 0.0395, | |
| "num_input_tokens_seen": 1071601856, | |
| "step": 2845, | |
| "train_runtime": 232856.6503, | |
| "train_tokens_per_second": 4601.981 | |
| }, | |
| { | |
| "epoch": 2.759046351204163, | |
| "grad_norm": 0.09778768115212016, | |
| "learning_rate": 9.847382838102492e-07, | |
| "loss": 0.0421, | |
| "num_input_tokens_seen": 1073480480, | |
| "step": 2850, | |
| "train_runtime": 233167.1054, | |
| "train_tokens_per_second": 4603.91 | |
| }, | |
| { | |
| "epoch": 2.7638872080358223, | |
| "grad_norm": 0.08134168754707406, | |
| "learning_rate": 9.459904983100704e-07, | |
| "loss": 0.0404, | |
| "num_input_tokens_seen": 1075401632, | |
| "step": 2855, | |
| "train_runtime": 233474.4543, | |
| "train_tokens_per_second": 4606.078 | |
| }, | |
| { | |
| "epoch": 2.7687280648674815, | |
| "grad_norm": 0.10586368696758197, | |
| "learning_rate": 9.080057207470405e-07, | |
| "loss": 0.0379, | |
| "num_input_tokens_seen": 1077335872, | |
| "step": 2860, | |
| "train_runtime": 233807.1971, | |
| "train_tokens_per_second": 4607.796 | |
| }, | |
| { | |
| "epoch": 2.7735689216991406, | |
| "grad_norm": 0.09864439204162297, | |
| "learning_rate": 8.707851560216112e-07, | |
| "loss": 0.0403, | |
| "num_input_tokens_seen": 1079310576, | |
| "step": 2865, | |
| "train_runtime": 234165.3118, | |
| "train_tokens_per_second": 4609.182 | |
| }, | |
| { | |
| "epoch": 2.7784097785307997, | |
| "grad_norm": 0.09763664283200235, | |
| "learning_rate": 8.343299847929226e-07, | |
| "loss": 0.0387, | |
| "num_input_tokens_seen": 1081245280, | |
| "step": 2870, | |
| "train_runtime": 234484.5006, | |
| "train_tokens_per_second": 4611.159 | |
| }, | |
| { | |
| "epoch": 2.7832506353624593, | |
| "grad_norm": 0.10339662193442319, | |
| "learning_rate": 7.986413634413686e-07, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 1083177952, | |
| "step": 2875, | |
| "train_runtime": 234868.9456, | |
| "train_tokens_per_second": 4611.84 | |
| }, | |
| { | |
| "epoch": 2.7880914921941184, | |
| "grad_norm": 0.10893179032282667, | |
| "learning_rate": 7.637204240319163e-07, | |
| "loss": 0.0415, | |
| "num_input_tokens_seen": 1085074640, | |
| "step": 2880, | |
| "train_runtime": 235198.1723, | |
| "train_tokens_per_second": 4613.448 | |
| }, | |
| { | |
| "epoch": 2.7929323490257776, | |
| "grad_norm": 0.09451603653033806, | |
| "learning_rate": 7.295682742781862e-07, | |
| "loss": 0.04, | |
| "num_input_tokens_seen": 1086903600, | |
| "step": 2885, | |
| "train_runtime": 235485.5269, | |
| "train_tokens_per_second": 4615.586 | |
| }, | |
| { | |
| "epoch": 2.7977732058574367, | |
| "grad_norm": 0.09247794261791085, | |
| "learning_rate": 6.961859975073121e-07, | |
| "loss": 0.0407, | |
| "num_input_tokens_seen": 1088720960, | |
| "step": 2890, | |
| "train_runtime": 235741.8558, | |
| "train_tokens_per_second": 4618.276 | |
| }, | |
| { | |
| "epoch": 2.802614062689096, | |
| "grad_norm": 0.09819027331260509, | |
| "learning_rate": 6.635746526255981e-07, | |
| "loss": 0.0399, | |
| "num_input_tokens_seen": 1090542208, | |
| "step": 2895, | |
| "train_runtime": 236021.1683, | |
| "train_tokens_per_second": 4620.527 | |
| }, | |
| { | |
| "epoch": 2.8074549195207554, | |
| "grad_norm": 0.10508939236953255, | |
| "learning_rate": 6.317352740849048e-07, | |
| "loss": 0.0407, | |
| "num_input_tokens_seen": 1092439568, | |
| "step": 2900, | |
| "train_runtime": 236348.1264, | |
| "train_tokens_per_second": 4622.163 | |
| }, | |
| { | |
| "epoch": 2.812295776352414, | |
| "grad_norm": 0.1127436229338298, | |
| "learning_rate": 6.006688718498549e-07, | |
| "loss": 0.0425, | |
| "num_input_tokens_seen": 1094246688, | |
| "step": 2905, | |
| "train_runtime": 236640.2834, | |
| "train_tokens_per_second": 4624.093 | |
| }, | |
| { | |
| "epoch": 2.8171366331840737, | |
| "grad_norm": 0.09425104412787287, | |
| "learning_rate": 5.703764313657795e-07, | |
| "loss": 0.0426, | |
| "num_input_tokens_seen": 1096143712, | |
| "step": 2910, | |
| "train_runtime": 236969.0877, | |
| "train_tokens_per_second": 4625.682 | |
| }, | |
| { | |
| "epoch": 2.821977490015733, | |
| "grad_norm": 0.0964656395201385, | |
| "learning_rate": 5.408589135274755e-07, | |
| "loss": 0.0411, | |
| "num_input_tokens_seen": 1098111104, | |
| "step": 2915, | |
| "train_runtime": 237285.2741, | |
| "train_tokens_per_second": 4627.81 | |
| }, | |
| { | |
| "epoch": 2.826818346847392, | |
| "grad_norm": 0.10779087274875156, | |
| "learning_rate": 5.121172546487196e-07, | |
| "loss": 0.0407, | |
| "num_input_tokens_seen": 1100058224, | |
| "step": 2920, | |
| "train_runtime": 237651.5376, | |
| "train_tokens_per_second": 4628.871 | |
| }, | |
| { | |
| "epoch": 2.831659203679051, | |
| "grad_norm": 0.10847631726351929, | |
| "learning_rate": 4.841523664325581e-07, | |
| "loss": 0.0397, | |
| "num_input_tokens_seen": 1101896720, | |
| "step": 2925, | |
| "train_runtime": 237940.6724, | |
| "train_tokens_per_second": 4630.973 | |
| }, | |
| { | |
| "epoch": 2.8365000605107102, | |
| "grad_norm": 0.09624461904411019, | |
| "learning_rate": 4.5696513594240264e-07, | |
| "loss": 0.0412, | |
| "num_input_tokens_seen": 1103744592, | |
| "step": 2930, | |
| "train_runtime": 238212.5813, | |
| "train_tokens_per_second": 4633.444 | |
| }, | |
| { | |
| "epoch": 2.84134091734237, | |
| "grad_norm": 0.1121598440101494, | |
| "learning_rate": 4.305564255738831e-07, | |
| "loss": 0.0407, | |
| "num_input_tokens_seen": 1105651136, | |
| "step": 2935, | |
| "train_runtime": 238531.5307, | |
| "train_tokens_per_second": 4635.241 | |
| }, | |
| { | |
| "epoch": 2.846181774174029, | |
| "grad_norm": 0.10501345290205552, | |
| "learning_rate": 4.0492707302749176e-07, | |
| "loss": 0.0412, | |
| "num_input_tokens_seen": 1107467904, | |
| "step": 2940, | |
| "train_runtime": 238807.7005, | |
| "train_tokens_per_second": 4637.488 | |
| }, | |
| { | |
| "epoch": 2.851022631005688, | |
| "grad_norm": 0.09913381095639745, | |
| "learning_rate": 3.800778912820102e-07, | |
| "loss": 0.041, | |
| "num_input_tokens_seen": 1109281136, | |
| "step": 2945, | |
| "train_runtime": 239102.1626, | |
| "train_tokens_per_second": 4639.361 | |
| }, | |
| { | |
| "epoch": 2.8558634878373472, | |
| "grad_norm": 0.10690920603697875, | |
| "learning_rate": 3.560096685687325e-07, | |
| "loss": 0.041, | |
| "num_input_tokens_seen": 1111166496, | |
| "step": 2950, | |
| "train_runtime": 239424.9583, | |
| "train_tokens_per_second": 4640.98 | |
| }, | |
| { | |
| "epoch": 2.8607043446690064, | |
| "grad_norm": 0.10371511929667834, | |
| "learning_rate": 3.32723168346441e-07, | |
| "loss": 0.0408, | |
| "num_input_tokens_seen": 1113024464, | |
| "step": 2955, | |
| "train_runtime": 239736.7802, | |
| "train_tokens_per_second": 4642.694 | |
| }, | |
| { | |
| "epoch": 2.8655452015006655, | |
| "grad_norm": 0.09279356116922857, | |
| "learning_rate": 3.102191292772144e-07, | |
| "loss": 0.0428, | |
| "num_input_tokens_seen": 1114789808, | |
| "step": 2960, | |
| "train_runtime": 239990.4972, | |
| "train_tokens_per_second": 4645.141 | |
| }, | |
| { | |
| "epoch": 2.8703860583323246, | |
| "grad_norm": 0.10484566419466071, | |
| "learning_rate": 2.884982652029716e-07, | |
| "loss": 0.0414, | |
| "num_input_tokens_seen": 1116715504, | |
| "step": 2965, | |
| "train_runtime": 240306.4348, | |
| "train_tokens_per_second": 4647.048 | |
| }, | |
| { | |
| "epoch": 2.875226915163984, | |
| "grad_norm": 0.09371900146099076, | |
| "learning_rate": 2.6756126512285094e-07, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 1118661392, | |
| "step": 2970, | |
| "train_runtime": 240658.0326, | |
| "train_tokens_per_second": 4648.344 | |
| }, | |
| { | |
| "epoch": 2.8800677719956433, | |
| "grad_norm": 0.08968284205417433, | |
| "learning_rate": 2.4740879317133314e-07, | |
| "loss": 0.043, | |
| "num_input_tokens_seen": 1120523488, | |
| "step": 2975, | |
| "train_runtime": 240990.9356, | |
| "train_tokens_per_second": 4649.65 | |
| }, | |
| { | |
| "epoch": 2.8849086288273025, | |
| "grad_norm": 0.1194432143401207, | |
| "learning_rate": 2.2804148859719433e-07, | |
| "loss": 0.0425, | |
| "num_input_tokens_seen": 1122329216, | |
| "step": 2980, | |
| "train_runtime": 241267.0534, | |
| "train_tokens_per_second": 4651.813 | |
| }, | |
| { | |
| "epoch": 2.8897494856589616, | |
| "grad_norm": 0.10332352909612158, | |
| "learning_rate": 2.0945996574321392e-07, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 1124108576, | |
| "step": 2985, | |
| "train_runtime": 241512.3485, | |
| "train_tokens_per_second": 4654.456 | |
| }, | |
| { | |
| "epoch": 2.8945903424906207, | |
| "grad_norm": 0.09902903701655406, | |
| "learning_rate": 1.9166481402669856e-07, | |
| "loss": 0.0416, | |
| "num_input_tokens_seen": 1125941808, | |
| "step": 2990, | |
| "train_runtime": 241794.3455, | |
| "train_tokens_per_second": 4656.609 | |
| }, | |
| { | |
| "epoch": 2.89943119932228, | |
| "grad_norm": 0.10194619378485503, | |
| "learning_rate": 1.7465659792077484e-07, | |
| "loss": 0.0412, | |
| "num_input_tokens_seen": 1127752624, | |
| "step": 2995, | |
| "train_runtime": 242080.3868, | |
| "train_tokens_per_second": 4658.587 | |
| }, | |
| { | |
| "epoch": 2.904272056153939, | |
| "grad_norm": 0.11088193358770991, | |
| "learning_rate": 1.5843585693648967e-07, | |
| "loss": 0.0418, | |
| "num_input_tokens_seen": 1129601232, | |
| "step": 3000, | |
| "train_runtime": 242368.9222, | |
| "train_tokens_per_second": 4660.669 | |
| }, | |
| { | |
| "epoch": 2.9091129129855986, | |
| "grad_norm": 0.09014663812275092, | |
| "learning_rate": 1.4300310560570184e-07, | |
| "loss": 0.041, | |
| "num_input_tokens_seen": 1131446736, | |
| "step": 3005, | |
| "train_runtime": 242641.5856, | |
| "train_tokens_per_second": 4663.037 | |
| }, | |
| { | |
| "epoch": 2.9139537698172577, | |
| "grad_norm": 0.10599016609672014, | |
| "learning_rate": 1.2835883346474786e-07, | |
| "loss": 0.0402, | |
| "num_input_tokens_seen": 1133359440, | |
| "step": 3010, | |
| "train_runtime": 242967.2782, | |
| "train_tokens_per_second": 4664.659 | |
| }, | |
| { | |
| "epoch": 2.918794626648917, | |
| "grad_norm": 0.09666954905813178, | |
| "learning_rate": 1.1450350503892648e-07, | |
| "loss": 0.0413, | |
| "num_input_tokens_seen": 1135150272, | |
| "step": 3015, | |
| "train_runtime": 243234.1704, | |
| "train_tokens_per_second": 4666.903 | |
| }, | |
| { | |
| "epoch": 2.923635483480576, | |
| "grad_norm": 0.0952319372122157, | |
| "learning_rate": 1.014375598277495e-07, | |
| "loss": 0.0397, | |
| "num_input_tokens_seen": 1137058240, | |
| "step": 3020, | |
| "train_runtime": 243553.8042, | |
| "train_tokens_per_second": 4668.612 | |
| }, | |
| { | |
| "epoch": 2.928476340312235, | |
| "grad_norm": 0.0961571420112863, | |
| "learning_rate": 8.916141229101671e-08, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 1138880976, | |
| "step": 3025, | |
| "train_runtime": 243833.0558, | |
| "train_tokens_per_second": 4670.741 | |
| }, | |
| { | |
| "epoch": 2.9333171971438947, | |
| "grad_norm": 0.10748828390119694, | |
| "learning_rate": 7.767545183565983e-08, | |
| "loss": 0.0417, | |
| "num_input_tokens_seen": 1140802272, | |
| "step": 3030, | |
| "train_runtime": 244185.0486, | |
| "train_tokens_per_second": 4671.876 | |
| }, | |
| { | |
| "epoch": 2.9381580539755534, | |
| "grad_norm": 0.09453143585948885, | |
| "learning_rate": 6.69800428033912e-08, | |
| "loss": 0.0408, | |
| "num_input_tokens_seen": 1142726336, | |
| "step": 3035, | |
| "train_runtime": 244534.0926, | |
| "train_tokens_per_second": 4673.076 | |
| }, | |
| { | |
| "epoch": 2.942998910807213, | |
| "grad_norm": 0.10758820416109051, | |
| "learning_rate": 5.707552445914366e-08, | |
| "loss": 0.041, | |
| "num_input_tokens_seen": 1144590976, | |
| "step": 3040, | |
| "train_runtime": 244862.1006, | |
| "train_tokens_per_second": 4674.431 | |
| }, | |
| { | |
| "epoch": 2.947839767638872, | |
| "grad_norm": 0.09221176210129879, | |
| "learning_rate": 4.7962210980317946e-08, | |
| "loss": 0.0408, | |
| "num_input_tokens_seen": 1146496640, | |
| "step": 3045, | |
| "train_runtime": 245189.2651, | |
| "train_tokens_per_second": 4675.966 | |
| }, | |
| { | |
| "epoch": 2.9526806244705313, | |
| "grad_norm": 0.10331440557683293, | |
| "learning_rate": 3.964039144680465e-08, | |
| "loss": 0.0418, | |
| "num_input_tokens_seen": 1148333472, | |
| "step": 3050, | |
| "train_runtime": 245482.1925, | |
| "train_tokens_per_second": 4677.869 | |
| }, | |
| { | |
| "epoch": 2.9575214813021904, | |
| "grad_norm": 0.08747696767377501, | |
| "learning_rate": 3.2110329831824825e-08, | |
| "loss": 0.0423, | |
| "num_input_tokens_seen": 1150262528, | |
| "step": 3055, | |
| "train_runtime": 245807.9806, | |
| "train_tokens_per_second": 4679.517 | |
| }, | |
| { | |
| "epoch": 2.9623623381338495, | |
| "grad_norm": 0.08985631847344186, | |
| "learning_rate": 2.537226499355616e-08, | |
| "loss": 0.0384, | |
| "num_input_tokens_seen": 1152112096, | |
| "step": 3060, | |
| "train_runtime": 246091.225, | |
| "train_tokens_per_second": 4681.646 | |
| }, | |
| { | |
| "epoch": 2.967203194965509, | |
| "grad_norm": 0.10286034088749602, | |
| "learning_rate": 1.942641066754458e-08, | |
| "loss": 0.0419, | |
| "num_input_tokens_seen": 1153978224, | |
| "step": 3065, | |
| "train_runtime": 246407.1175, | |
| "train_tokens_per_second": 4683.218 | |
| }, | |
| { | |
| "epoch": 2.9720440517971682, | |
| "grad_norm": 0.09984385756001556, | |
| "learning_rate": 1.4272955459937453e-08, | |
| "loss": 0.0414, | |
| "num_input_tokens_seen": 1155885360, | |
| "step": 3070, | |
| "train_runtime": 246753.738, | |
| "train_tokens_per_second": 4684.368 | |
| }, | |
| { | |
| "epoch": 2.9768849086288274, | |
| "grad_norm": 0.09194901699543462, | |
| "learning_rate": 9.912062841496705e-09, | |
| "loss": 0.0387, | |
| "num_input_tokens_seen": 1157755504, | |
| "step": 3075, | |
| "train_runtime": 247066.5954, | |
| "train_tokens_per_second": 4686.006 | |
| }, | |
| { | |
| "epoch": 2.9817257654604865, | |
| "grad_norm": 0.10514662226999139, | |
| "learning_rate": 6.343871142411306e-09, | |
| "loss": 0.042, | |
| "num_input_tokens_seen": 1159608192, | |
| "step": 3080, | |
| "train_runtime": 247360.5627, | |
| "train_tokens_per_second": 4687.927 | |
| }, | |
| { | |
| "epoch": 2.9865666222921456, | |
| "grad_norm": 0.09518313021004897, | |
| "learning_rate": 3.568493547909113e-09, | |
| "loss": 0.0407, | |
| "num_input_tokens_seen": 1161579776, | |
| "step": 3085, | |
| "train_runtime": 247715.5527, | |
| "train_tokens_per_second": 4689.168 | |
| }, | |
| { | |
| "epoch": 2.991407479123805, | |
| "grad_norm": 0.09017165474805426, | |
| "learning_rate": 1.586018094670849e-09, | |
| "loss": 0.0391, | |
| "num_input_tokens_seen": 1163408592, | |
| "step": 3090, | |
| "train_runtime": 247983.5908, | |
| "train_tokens_per_second": 4691.474 | |
| }, | |
| { | |
| "epoch": 2.996248335955464, | |
| "grad_norm": 0.1022357668004034, | |
| "learning_rate": 3.965076680351176e-10, | |
| "loss": 0.0417, | |
| "num_input_tokens_seen": 1165284400, | |
| "step": 3095, | |
| "train_runtime": 248298.2933, | |
| "train_tokens_per_second": 4693.083 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "num_input_tokens_seen": 1166793552, | |
| "step": 3099, | |
| "total_flos": 5942698242998272.0, | |
| "train_loss": 0.21310824125995556, | |
| "train_runtime": 248614.6933, | |
| "train_samples_per_second": 1.595, | |
| "train_steps_per_second": 0.012 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3099, | |
| "num_input_tokens_seen": 1166793552, | |
| "num_train_epochs": 3, | |
| "save_steps": 7000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5942698242998272.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |