| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.7839559871158865, |
| "eval_steps": 500, |
| "global_step": 50016, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005015713289289101, |
| "grad_norm": 5.1814727783203125, |
| "learning_rate": 1.875e-05, |
| "loss": 39.1474, |
| "step": 32, |
| "throughput": 3995.130054373345 |
| }, |
| { |
| "epoch": 0.0010031426578578201, |
| "grad_norm": 2.1393489837646484, |
| "learning_rate": 3.75e-05, |
| "loss": 30.6991, |
| "step": 64, |
| "throughput": 5926.865921123086 |
| }, |
| { |
| "epoch": 0.0015047139867867302, |
| "grad_norm": 1.213215947151184, |
| "learning_rate": 5.625e-05, |
| "loss": 27.1674, |
| "step": 96, |
| "throughput": 7137.788461518295 |
| }, |
| { |
| "epoch": 0.0020062853157156403, |
| "grad_norm": 1.3887317180633545, |
| "learning_rate": 7.5e-05, |
| "loss": 24.7095, |
| "step": 128, |
| "throughput": 8012.59199968389 |
| }, |
| { |
| "epoch": 0.0025078566446445506, |
| "grad_norm": 1.5313411951065063, |
| "learning_rate": 9.374999999999999e-05, |
| "loss": 22.9573, |
| "step": 160, |
| "throughput": 8640.775590769676 |
| }, |
| { |
| "epoch": 0.0030094279735734604, |
| "grad_norm": 1.0794322490692139, |
| "learning_rate": 0.0001125, |
| "loss": 21.5803, |
| "step": 192, |
| "throughput": 9124.247227654996 |
| }, |
| { |
| "epoch": 0.0035109993025023707, |
| "grad_norm": 1.7150249481201172, |
| "learning_rate": 0.00013125, |
| "loss": 20.2367, |
| "step": 224, |
| "throughput": 9503.876590751574 |
| }, |
| { |
| "epoch": 0.0040125706314312806, |
| "grad_norm": 1.1530752182006836, |
| "learning_rate": 0.00015, |
| "loss": 19.0211, |
| "step": 256, |
| "throughput": 9810.215026698792 |
| }, |
| { |
| "epoch": 0.004514141960360191, |
| "grad_norm": 1.1234049797058105, |
| "learning_rate": 0.00016874999999999998, |
| "loss": 17.9283, |
| "step": 288, |
| "throughput": 10062.289764004558 |
| }, |
| { |
| "epoch": 0.005015713289289101, |
| "grad_norm": 0.913296639919281, |
| "learning_rate": 0.00018749999999999998, |
| "loss": 16.9365, |
| "step": 320, |
| "throughput": 10273.36417506043 |
| }, |
| { |
| "epoch": 0.005517284618218011, |
| "grad_norm": 1.1062116622924805, |
| "learning_rate": 0.00020624999999999997, |
| "loss": 16.1751, |
| "step": 352, |
| "throughput": 10452.745437624211 |
| }, |
| { |
| "epoch": 0.006018855947146921, |
| "grad_norm": 0.9756858944892883, |
| "learning_rate": 0.000225, |
| "loss": 15.4471, |
| "step": 384, |
| "throughput": 10565.312459821444 |
| }, |
| { |
| "epoch": 0.006520427276075831, |
| "grad_norm": 0.9211740493774414, |
| "learning_rate": 0.00024375, |
| "loss": 14.8975, |
| "step": 416, |
| "throughput": 10647.274434110246 |
| }, |
| { |
| "epoch": 0.007021998605004741, |
| "grad_norm": 0.9290841221809387, |
| "learning_rate": 0.0002625, |
| "loss": 14.4338, |
| "step": 448, |
| "throughput": 10769.698005822003 |
| }, |
| { |
| "epoch": 0.007523569933933652, |
| "grad_norm": 0.8251187205314636, |
| "learning_rate": 0.00028125, |
| "loss": 14.0338, |
| "step": 480, |
| "throughput": 10878.049557563976 |
| }, |
| { |
| "epoch": 0.008025141262862561, |
| "grad_norm": 0.6234081387519836, |
| "learning_rate": 0.0003, |
| "loss": 13.7662, |
| "step": 512, |
| "throughput": 10971.220222130216 |
| }, |
| { |
| "epoch": 0.008526712591791472, |
| "grad_norm": 0.5208855867385864, |
| "learning_rate": 0.00029999972162979993, |
| "loss": 13.4863, |
| "step": 544, |
| "throughput": 11058.017884661573 |
| }, |
| { |
| "epoch": 0.009028283920720382, |
| "grad_norm": 0.4786897897720337, |
| "learning_rate": 0.00029999888652034774, |
| "loss": 13.2042, |
| "step": 576, |
| "throughput": 11136.350438298447 |
| }, |
| { |
| "epoch": 0.009529855249649291, |
| "grad_norm": 0.42444518208503723, |
| "learning_rate": 0.00029999749467508744, |
| "loss": 13.014, |
| "step": 608, |
| "throughput": 11207.51047082044 |
| }, |
| { |
| "epoch": 0.010031426578578202, |
| "grad_norm": 0.4553331732749939, |
| "learning_rate": 0.0002999955460997589, |
| "loss": 12.7939, |
| "step": 640, |
| "throughput": 11272.208055366227 |
| }, |
| { |
| "epoch": 0.010532997907507112, |
| "grad_norm": 0.4054524004459381, |
| "learning_rate": 0.0002999930408023982, |
| "loss": 12.6612, |
| "step": 672, |
| "throughput": 11331.413096739732 |
| }, |
| { |
| "epoch": 0.011034569236436023, |
| "grad_norm": 0.4195540249347687, |
| "learning_rate": 0.00029998997879333714, |
| "loss": 12.501, |
| "step": 704, |
| "throughput": 11361.610717738524 |
| }, |
| { |
| "epoch": 0.011536140565364932, |
| "grad_norm": 0.42652466893196106, |
| "learning_rate": 0.0002999863600852034, |
| "loss": 12.355, |
| "step": 736, |
| "throughput": 11378.28571357199 |
| }, |
| { |
| "epoch": 0.012037711894293842, |
| "grad_norm": 0.31916525959968567, |
| "learning_rate": 0.0002999821846929206, |
| "loss": 12.2631, |
| "step": 768, |
| "throughput": 11420.460494303246 |
| }, |
| { |
| "epoch": 0.012539283223222753, |
| "grad_norm": 0.3535449802875519, |
| "learning_rate": 0.000299977452633708, |
| "loss": 12.1538, |
| "step": 800, |
| "throughput": 11465.309609724914 |
| }, |
| { |
| "epoch": 0.013040854552151662, |
| "grad_norm": 0.2800523638725281, |
| "learning_rate": 0.00029997216392708075, |
| "loss": 12.0527, |
| "step": 832, |
| "throughput": 11505.28822949725 |
| }, |
| { |
| "epoch": 0.013542425881080573, |
| "grad_norm": 0.33092939853668213, |
| "learning_rate": 0.00029996631859484943, |
| "loss": 11.9626, |
| "step": 864, |
| "throughput": 11544.232063949898 |
| }, |
| { |
| "epoch": 0.014043997210009483, |
| "grad_norm": 0.25572431087493896, |
| "learning_rate": 0.00029995991666112014, |
| "loss": 11.876, |
| "step": 896, |
| "throughput": 11580.621967456918 |
| }, |
| { |
| "epoch": 0.014545568538938392, |
| "grad_norm": 0.3377090394496918, |
| "learning_rate": 0.0002999529581522946, |
| "loss": 11.8027, |
| "step": 928, |
| "throughput": 11614.753535897125 |
| }, |
| { |
| "epoch": 0.015047139867867303, |
| "grad_norm": 0.26273658871650696, |
| "learning_rate": 0.0002999454430970696, |
| "loss": 11.763, |
| "step": 960, |
| "throughput": 11646.571087012338 |
| }, |
| { |
| "epoch": 0.015548711196796213, |
| "grad_norm": 0.2514078915119171, |
| "learning_rate": 0.0002999373715264373, |
| "loss": 11.6852, |
| "step": 992, |
| "throughput": 11676.406141226686 |
| }, |
| { |
| "epoch": 0.016050282525725122, |
| "grad_norm": 0.21636246144771576, |
| "learning_rate": 0.0002999287434736849, |
| "loss": 11.6263, |
| "step": 1024, |
| "throughput": 11690.870213371038 |
| }, |
| { |
| "epoch": 0.016551853854654033, |
| "grad_norm": 0.21994490921497345, |
| "learning_rate": 0.0002999195589743945, |
| "loss": 11.5639, |
| "step": 1056, |
| "throughput": 11691.441399084995 |
| }, |
| { |
| "epoch": 0.017053425183582945, |
| "grad_norm": 0.2077898383140564, |
| "learning_rate": 0.000299909818066443, |
| "loss": 11.5014, |
| "step": 1088, |
| "throughput": 11710.072498228305 |
| }, |
| { |
| "epoch": 0.017554996512511852, |
| "grad_norm": 0.20699281990528107, |
| "learning_rate": 0.00029989952079000195, |
| "loss": 11.4781, |
| "step": 1120, |
| "throughput": 11734.86066531971 |
| }, |
| { |
| "epoch": 0.018056567841440763, |
| "grad_norm": 0.20005486905574799, |
| "learning_rate": 0.0002998886671875373, |
| "loss": 11.4124, |
| "step": 1152, |
| "throughput": 11758.296021447659 |
| }, |
| { |
| "epoch": 0.018558139170369675, |
| "grad_norm": 0.18954195082187653, |
| "learning_rate": 0.0002998772573038094, |
| "loss": 11.3601, |
| "step": 1184, |
| "throughput": 11779.562423849526 |
| }, |
| { |
| "epoch": 0.019059710499298582, |
| "grad_norm": 0.21691370010375977, |
| "learning_rate": 0.0002998652911858726, |
| "loss": 11.3089, |
| "step": 1216, |
| "throughput": 11800.96803685039 |
| }, |
| { |
| "epoch": 0.019561281828227493, |
| "grad_norm": 0.24996884167194366, |
| "learning_rate": 0.00029985276888307524, |
| "loss": 11.2703, |
| "step": 1248, |
| "throughput": 11821.228998747654 |
| }, |
| { |
| "epoch": 0.020062853157156404, |
| "grad_norm": 0.18788766860961914, |
| "learning_rate": 0.00029983969044705927, |
| "loss": 11.2489, |
| "step": 1280, |
| "throughput": 11840.652754110957 |
| }, |
| { |
| "epoch": 0.020564424486085316, |
| "grad_norm": 0.20493867993354797, |
| "learning_rate": 0.0002998260559317603, |
| "loss": 11.2038, |
| "step": 1312, |
| "throughput": 11859.215230540382 |
| }, |
| { |
| "epoch": 0.021065995815014223, |
| "grad_norm": 0.20930466055870056, |
| "learning_rate": 0.00029981186539340703, |
| "loss": 11.1555, |
| "step": 1344, |
| "throughput": 11870.635377978188 |
| }, |
| { |
| "epoch": 0.021567567143943134, |
| "grad_norm": 0.1783915013074875, |
| "learning_rate": 0.0002997971188905213, |
| "loss": 11.1366, |
| "step": 1376, |
| "throughput": 11874.738822938076 |
| }, |
| { |
| "epoch": 0.022069138472872046, |
| "grad_norm": 0.16567964851856232, |
| "learning_rate": 0.0002997818164839178, |
| "loss": 11.0964, |
| "step": 1408, |
| "throughput": 11875.943651931068 |
| }, |
| { |
| "epoch": 0.022570709801800953, |
| "grad_norm": 0.19126038253307343, |
| "learning_rate": 0.00029976595823670354, |
| "loss": 11.0634, |
| "step": 1440, |
| "throughput": 11890.967243214012 |
| }, |
| { |
| "epoch": 0.023072281130729864, |
| "grad_norm": 0.15882526338100433, |
| "learning_rate": 0.0002997495442142781, |
| "loss": 11.0499, |
| "step": 1472, |
| "throughput": 11906.388345043066 |
| }, |
| { |
| "epoch": 0.023573852459658776, |
| "grad_norm": 0.17650361359119415, |
| "learning_rate": 0.000299732574484333, |
| "loss": 11.0088, |
| "step": 1504, |
| "throughput": 11920.006024618477 |
| }, |
| { |
| "epoch": 0.024075423788587683, |
| "grad_norm": 0.19148582220077515, |
| "learning_rate": 0.0002997150491168514, |
| "loss": 10.9806, |
| "step": 1536, |
| "throughput": 11934.273864161154 |
| }, |
| { |
| "epoch": 0.024576995117516594, |
| "grad_norm": 0.18846440315246582, |
| "learning_rate": 0.0002996969681841079, |
| "loss": 10.9449, |
| "step": 1568, |
| "throughput": 11947.993589384481 |
| }, |
| { |
| "epoch": 0.025078566446445506, |
| "grad_norm": 0.18076573312282562, |
| "learning_rate": 0.0002996783317606684, |
| "loss": 10.9236, |
| "step": 1600, |
| "throughput": 11961.20785667367 |
| }, |
| { |
| "epoch": 0.025580137775374417, |
| "grad_norm": 0.164947047829628, |
| "learning_rate": 0.0002996591399233895, |
| "loss": 10.8896, |
| "step": 1632, |
| "throughput": 11973.88601819077 |
| }, |
| { |
| "epoch": 0.026081709104303324, |
| "grad_norm": 0.18726250529289246, |
| "learning_rate": 0.00029963939275141855, |
| "loss": 10.8525, |
| "step": 1664, |
| "throughput": 11985.116203079922 |
| }, |
| { |
| "epoch": 0.026583280433232236, |
| "grad_norm": 0.18523889780044556, |
| "learning_rate": 0.00029961909032619275, |
| "loss": 10.8433, |
| "step": 1696, |
| "throughput": 11986.272076412846 |
| }, |
| { |
| "epoch": 0.027084851762161147, |
| "grad_norm": 0.18160128593444824, |
| "learning_rate": 0.00029959823273143947, |
| "loss": 10.8182, |
| "step": 1728, |
| "throughput": 11983.895196652385 |
| }, |
| { |
| "epoch": 0.027586423091090054, |
| "grad_norm": 0.15962938964366913, |
| "learning_rate": 0.0002995768200531755, |
| "loss": 10.8248, |
| "step": 1760, |
| "throughput": 11992.332506370003 |
| }, |
| { |
| "epoch": 0.028087994420018966, |
| "grad_norm": 0.15896254777908325, |
| "learning_rate": 0.00029955485237970675, |
| "loss": 10.7814, |
| "step": 1792, |
| "throughput": 12003.474001535129 |
| }, |
| { |
| "epoch": 0.028589565748947877, |
| "grad_norm": 0.15553218126296997, |
| "learning_rate": 0.00029953232980162793, |
| "loss": 10.76, |
| "step": 1824, |
| "throughput": 12013.247989756059 |
| }, |
| { |
| "epoch": 0.029091137077876784, |
| "grad_norm": 0.16225172579288483, |
| "learning_rate": 0.0002995092524118223, |
| "loss": 10.7184, |
| "step": 1856, |
| "throughput": 12023.777304766878 |
| }, |
| { |
| "epoch": 0.029592708406805696, |
| "grad_norm": 0.1489713191986084, |
| "learning_rate": 0.00029948562030546107, |
| "loss": 10.7283, |
| "step": 1888, |
| "throughput": 12033.905583821244 |
| }, |
| { |
| "epoch": 0.030094279735734607, |
| "grad_norm": 0.15911641716957092, |
| "learning_rate": 0.00029946143358000306, |
| "loss": 10.6952, |
| "step": 1920, |
| "throughput": 12043.77236211472 |
| }, |
| { |
| "epoch": 0.030595851064663518, |
| "grad_norm": 0.16882722079753876, |
| "learning_rate": 0.0002994366923351945, |
| "loss": 10.6803, |
| "step": 1952, |
| "throughput": 12053.312899128261 |
| }, |
| { |
| "epoch": 0.031097422393592426, |
| "grad_norm": 0.1465579718351364, |
| "learning_rate": 0.00029941139667306817, |
| "loss": 10.6555, |
| "step": 1984, |
| "throughput": 12061.72702376433 |
| }, |
| { |
| "epoch": 0.03159899372252133, |
| "grad_norm": 0.14933708310127258, |
| "learning_rate": 0.00029938554669794364, |
| "loss": 10.629, |
| "step": 2016, |
| "throughput": 12064.317775333797 |
| }, |
| { |
| "epoch": 0.032100565051450244, |
| "grad_norm": 0.16598300635814667, |
| "learning_rate": 0.00029935914251642625, |
| "loss": 10.6152, |
| "step": 2048, |
| "throughput": 12061.086543803585 |
| }, |
| { |
| "epoch": 0.032602136380379156, |
| "grad_norm": 0.15937751531600952, |
| "learning_rate": 0.0002993321842374069, |
| "loss": 10.5994, |
| "step": 2080, |
| "throughput": 12055.457800713268 |
| }, |
| { |
| "epoch": 0.03310370770930807, |
| "grad_norm": 0.15959997475147247, |
| "learning_rate": 0.00029930467197206156, |
| "loss": 10.5698, |
| "step": 2112, |
| "throughput": 12064.1609508185 |
| }, |
| { |
| "epoch": 0.03360527903823698, |
| "grad_norm": 0.15936905145645142, |
| "learning_rate": 0.000299276605833851, |
| "loss": 10.5483, |
| "step": 2144, |
| "throughput": 12072.650798888526 |
| }, |
| { |
| "epoch": 0.03410685036716589, |
| "grad_norm": 0.15118514001369476, |
| "learning_rate": 0.00029924798593851994, |
| "loss": 10.5501, |
| "step": 2176, |
| "throughput": 12080.019658762709 |
| }, |
| { |
| "epoch": 0.0346084216960948, |
| "grad_norm": 0.14804407954216003, |
| "learning_rate": 0.00029921881240409703, |
| "loss": 10.5372, |
| "step": 2208, |
| "throughput": 12087.98405323524 |
| }, |
| { |
| "epoch": 0.035109993025023704, |
| "grad_norm": 0.142776757478714, |
| "learning_rate": 0.00029918908535089394, |
| "loss": 10.5293, |
| "step": 2240, |
| "throughput": 12095.798193861325 |
| }, |
| { |
| "epoch": 0.035611564353952616, |
| "grad_norm": 0.149314746260643, |
| "learning_rate": 0.00029915880490150515, |
| "loss": 10.5013, |
| "step": 2272, |
| "throughput": 12103.443761699426 |
| }, |
| { |
| "epoch": 0.03611313568288153, |
| "grad_norm": 0.14946982264518738, |
| "learning_rate": 0.0002991279711808072, |
| "loss": 10.5007, |
| "step": 2304, |
| "throughput": 12110.91765243157 |
| }, |
| { |
| "epoch": 0.03661470701181044, |
| "grad_norm": 0.15681925415992737, |
| "learning_rate": 0.0002990965843159587, |
| "loss": 10.462, |
| "step": 2336, |
| "throughput": 12113.172526776905 |
| }, |
| { |
| "epoch": 0.03711627834073935, |
| "grad_norm": 0.14676795899868011, |
| "learning_rate": 0.000299064644436399, |
| "loss": 10.4549, |
| "step": 2368, |
| "throughput": 12111.914287763128 |
| }, |
| { |
| "epoch": 0.03761784966966826, |
| "grad_norm": 0.1510101556777954, |
| "learning_rate": 0.0002990321516738482, |
| "loss": 10.4197, |
| "step": 2400, |
| "throughput": 12111.473673161498 |
| }, |
| { |
| "epoch": 0.038119420998597164, |
| "grad_norm": 0.1597863733768463, |
| "learning_rate": 0.00029899910616230674, |
| "loss": 10.4311, |
| "step": 2432, |
| "throughput": 12117.028182195081 |
| }, |
| { |
| "epoch": 0.038620992327526076, |
| "grad_norm": 0.14865833520889282, |
| "learning_rate": 0.0002989655080380543, |
| "loss": 10.4316, |
| "step": 2464, |
| "throughput": 12123.821883309733 |
| }, |
| { |
| "epoch": 0.03912256365645499, |
| "grad_norm": 0.1432724893093109, |
| "learning_rate": 0.0002989313574396496, |
| "loss": 10.3984, |
| "step": 2496, |
| "throughput": 12129.646773074963 |
| }, |
| { |
| "epoch": 0.0396241349853839, |
| "grad_norm": 0.1372014582157135, |
| "learning_rate": 0.00029889665450792983, |
| "loss": 10.3833, |
| "step": 2528, |
| "throughput": 12136.138293021668 |
| }, |
| { |
| "epoch": 0.04012570631431281, |
| "grad_norm": 0.13886210322380066, |
| "learning_rate": 0.0002988613993860101, |
| "loss": 10.3763, |
| "step": 2560, |
| "throughput": 12142.472546726538 |
| }, |
| { |
| "epoch": 0.04062727764324172, |
| "grad_norm": 0.13816037774085999, |
| "learning_rate": 0.0002988255922192825, |
| "loss": 10.3653, |
| "step": 2592, |
| "throughput": 12148.65141120839 |
| }, |
| { |
| "epoch": 0.04112884897217063, |
| "grad_norm": 0.14726093411445618, |
| "learning_rate": 0.000298789233155416, |
| "loss": 10.3521, |
| "step": 2624, |
| "throughput": 12154.738846922763 |
| }, |
| { |
| "epoch": 0.041630420301099536, |
| "grad_norm": 0.14088797569274902, |
| "learning_rate": 0.0002987523223443554, |
| "loss": 10.342, |
| "step": 2656, |
| "throughput": 12157.94437275133 |
| }, |
| { |
| "epoch": 0.04213199163002845, |
| "grad_norm": 0.12842944264411926, |
| "learning_rate": 0.000298714859938321, |
| "loss": 10.3272, |
| "step": 2688, |
| "throughput": 12156.68430068634 |
| }, |
| { |
| "epoch": 0.04263356295895736, |
| "grad_norm": 0.13655942678451538, |
| "learning_rate": 0.0002986768460918079, |
| "loss": 10.3139, |
| "step": 2720, |
| "throughput": 12154.777180138237 |
| }, |
| { |
| "epoch": 0.04313513428788627, |
| "grad_norm": 0.1346423178911209, |
| "learning_rate": 0.0002986382809615853, |
| "loss": 10.3062, |
| "step": 2752, |
| "throughput": 12158.481768431573 |
| }, |
| { |
| "epoch": 0.04363670561681518, |
| "grad_norm": 0.15266847610473633, |
| "learning_rate": 0.00029859916470669596, |
| "loss": 10.3068, |
| "step": 2784, |
| "throughput": 12164.032464673319 |
| }, |
| { |
| "epoch": 0.04413827694574409, |
| "grad_norm": 0.13238975405693054, |
| "learning_rate": 0.0002985594974884554, |
| "loss": 10.2559, |
| "step": 2816, |
| "throughput": 12168.953498849784 |
| }, |
| { |
| "epoch": 0.044639848274673, |
| "grad_norm": 0.13468751311302185, |
| "learning_rate": 0.00029851927947045136, |
| "loss": 10.271, |
| "step": 2848, |
| "throughput": 12174.27847236647 |
| }, |
| { |
| "epoch": 0.04514141960360191, |
| "grad_norm": 0.13259458541870117, |
| "learning_rate": 0.000298478510818543, |
| "loss": 10.2444, |
| "step": 2880, |
| "throughput": 12179.546502271302 |
| }, |
| { |
| "epoch": 0.04564299093253082, |
| "grad_norm": 0.14665883779525757, |
| "learning_rate": 0.0002984371917008604, |
| "loss": 10.2316, |
| "step": 2912, |
| "throughput": 12184.691956190614 |
| }, |
| { |
| "epoch": 0.04614456226145973, |
| "grad_norm": 0.13770803809165955, |
| "learning_rate": 0.0002983953222878037, |
| "loss": 10.2499, |
| "step": 2944, |
| "throughput": 12189.781748950709 |
| }, |
| { |
| "epoch": 0.04664613359038864, |
| "grad_norm": 0.15984688699245453, |
| "learning_rate": 0.0002983529027520426, |
| "loss": 10.208, |
| "step": 2976, |
| "throughput": 12193.11276934592 |
| }, |
| { |
| "epoch": 0.04714770491931755, |
| "grad_norm": 0.14149409532546997, |
| "learning_rate": 0.0002983099332685153, |
| "loss": 10.2187, |
| "step": 3008, |
| "throughput": 12192.766548304595 |
| }, |
| { |
| "epoch": 0.04764927624824646, |
| "grad_norm": 0.14984308183193207, |
| "learning_rate": 0.000298266414014428, |
| "loss": 10.2101, |
| "step": 3040, |
| "throughput": 12189.453681022953 |
| }, |
| { |
| "epoch": 0.04815084757717537, |
| "grad_norm": 0.13683457672595978, |
| "learning_rate": 0.0002982223451692544, |
| "loss": 10.2017, |
| "step": 3072, |
| "throughput": 12191.962757263142 |
| }, |
| { |
| "epoch": 0.04865241890610428, |
| "grad_norm": 0.14434507489204407, |
| "learning_rate": 0.0002981777269147344, |
| "loss": 10.1901, |
| "step": 3104, |
| "throughput": 12196.147790334488 |
| }, |
| { |
| "epoch": 0.04915399023503319, |
| "grad_norm": 0.13738803565502167, |
| "learning_rate": 0.0002981325594348739, |
| "loss": 10.2011, |
| "step": 3136, |
| "throughput": 12200.803712412984 |
| }, |
| { |
| "epoch": 0.0496555615639621, |
| "grad_norm": 0.1290079653263092, |
| "learning_rate": 0.00029808684291594373, |
| "loss": 10.1577, |
| "step": 3168, |
| "throughput": 12204.732814989668 |
| }, |
| { |
| "epoch": 0.05015713289289101, |
| "grad_norm": 0.12609000504016876, |
| "learning_rate": 0.0002980405775464789, |
| "loss": 10.153, |
| "step": 3200, |
| "throughput": 12209.032284004508 |
| }, |
| { |
| "epoch": 0.05065870422181992, |
| "grad_norm": 0.1282884180545807, |
| "learning_rate": 0.00029799376351727797, |
| "loss": 10.1375, |
| "step": 3232, |
| "throughput": 12213.400158356682 |
| }, |
| { |
| "epoch": 0.051160275550748834, |
| "grad_norm": 0.13750484585762024, |
| "learning_rate": 0.00029794640102140206, |
| "loss": 10.1275, |
| "step": 3264, |
| "throughput": 12217.685151283038 |
| }, |
| { |
| "epoch": 0.05166184687967774, |
| "grad_norm": 0.17931711673736572, |
| "learning_rate": 0.00029789849025417433, |
| "loss": 10.1251, |
| "step": 3296, |
| "throughput": 12221.40489775536 |
| }, |
| { |
| "epoch": 0.05216341820860665, |
| "grad_norm": 0.11613932251930237, |
| "learning_rate": 0.0002978500314131789, |
| "loss": 10.1578, |
| "step": 3328, |
| "throughput": 12221.815611125585 |
| }, |
| { |
| "epoch": 0.05266498953753556, |
| "grad_norm": 0.13067440688610077, |
| "learning_rate": 0.00029780102469826014, |
| "loss": 10.1099, |
| "step": 3360, |
| "throughput": 12221.108428713243 |
| }, |
| { |
| "epoch": 0.05316656086646447, |
| "grad_norm": 0.13646602630615234, |
| "learning_rate": 0.00029775147031152195, |
| "loss": 10.098, |
| "step": 3392, |
| "throughput": 12220.73803888541 |
| }, |
| { |
| "epoch": 0.05366813219539338, |
| "grad_norm": 0.1439736783504486, |
| "learning_rate": 0.0002977013684573267, |
| "loss": 10.101, |
| "step": 3424, |
| "throughput": 12223.880725167104 |
| }, |
| { |
| "epoch": 0.054169703524322294, |
| "grad_norm": 0.1305106282234192, |
| "learning_rate": 0.0002976507193422946, |
| "loss": 10.0933, |
| "step": 3456, |
| "throughput": 12227.953070782476 |
| }, |
| { |
| "epoch": 0.0546712748532512, |
| "grad_norm": 0.15496422350406647, |
| "learning_rate": 0.00029759952317530284, |
| "loss": 10.1026, |
| "step": 3488, |
| "throughput": 12231.44852716162 |
| }, |
| { |
| "epoch": 0.05517284618218011, |
| "grad_norm": 0.1231626644730568, |
| "learning_rate": 0.0002975477801674845, |
| "loss": 10.0508, |
| "step": 3520, |
| "throughput": 12235.337631141307 |
| }, |
| { |
| "epoch": 0.05567441751110902, |
| "grad_norm": 0.12398959696292877, |
| "learning_rate": 0.00029749549053222784, |
| "loss": 10.0712, |
| "step": 3552, |
| "throughput": 12239.127986608266 |
| }, |
| { |
| "epoch": 0.05617598884003793, |
| "grad_norm": 0.11847177147865295, |
| "learning_rate": 0.0002974426544851755, |
| "loss": 10.0456, |
| "step": 3584, |
| "throughput": 12242.863384381568 |
| }, |
| { |
| "epoch": 0.05667756016896684, |
| "grad_norm": 0.12773899734020233, |
| "learning_rate": 0.00029738927224422354, |
| "loss": 10.0489, |
| "step": 3616, |
| "throughput": 12246.014967120955 |
| }, |
| { |
| "epoch": 0.057179131497895753, |
| "grad_norm": 0.13713018596172333, |
| "learning_rate": 0.0002973353440295205, |
| "loss": 10.0223, |
| "step": 3648, |
| "throughput": 12247.24311621234 |
| }, |
| { |
| "epoch": 0.057680702826824665, |
| "grad_norm": 0.14463427662849426, |
| "learning_rate": 0.0002972808700634664, |
| "loss": 10.0269, |
| "step": 3680, |
| "throughput": 12245.601304229167 |
| }, |
| { |
| "epoch": 0.05818227415575357, |
| "grad_norm": 0.12428227812051773, |
| "learning_rate": 0.0002972258505707121, |
| "loss": 10.0188, |
| "step": 3712, |
| "throughput": 12244.117691271802 |
| }, |
| { |
| "epoch": 0.05868384548468248, |
| "grad_norm": 0.1311129778623581, |
| "learning_rate": 0.00029717028577815817, |
| "loss": 10.0069, |
| "step": 3744, |
| "throughput": 12246.616999806483 |
| }, |
| { |
| "epoch": 0.05918541681361139, |
| "grad_norm": 0.1432640105485916, |
| "learning_rate": 0.0002971141759149539, |
| "loss": 10.0253, |
| "step": 3776, |
| "throughput": 12249.62555966762 |
| }, |
| { |
| "epoch": 0.0596869881425403, |
| "grad_norm": 0.12176132202148438, |
| "learning_rate": 0.00029705752121249665, |
| "loss": 10.0013, |
| "step": 3808, |
| "throughput": 12252.551883866128 |
| }, |
| { |
| "epoch": 0.060188559471469213, |
| "grad_norm": 0.1390705704689026, |
| "learning_rate": 0.0002970003219044305, |
| "loss": 9.9886, |
| "step": 3840, |
| "throughput": 12255.913981330372 |
| }, |
| { |
| "epoch": 0.060690130800398125, |
| "grad_norm": 0.13727904856204987, |
| "learning_rate": 0.0002969425782266455, |
| "loss": 10.0022, |
| "step": 3872, |
| "throughput": 12259.206427835396 |
| }, |
| { |
| "epoch": 0.061191702129327036, |
| "grad_norm": 0.1457907259464264, |
| "learning_rate": 0.0002968842904172769, |
| "loss": 9.9932, |
| "step": 3904, |
| "throughput": 12262.448975674195 |
| }, |
| { |
| "epoch": 0.06169327345825594, |
| "grad_norm": 0.13661052286624908, |
| "learning_rate": 0.00029682545871670375, |
| "loss": 9.9852, |
| "step": 3936, |
| "throughput": 12265.135223847203 |
| }, |
| { |
| "epoch": 0.06219484478718485, |
| "grad_norm": 0.11689677834510803, |
| "learning_rate": 0.0002967660833675481, |
| "loss": 9.9688, |
| "step": 3968, |
| "throughput": 12266.224417757796 |
| }, |
| { |
| "epoch": 0.06269641611611376, |
| "grad_norm": 0.1388208568096161, |
| "learning_rate": 0.0002967061646146741, |
| "loss": 9.956, |
| "step": 4000, |
| "throughput": 12265.161713743151 |
| }, |
| { |
| "epoch": 0.06319798744504267, |
| "grad_norm": 0.14242246747016907, |
| "learning_rate": 0.00029664570270518685, |
| "loss": 9.9404, |
| "step": 4032, |
| "throughput": 12265.547154570646 |
| }, |
| { |
| "epoch": 0.06369955877397158, |
| "grad_norm": 0.12950783967971802, |
| "learning_rate": 0.00029658469788843147, |
| "loss": 9.9449, |
| "step": 4064, |
| "throughput": 12265.70607379335 |
| }, |
| { |
| "epoch": 0.06420113010290049, |
| "grad_norm": 0.12653489410877228, |
| "learning_rate": 0.00029652315041599203, |
| "loss": 9.9341, |
| "step": 4096, |
| "throughput": 12268.422166740711 |
| }, |
| { |
| "epoch": 0.0647027014318294, |
| "grad_norm": 0.13766422867774963, |
| "learning_rate": 0.00029646106054169046, |
| "loss": 9.9369, |
| "step": 4128, |
| "throughput": 12266.532288400096 |
| }, |
| { |
| "epoch": 0.06520427276075831, |
| "grad_norm": 0.125003382563591, |
| "learning_rate": 0.00029639842852158553, |
| "loss": 9.9329, |
| "step": 4160, |
| "throughput": 12269.080286215773 |
| }, |
| { |
| "epoch": 0.06570584408968723, |
| "grad_norm": 0.12812422215938568, |
| "learning_rate": 0.00029633525461397194, |
| "loss": 9.927, |
| "step": 4192, |
| "throughput": 12272.078468674545 |
| }, |
| { |
| "epoch": 0.06620741541861613, |
| "grad_norm": 0.13568617403507233, |
| "learning_rate": 0.00029627153907937903, |
| "loss": 9.9293, |
| "step": 4224, |
| "throughput": 12275.038225707407 |
| }, |
| { |
| "epoch": 0.06670898674754504, |
| "grad_norm": 0.12992961704730988, |
| "learning_rate": 0.0002962072821805699, |
| "loss": 9.8933, |
| "step": 4256, |
| "throughput": 12277.575637351083 |
| }, |
| { |
| "epoch": 0.06721055807647396, |
| "grad_norm": 0.11827738583087921, |
| "learning_rate": 0.0002961424841825402, |
| "loss": 9.9004, |
| "step": 4288, |
| "throughput": 12279.553752485568 |
| }, |
| { |
| "epoch": 0.06771212940540286, |
| "grad_norm": 0.12144280970096588, |
| "learning_rate": 0.00029607714535251703, |
| "loss": 9.8951, |
| "step": 4320, |
| "throughput": 12279.402994481276 |
| }, |
| { |
| "epoch": 0.06821370073433178, |
| "grad_norm": 0.11789822578430176, |
| "learning_rate": 0.00029601126595995794, |
| "loss": 9.8987, |
| "step": 4352, |
| "throughput": 12278.677511112539 |
| }, |
| { |
| "epoch": 0.06871527206326068, |
| "grad_norm": 0.1158544048666954, |
| "learning_rate": 0.0002959448462765497, |
| "loss": 9.8789, |
| "step": 4384, |
| "throughput": 12278.85046684112 |
| }, |
| { |
| "epoch": 0.0692168433921896, |
| "grad_norm": 0.12830589711666107, |
| "learning_rate": 0.0002958778865762072, |
| "loss": 9.8952, |
| "step": 4416, |
| "throughput": 12280.41777725107 |
| }, |
| { |
| "epoch": 0.0697184147211185, |
| "grad_norm": 0.118076391518116, |
| "learning_rate": 0.0002958103871350727, |
| "loss": 9.8743, |
| "step": 4448, |
| "throughput": 12283.159086085681 |
| }, |
| { |
| "epoch": 0.07021998605004741, |
| "grad_norm": 0.13101021945476532, |
| "learning_rate": 0.0002957423482315139, |
| "loss": 9.8762, |
| "step": 4480, |
| "throughput": 12285.418379854667 |
| }, |
| { |
| "epoch": 0.07072155737897633, |
| "grad_norm": 0.14345066249370575, |
| "learning_rate": 0.0002956737701461235, |
| "loss": 9.8613, |
| "step": 4512, |
| "throughput": 12288.06607647167 |
| }, |
| { |
| "epoch": 0.07122312870790523, |
| "grad_norm": 0.13554368913173676, |
| "learning_rate": 0.00029560465316171773, |
| "loss": 9.8495, |
| "step": 4544, |
| "throughput": 12290.674364258046 |
| }, |
| { |
| "epoch": 0.07172470003683415, |
| "grad_norm": 0.12480172514915466, |
| "learning_rate": 0.0002955349975633352, |
| "loss": 9.8633, |
| "step": 4576, |
| "throughput": 12293.266006329883 |
| }, |
| { |
| "epoch": 0.07222627136576305, |
| "grad_norm": 0.12317401170730591, |
| "learning_rate": 0.00029546480363823577, |
| "loss": 9.859, |
| "step": 4608, |
| "throughput": 12295.056858274298 |
| }, |
| { |
| "epoch": 0.07272784269469197, |
| "grad_norm": 0.12118421494960785, |
| "learning_rate": 0.0002953940716758995, |
| "loss": 9.8337, |
| "step": 4640, |
| "throughput": 12295.407806257885 |
| }, |
| { |
| "epoch": 0.07322941402362088, |
| "grad_norm": 0.12546321749687195, |
| "learning_rate": 0.0002953228019680252, |
| "loss": 9.8429, |
| "step": 4672, |
| "throughput": 12295.151649105255 |
| }, |
| { |
| "epoch": 0.07373098535254978, |
| "grad_norm": 0.1200215220451355, |
| "learning_rate": 0.0002952509948085293, |
| "loss": 9.82, |
| "step": 4704, |
| "throughput": 12294.887429480248 |
| }, |
| { |
| "epoch": 0.0742325566814787, |
| "grad_norm": 0.1412278413772583, |
| "learning_rate": 0.00029517865049354477, |
| "loss": 9.8439, |
| "step": 4736, |
| "throughput": 12296.221471223143 |
| }, |
| { |
| "epoch": 0.0747341280104076, |
| "grad_norm": 0.13449692726135254, |
| "learning_rate": 0.0002951057693214197, |
| "loss": 9.8178, |
| "step": 4768, |
| "throughput": 12298.279352568583 |
| }, |
| { |
| "epoch": 0.07523569933933652, |
| "grad_norm": 0.11419707536697388, |
| "learning_rate": 0.0002950323515927164, |
| "loss": 9.7989, |
| "step": 4800, |
| "throughput": 12300.697676788242 |
| }, |
| { |
| "epoch": 0.07573727066826542, |
| "grad_norm": 0.11441710591316223, |
| "learning_rate": 0.0002949583976102097, |
| "loss": 9.8139, |
| "step": 4832, |
| "throughput": 12302.73785434334 |
| }, |
| { |
| "epoch": 0.07623884199719433, |
| "grad_norm": 0.12667381763458252, |
| "learning_rate": 0.00029488390767888606, |
| "loss": 9.8028, |
| "step": 4864, |
| "throughput": 12305.123450605599 |
| }, |
| { |
| "epoch": 0.07674041332612325, |
| "grad_norm": 0.1141253337264061, |
| "learning_rate": 0.0002948088821059422, |
| "loss": 9.7975, |
| "step": 4896, |
| "throughput": 12307.419387340185 |
| }, |
| { |
| "epoch": 0.07724198465505215, |
| "grad_norm": 0.11875823140144348, |
| "learning_rate": 0.0002947333212007838, |
| "loss": 9.8001, |
| "step": 4928, |
| "throughput": 12309.448497408639 |
| }, |
| { |
| "epoch": 0.07774355598398107, |
| "grad_norm": 0.12149334698915482, |
| "learning_rate": 0.0002946572252750242, |
| "loss": 9.8045, |
| "step": 4960, |
| "throughput": 12309.602888274778 |
| }, |
| { |
| "epoch": 0.07824512731290997, |
| "grad_norm": 0.11435220390558243, |
| "learning_rate": 0.0002945805946424834, |
| "loss": 9.7856, |
| "step": 4992, |
| "throughput": 12309.117653614778 |
| }, |
| { |
| "epoch": 0.07874669864183889, |
| "grad_norm": 0.12228445708751678, |
| "learning_rate": 0.0002945034296191861, |
| "loss": 9.7938, |
| "step": 5024, |
| "throughput": 12308.370488061273 |
| }, |
| { |
| "epoch": 0.0792482699707678, |
| "grad_norm": 0.11748974025249481, |
| "learning_rate": 0.00029442573052336127, |
| "loss": 9.7885, |
| "step": 5056, |
| "throughput": 12309.201100353435 |
| }, |
| { |
| "epoch": 0.0797498412996967, |
| "grad_norm": 0.11757509410381317, |
| "learning_rate": 0.0002943474976754401, |
| "loss": 9.7451, |
| "step": 5088, |
| "throughput": 12311.036028542083 |
| }, |
| { |
| "epoch": 0.08025141262862562, |
| "grad_norm": 0.1277211606502533, |
| "learning_rate": 0.0002942687313980552, |
| "loss": 9.7726, |
| "step": 5120, |
| "throughput": 12313.223751076262 |
| }, |
| { |
| "epoch": 0.08075298395755452, |
| "grad_norm": 0.12524756789207458, |
| "learning_rate": 0.0002941894320160389, |
| "loss": 9.7733, |
| "step": 5152, |
| "throughput": 12314.977454209267 |
| }, |
| { |
| "epoch": 0.08125455528648344, |
| "grad_norm": 0.12436572462320328, |
| "learning_rate": 0.00029410959985642205, |
| "loss": 9.7558, |
| "step": 5184, |
| "throughput": 12317.09806861488 |
| }, |
| { |
| "epoch": 0.08175612661541234, |
| "grad_norm": 0.11354023963212967, |
| "learning_rate": 0.0002940292352484327, |
| "loss": 9.7416, |
| "step": 5216, |
| "throughput": 12319.223291715813 |
| }, |
| { |
| "epoch": 0.08225769794434126, |
| "grad_norm": 0.11431898921728134, |
| "learning_rate": 0.0002939483385234948, |
| "loss": 9.7403, |
| "step": 5248, |
| "throughput": 12321.033988877938 |
| }, |
| { |
| "epoch": 0.08275926927327017, |
| "grad_norm": 0.12201514840126038, |
| "learning_rate": 0.0002938669100152266, |
| "loss": 9.7622, |
| "step": 5280, |
| "throughput": 12321.809804324736 |
| }, |
| { |
| "epoch": 0.08326084060219907, |
| "grad_norm": 0.12244311720132828, |
| "learning_rate": 0.00029378495005943954, |
| "loss": 9.7318, |
| "step": 5312, |
| "throughput": 12321.709958468915 |
| }, |
| { |
| "epoch": 0.08376241193112799, |
| "grad_norm": 0.12442632764577866, |
| "learning_rate": 0.00029370245899413677, |
| "loss": 9.7324, |
| "step": 5344, |
| "throughput": 12320.893035853058 |
| }, |
| { |
| "epoch": 0.0842639832600569, |
| "grad_norm": 0.10813046246767044, |
| "learning_rate": 0.0002936194371595116, |
| "loss": 9.7355, |
| "step": 5376, |
| "throughput": 12321.800544082063 |
| }, |
| { |
| "epoch": 0.08476555458898581, |
| "grad_norm": 0.1120183914899826, |
| "learning_rate": 0.00029353588489794636, |
| "loss": 9.7248, |
| "step": 5408, |
| "throughput": 12322.747158697055 |
| }, |
| { |
| "epoch": 0.08526712591791472, |
| "grad_norm": 0.1144866794347763, |
| "learning_rate": 0.0002934518025540109, |
| "loss": 9.723, |
| "step": 5440, |
| "throughput": 12324.437603602319 |
| }, |
| { |
| "epoch": 0.08576869724684363, |
| "grad_norm": 0.10953057557344437, |
| "learning_rate": 0.00029336719047446096, |
| "loss": 9.7424, |
| "step": 5472, |
| "throughput": 12326.037578732192 |
| }, |
| { |
| "epoch": 0.08627026857577254, |
| "grad_norm": 0.12415914237499237, |
| "learning_rate": 0.000293282049008237, |
| "loss": 9.7132, |
| "step": 5504, |
| "throughput": 12327.988044033029 |
| }, |
| { |
| "epoch": 0.08677183990470144, |
| "grad_norm": 0.12535545229911804, |
| "learning_rate": 0.00029319637850646273, |
| "loss": 9.7169, |
| "step": 5536, |
| "throughput": 12329.945895602787 |
| }, |
| { |
| "epoch": 0.08727341123363036, |
| "grad_norm": 0.11870857328176498, |
| "learning_rate": 0.0002931101793224435, |
| "loss": 9.7188, |
| "step": 5568, |
| "throughput": 12331.603366242149 |
| }, |
| { |
| "epoch": 0.08777498256255926, |
| "grad_norm": 0.11292309314012527, |
| "learning_rate": 0.0002930234518116651, |
| "loss": 9.7178, |
| "step": 5600, |
| "throughput": 12332.284273719622 |
| }, |
| { |
| "epoch": 0.08827655389148818, |
| "grad_norm": 0.12400523573160172, |
| "learning_rate": 0.000292936196331792, |
| "loss": 9.6843, |
| "step": 5632, |
| "throughput": 12332.697010653936 |
| }, |
| { |
| "epoch": 0.08877812522041709, |
| "grad_norm": 0.11735209077596664, |
| "learning_rate": 0.000292848413242666, |
| "loss": 9.7094, |
| "step": 5664, |
| "throughput": 12332.795802288674 |
| }, |
| { |
| "epoch": 0.089279696549346, |
| "grad_norm": 0.11407672613859177, |
| "learning_rate": 0.0002927601029063049, |
| "loss": 9.6873, |
| "step": 5696, |
| "throughput": 12332.697222211196 |
| }, |
| { |
| "epoch": 0.08978126787827491, |
| "grad_norm": 0.12642164528369904, |
| "learning_rate": 0.0002926712656869007, |
| "loss": 9.6826, |
| "step": 5728, |
| "throughput": 12333.568590858733 |
| }, |
| { |
| "epoch": 0.09028283920720381, |
| "grad_norm": 0.11882464587688446, |
| "learning_rate": 0.0002925819019508184, |
| "loss": 9.6877, |
| "step": 5760, |
| "throughput": 12335.226550369427 |
| }, |
| { |
| "epoch": 0.09078441053613273, |
| "grad_norm": 0.12189045548439026, |
| "learning_rate": 0.0002924920120665943, |
| "loss": 9.6935, |
| "step": 5792, |
| "throughput": 12337.029247432713 |
| }, |
| { |
| "epoch": 0.09128598186506164, |
| "grad_norm": 0.12018447369337082, |
| "learning_rate": 0.00029240159640493463, |
| "loss": 9.689, |
| "step": 5824, |
| "throughput": 12338.4540721314 |
| }, |
| { |
| "epoch": 0.09178755319399055, |
| "grad_norm": 0.11342897266149521, |
| "learning_rate": 0.00029231065533871374, |
| "loss": 9.6768, |
| "step": 5856, |
| "throughput": 12340.23535636152 |
| }, |
| { |
| "epoch": 0.09228912452291946, |
| "grad_norm": 0.11006417125463486, |
| "learning_rate": 0.0002922191892429729, |
| "loss": 9.657, |
| "step": 5888, |
| "throughput": 12341.714272101834 |
| }, |
| { |
| "epoch": 0.09279069585184836, |
| "grad_norm": 0.1147543340921402, |
| "learning_rate": 0.0002921271984949185, |
| "loss": 9.6669, |
| "step": 5920, |
| "throughput": 12343.211380920462 |
| }, |
| { |
| "epoch": 0.09329226718077728, |
| "grad_norm": 0.10900643467903137, |
| "learning_rate": 0.0002920346834739208, |
| "loss": 9.6532, |
| "step": 5952, |
| "throughput": 12343.042496781856 |
| }, |
| { |
| "epoch": 0.09379383850970618, |
| "grad_norm": 0.11826995015144348, |
| "learning_rate": 0.0002919416445615119, |
| "loss": 9.6561, |
| "step": 5984, |
| "throughput": 12342.599329399904 |
| }, |
| { |
| "epoch": 0.0942954098386351, |
| "grad_norm": 0.12266913801431656, |
| "learning_rate": 0.0002918480821413846, |
| "loss": 9.6378, |
| "step": 6016, |
| "throughput": 12342.16939111886 |
| }, |
| { |
| "epoch": 0.094796981167564, |
| "grad_norm": 0.10905318707227707, |
| "learning_rate": 0.0002917539965993906, |
| "loss": 9.6405, |
| "step": 6048, |
| "throughput": 12342.86339634901 |
| }, |
| { |
| "epoch": 0.09529855249649292, |
| "grad_norm": 0.11646851897239685, |
| "learning_rate": 0.00029165938832353885, |
| "loss": 9.6457, |
| "step": 6080, |
| "throughput": 12344.365800153453 |
| }, |
| { |
| "epoch": 0.09580012382542183, |
| "grad_norm": 0.11943021416664124, |
| "learning_rate": 0.00029156425770399434, |
| "loss": 9.6318, |
| "step": 6112, |
| "throughput": 12346.042912433375 |
| }, |
| { |
| "epoch": 0.09630169515435073, |
| "grad_norm": 0.12691287696361542, |
| "learning_rate": 0.0002914686051330759, |
| "loss": 9.6346, |
| "step": 6144, |
| "throughput": 12347.387013004074 |
| }, |
| { |
| "epoch": 0.09680326648327965, |
| "grad_norm": 0.1268269121646881, |
| "learning_rate": 0.00029137243100525506, |
| "loss": 9.6586, |
| "step": 6176, |
| "throughput": 12345.744944764449 |
| }, |
| { |
| "epoch": 0.09730483781220856, |
| "grad_norm": 0.11386391520500183, |
| "learning_rate": 0.00029127573571715416, |
| "loss": 9.6269, |
| "step": 6208, |
| "throughput": 12347.386245501879 |
| }, |
| { |
| "epoch": 0.09780640914113747, |
| "grad_norm": 0.11313401907682419, |
| "learning_rate": 0.00029117851966754495, |
| "loss": 9.6245, |
| "step": 6240, |
| "throughput": 12348.8221084515 |
| }, |
| { |
| "epoch": 0.09830798047006638, |
| "grad_norm": 0.11410092562437057, |
| "learning_rate": 0.00029108078325734666, |
| "loss": 9.6307, |
| "step": 6272, |
| "throughput": 12348.797703159744 |
| }, |
| { |
| "epoch": 0.0988095517989953, |
| "grad_norm": 0.12609340250492096, |
| "learning_rate": 0.0002909825268896245, |
| "loss": 9.6201, |
| "step": 6304, |
| "throughput": 12348.257753633938 |
| }, |
| { |
| "epoch": 0.0993111231279242, |
| "grad_norm": 0.10618647187948227, |
| "learning_rate": 0.000290883750969588, |
| "loss": 9.6123, |
| "step": 6336, |
| "throughput": 12348.513898569872 |
| }, |
| { |
| "epoch": 0.0998126944568531, |
| "grad_norm": 0.12335828691720963, |
| "learning_rate": 0.00029078445590458946, |
| "loss": 9.6015, |
| "step": 6368, |
| "throughput": 12349.050939061655 |
| }, |
| { |
| "epoch": 0.10031426578578202, |
| "grad_norm": 0.10779345035552979, |
| "learning_rate": 0.0002906846421041219, |
| "loss": 9.6393, |
| "step": 6400, |
| "throughput": 12349.737200284571 |
| }, |
| { |
| "epoch": 0.10081583711471093, |
| "grad_norm": 0.11098971217870712, |
| "learning_rate": 0.00029058430997981784, |
| "loss": 9.5854, |
| "step": 6432, |
| "throughput": 12351.193305270057 |
| }, |
| { |
| "epoch": 0.10131740844363984, |
| "grad_norm": 0.11716262996196747, |
| "learning_rate": 0.0002904834599454472, |
| "loss": 9.5901, |
| "step": 6464, |
| "throughput": 12352.517470523717 |
| }, |
| { |
| "epoch": 0.10181897977256875, |
| "grad_norm": 0.10677265375852585, |
| "learning_rate": 0.00029038209241691575, |
| "loss": 9.617, |
| "step": 6496, |
| "throughput": 12354.060061073262 |
| }, |
| { |
| "epoch": 0.10232055110149767, |
| "grad_norm": 0.1280500292778015, |
| "learning_rate": 0.0002902802078122636, |
| "loss": 9.5724, |
| "step": 6528, |
| "throughput": 12355.586840332062 |
| }, |
| { |
| "epoch": 0.10282212243042657, |
| "grad_norm": 0.11151953786611557, |
| "learning_rate": 0.00029017780655166315, |
| "loss": 9.5919, |
| "step": 6560, |
| "throughput": 12356.84589819978 |
| }, |
| { |
| "epoch": 0.10332369375935548, |
| "grad_norm": 0.11358866095542908, |
| "learning_rate": 0.0002900748890574175, |
| "loss": 9.599, |
| "step": 6592, |
| "throughput": 12357.114017955366 |
| }, |
| { |
| "epoch": 0.1038252650882844, |
| "grad_norm": 0.10364415496587753, |
| "learning_rate": 0.0002899714557539586, |
| "loss": 9.5957, |
| "step": 6624, |
| "throughput": 12356.952324675427 |
| }, |
| { |
| "epoch": 0.1043268364172133, |
| "grad_norm": 0.1232060045003891, |
| "learning_rate": 0.00028986750706784574, |
| "loss": 9.5985, |
| "step": 6656, |
| "throughput": 12357.1067247594 |
| }, |
| { |
| "epoch": 0.10482840774614222, |
| "grad_norm": 0.102699875831604, |
| "learning_rate": 0.0002897630434277637, |
| "loss": 9.5758, |
| "step": 6688, |
| "throughput": 12356.907550118318 |
| }, |
| { |
| "epoch": 0.10532997907507112, |
| "grad_norm": 0.10401725023984909, |
| "learning_rate": 0.0002896580652645207, |
| "loss": 9.5691, |
| "step": 6720, |
| "throughput": 12357.6009277936 |
| }, |
| { |
| "epoch": 0.10583155040400004, |
| "grad_norm": 0.11600401997566223, |
| "learning_rate": 0.00028955257301104714, |
| "loss": 9.5597, |
| "step": 6752, |
| "throughput": 12358.876152325516 |
| }, |
| { |
| "epoch": 0.10633312173292894, |
| "grad_norm": 0.1357276439666748, |
| "learning_rate": 0.00028944656710239337, |
| "loss": 9.5518, |
| "step": 6784, |
| "throughput": 12360.335183790394 |
| }, |
| { |
| "epoch": 0.10683469306185785, |
| "grad_norm": 0.1068183034658432, |
| "learning_rate": 0.00028934004797572795, |
| "loss": 9.5883, |
| "step": 6816, |
| "throughput": 12361.487846370224 |
| }, |
| { |
| "epoch": 0.10733626439078676, |
| "grad_norm": 0.10323406755924225, |
| "learning_rate": 0.00028923301607033616, |
| "loss": 9.5374, |
| "step": 6848, |
| "throughput": 12362.91980993743 |
| }, |
| { |
| "epoch": 0.10783783571971567, |
| "grad_norm": 0.12117818742990494, |
| "learning_rate": 0.0002891254718276178, |
| "loss": 9.5873, |
| "step": 6880, |
| "throughput": 12364.151782712226 |
| }, |
| { |
| "epoch": 0.10833940704864459, |
| "grad_norm": 0.11063683032989502, |
| "learning_rate": 0.00028901741569108586, |
| "loss": 9.5572, |
| "step": 6912, |
| "throughput": 12364.86991250105 |
| }, |
| { |
| "epoch": 0.10884097837757349, |
| "grad_norm": 0.12235350161790848, |
| "learning_rate": 0.00028890884810636394, |
| "loss": 9.5629, |
| "step": 6944, |
| "throughput": 12364.687522607777 |
| }, |
| { |
| "epoch": 0.1093425497065024, |
| "grad_norm": 0.10094378143548965, |
| "learning_rate": 0.00028879976952118523, |
| "loss": 9.5556, |
| "step": 6976, |
| "throughput": 12364.93814754636 |
| }, |
| { |
| "epoch": 0.10984412103543131, |
| "grad_norm": 0.1071799248456955, |
| "learning_rate": 0.0002886901803853901, |
| "loss": 9.5773, |
| "step": 7008, |
| "throughput": 12364.533697610548 |
| }, |
| { |
| "epoch": 0.11034569236436022, |
| "grad_norm": 0.10255227237939835, |
| "learning_rate": 0.00028858008115092445, |
| "loss": 9.5436, |
| "step": 7040, |
| "throughput": 12365.488571719223 |
| }, |
| { |
| "epoch": 0.11084726369328914, |
| "grad_norm": 0.11478185653686523, |
| "learning_rate": 0.0002884694722718378, |
| "loss": 9.5448, |
| "step": 7072, |
| "throughput": 12366.375113572562 |
| }, |
| { |
| "epoch": 0.11134883502221804, |
| "grad_norm": 0.11082364618778229, |
| "learning_rate": 0.00028835835420428163, |
| "loss": 9.5343, |
| "step": 7104, |
| "throughput": 12367.574394631327 |
| }, |
| { |
| "epoch": 0.11185040635114696, |
| "grad_norm": 0.10560789704322815, |
| "learning_rate": 0.000288246727406507, |
| "loss": 9.5368, |
| "step": 7136, |
| "throughput": 12368.715921217981 |
| }, |
| { |
| "epoch": 0.11235197768007586, |
| "grad_norm": 0.1051524356007576, |
| "learning_rate": 0.00028813459233886335, |
| "loss": 9.5262, |
| "step": 7168, |
| "throughput": 12370.051759183418 |
| }, |
| { |
| "epoch": 0.11285354900900477, |
| "grad_norm": 0.12176744639873505, |
| "learning_rate": 0.00028802194946379585, |
| "loss": 9.5051, |
| "step": 7200, |
| "throughput": 12371.17860459098 |
| }, |
| { |
| "epoch": 0.11335512033793368, |
| "grad_norm": 0.10709969699382782, |
| "learning_rate": 0.0002879087992458442, |
| "loss": 9.5216, |
| "step": 7232, |
| "throughput": 12372.029559352753 |
| }, |
| { |
| "epoch": 0.11385669166686259, |
| "grad_norm": 0.11609054356813431, |
| "learning_rate": 0.00028779514215164015, |
| "loss": 9.5081, |
| "step": 7264, |
| "throughput": 12372.039048884106 |
| }, |
| { |
| "epoch": 0.11435826299579151, |
| "grad_norm": 0.10819747298955917, |
| "learning_rate": 0.0002876809786499059, |
| "loss": 9.5228, |
| "step": 7296, |
| "throughput": 12371.93998091193 |
| }, |
| { |
| "epoch": 0.11485983432472041, |
| "grad_norm": 0.10557551681995392, |
| "learning_rate": 0.0002875663092114521, |
| "loss": 9.5303, |
| "step": 7328, |
| "throughput": 12372.227380949147 |
| }, |
| { |
| "epoch": 0.11536140565364933, |
| "grad_norm": 0.11606358736753464, |
| "learning_rate": 0.0002874511343091758, |
| "loss": 9.517, |
| "step": 7360, |
| "throughput": 12372.649695724014 |
| }, |
| { |
| "epoch": 0.11586297698257823, |
| "grad_norm": 0.12774042785167694, |
| "learning_rate": 0.00028733545441805874, |
| "loss": 9.5215, |
| "step": 7392, |
| "throughput": 12373.206108875034 |
| }, |
| { |
| "epoch": 0.11636454831150714, |
| "grad_norm": 0.10692732781171799, |
| "learning_rate": 0.00028721927001516503, |
| "loss": 9.5291, |
| "step": 7424, |
| "throughput": 12374.371738841517 |
| }, |
| { |
| "epoch": 0.11686611964043606, |
| "grad_norm": 0.12107761949300766, |
| "learning_rate": 0.00028710258157963955, |
| "loss": 9.5179, |
| "step": 7456, |
| "throughput": 12375.376720026492 |
| }, |
| { |
| "epoch": 0.11736769096936496, |
| "grad_norm": 0.11835253238677979, |
| "learning_rate": 0.00028698538959270577, |
| "loss": 9.5159, |
| "step": 7488, |
| "throughput": 12376.636426577035 |
| }, |
| { |
| "epoch": 0.11786926229829388, |
| "grad_norm": 0.12197201699018478, |
| "learning_rate": 0.00028686769453766366, |
| "loss": 9.5129, |
| "step": 7520, |
| "throughput": 12377.680118744986 |
| }, |
| { |
| "epoch": 0.11837083362722278, |
| "grad_norm": 0.11214063316583633, |
| "learning_rate": 0.00028674949689988814, |
| "loss": 9.4886, |
| "step": 7552, |
| "throughput": 12378.917310693925 |
| }, |
| { |
| "epoch": 0.1188724049561517, |
| "grad_norm": 0.10614926367998123, |
| "learning_rate": 0.00028663079716682654, |
| "loss": 9.4835, |
| "step": 7584, |
| "throughput": 12378.823140998962 |
| }, |
| { |
| "epoch": 0.1193739762850806, |
| "grad_norm": 0.10913848131895065, |
| "learning_rate": 0.00028651159582799695, |
| "loss": 9.4918, |
| "step": 7616, |
| "throughput": 12378.437097872087 |
| }, |
| { |
| "epoch": 0.11987554761400951, |
| "grad_norm": 0.11134051531553268, |
| "learning_rate": 0.000286391893374986, |
| "loss": 9.4976, |
| "step": 7648, |
| "throughput": 12379.085729381744 |
| }, |
| { |
| "epoch": 0.12037711894293843, |
| "grad_norm": 0.10430318862199783, |
| "learning_rate": 0.0002862716903014469, |
| "loss": 9.4942, |
| "step": 7680, |
| "throughput": 12378.795603573722 |
| }, |
| { |
| "epoch": 0.12087869027186733, |
| "grad_norm": 0.10720957070589066, |
| "learning_rate": 0.0002861509871030977, |
| "loss": 9.479, |
| "step": 7712, |
| "throughput": 12379.607401723628 |
| }, |
| { |
| "epoch": 0.12138026160079625, |
| "grad_norm": 0.11613073199987411, |
| "learning_rate": 0.0002860297842777185, |
| "loss": 9.4616, |
| "step": 7744, |
| "throughput": 12380.6370253053 |
| }, |
| { |
| "epoch": 0.12188183292972515, |
| "grad_norm": 0.10742553323507309, |
| "learning_rate": 0.00028590808232515025, |
| "loss": 9.475, |
| "step": 7776, |
| "throughput": 12381.74917855792 |
| }, |
| { |
| "epoch": 0.12238340425865407, |
| "grad_norm": 0.10911645740270615, |
| "learning_rate": 0.00028578588174729214, |
| "loss": 9.4682, |
| "step": 7808, |
| "throughput": 12382.676613473282 |
| }, |
| { |
| "epoch": 0.12288497558758298, |
| "grad_norm": 0.11030824482440948, |
| "learning_rate": 0.0002856631830480997, |
| "loss": 9.472, |
| "step": 7840, |
| "throughput": 12383.85872889057 |
| }, |
| { |
| "epoch": 0.12338654691651188, |
| "grad_norm": 0.10081150382757187, |
| "learning_rate": 0.0002855399867335827, |
| "loss": 9.4654, |
| "step": 7872, |
| "throughput": 12384.80405612001 |
| }, |
| { |
| "epoch": 0.1238881182454408, |
| "grad_norm": 0.11015335470438004, |
| "learning_rate": 0.0002854162933118032, |
| "loss": 9.4729, |
| "step": 7904, |
| "throughput": 12384.971868652405 |
| }, |
| { |
| "epoch": 0.1243896895743697, |
| "grad_norm": 0.1104336678981781, |
| "learning_rate": 0.0002852921032928732, |
| "loss": 9.4595, |
| "step": 7936, |
| "throughput": 12384.722784807582 |
| }, |
| { |
| "epoch": 0.12489126090329862, |
| "grad_norm": 0.10051671415567398, |
| "learning_rate": 0.0002851674171889526, |
| "loss": 9.4595, |
| "step": 7968, |
| "throughput": 12384.931877529722 |
| }, |
| { |
| "epoch": 0.12539283223222752, |
| "grad_norm": 0.10377933830022812, |
| "learning_rate": 0.0002850422355142474, |
| "loss": 9.4561, |
| "step": 8000, |
| "throughput": 12384.742319617793 |
| }, |
| { |
| "epoch": 0.12589440356115644, |
| "grad_norm": 0.0999143123626709, |
| "learning_rate": 0.00028491655878500716, |
| "loss": 9.4604, |
| "step": 8032, |
| "throughput": 12385.513044172185 |
| }, |
| { |
| "epoch": 0.12639597489008533, |
| "grad_norm": 0.10762206465005875, |
| "learning_rate": 0.0002847903875195231, |
| "loss": 9.4473, |
| "step": 8064, |
| "throughput": 12386.24801555529 |
| }, |
| { |
| "epoch": 0.12689754621901425, |
| "grad_norm": 0.10370098799467087, |
| "learning_rate": 0.00028466372223812575, |
| "loss": 9.4184, |
| "step": 8096, |
| "throughput": 12387.234833254606 |
| }, |
| { |
| "epoch": 0.12739911754794317, |
| "grad_norm": 0.10739105939865112, |
| "learning_rate": 0.0002845365634631833, |
| "loss": 9.4831, |
| "step": 8128, |
| "throughput": 12388.129755041251 |
| }, |
| { |
| "epoch": 0.1279006888768721, |
| "grad_norm": 0.12630467116832733, |
| "learning_rate": 0.0002844089117190988, |
| "loss": 9.444, |
| "step": 8160, |
| "throughput": 12389.256771455293 |
| }, |
| { |
| "epoch": 0.12840226020580098, |
| "grad_norm": 0.10570238530635834, |
| "learning_rate": 0.0002842807675323085, |
| "loss": 9.4557, |
| "step": 8192, |
| "throughput": 12390.185039932749 |
| }, |
| { |
| "epoch": 0.1289038315347299, |
| "grad_norm": 0.0973723828792572, |
| "learning_rate": 0.00028415213143127935, |
| "loss": 9.4579, |
| "step": 8224, |
| "throughput": 12388.011068468239 |
| }, |
| { |
| "epoch": 0.1294054028636588, |
| "grad_norm": 0.09969617426395416, |
| "learning_rate": 0.00028402300394650697, |
| "loss": 9.4488, |
| "step": 8256, |
| "throughput": 12387.788565521383 |
| }, |
| { |
| "epoch": 0.1299069741925877, |
| "grad_norm": 0.106363944709301, |
| "learning_rate": 0.0002838933856105136, |
| "loss": 9.4312, |
| "step": 8288, |
| "throughput": 12388.006313569616 |
| }, |
| { |
| "epoch": 0.13040854552151662, |
| "grad_norm": 0.11680560559034348, |
| "learning_rate": 0.0002837632769578455, |
| "loss": 9.4467, |
| "step": 8320, |
| "throughput": 12388.584432786123 |
| }, |
| { |
| "epoch": 0.13091011685044554, |
| "grad_norm": 0.10573258250951767, |
| "learning_rate": 0.00028363267852507133, |
| "loss": 9.43, |
| "step": 8352, |
| "throughput": 12388.676328642412 |
| }, |
| { |
| "epoch": 0.13141168817937446, |
| "grad_norm": 0.10454431176185608, |
| "learning_rate": 0.0002835015908507793, |
| "loss": 9.4489, |
| "step": 8384, |
| "throughput": 12389.374522278958 |
| }, |
| { |
| "epoch": 0.13191325950830335, |
| "grad_norm": 0.1074761152267456, |
| "learning_rate": 0.0002833700144755753, |
| "loss": 9.4229, |
| "step": 8416, |
| "throughput": 12390.318722076217 |
| }, |
| { |
| "epoch": 0.13241483083723227, |
| "grad_norm": 0.10638105869293213, |
| "learning_rate": 0.0002832379499420808, |
| "loss": 9.4412, |
| "step": 8448, |
| "throughput": 12391.3988338955 |
| }, |
| { |
| "epoch": 0.13291640216616118, |
| "grad_norm": 0.11379750072956085, |
| "learning_rate": 0.0002831053977949303, |
| "loss": 9.4209, |
| "step": 8480, |
| "throughput": 12392.285440841459 |
| }, |
| { |
| "epoch": 0.13341797349509008, |
| "grad_norm": 0.09992935508489609, |
| "learning_rate": 0.00028297235858076923, |
| "loss": 9.4292, |
| "step": 8512, |
| "throughput": 12393.139650485358 |
| }, |
| { |
| "epoch": 0.133919544824019, |
| "grad_norm": 0.09835680574178696, |
| "learning_rate": 0.0002828388328482517, |
| "loss": 9.4154, |
| "step": 8544, |
| "throughput": 12393.640883769935 |
| }, |
| { |
| "epoch": 0.1344211161529479, |
| "grad_norm": 0.10513360053300858, |
| "learning_rate": 0.0002827048211480383, |
| "loss": 9.4142, |
| "step": 8576, |
| "throughput": 12393.44078783823 |
| }, |
| { |
| "epoch": 0.13492268748187683, |
| "grad_norm": 0.10509216040372849, |
| "learning_rate": 0.00028257032403279354, |
| "loss": 9.4211, |
| "step": 8608, |
| "throughput": 12393.843707408567 |
| }, |
| { |
| "epoch": 0.13542425881080572, |
| "grad_norm": 0.10336083173751831, |
| "learning_rate": 0.00028243534205718405, |
| "loss": 9.4149, |
| "step": 8640, |
| "throughput": 12394.43407922468 |
| }, |
| { |
| "epoch": 0.13592583013973464, |
| "grad_norm": 0.11077357828617096, |
| "learning_rate": 0.00028229987577787585, |
| "loss": 9.4099, |
| "step": 8672, |
| "throughput": 12394.714826627447 |
| }, |
| { |
| "epoch": 0.13642740146866356, |
| "grad_norm": 0.12169834226369858, |
| "learning_rate": 0.00028216392575353225, |
| "loss": 9.3903, |
| "step": 8704, |
| "throughput": 12395.355541862467 |
| }, |
| { |
| "epoch": 0.13692897279759245, |
| "grad_norm": 0.11460896581411362, |
| "learning_rate": 0.00028202749254481165, |
| "loss": 9.3963, |
| "step": 8736, |
| "throughput": 12396.24861068647 |
| }, |
| { |
| "epoch": 0.13743054412652136, |
| "grad_norm": 0.10884397476911545, |
| "learning_rate": 0.0002818905767143649, |
| "loss": 9.4084, |
| "step": 8768, |
| "throughput": 12397.254228749884 |
| }, |
| { |
| "epoch": 0.13793211545545028, |
| "grad_norm": 0.1012769564986229, |
| "learning_rate": 0.0002817531788268333, |
| "loss": 9.4047, |
| "step": 8800, |
| "throughput": 12398.007134221343 |
| }, |
| { |
| "epoch": 0.1384336867843792, |
| "grad_norm": 0.09802801162004471, |
| "learning_rate": 0.0002816152994488462, |
| "loss": 9.4027, |
| "step": 8832, |
| "throughput": 12398.821127462916 |
| }, |
| { |
| "epoch": 0.1389352581133081, |
| "grad_norm": 0.09760963916778564, |
| "learning_rate": 0.0002814769391490185, |
| "loss": 9.4263, |
| "step": 8864, |
| "throughput": 12399.59773979323 |
| }, |
| { |
| "epoch": 0.139436829442237, |
| "grad_norm": 0.10140734910964966, |
| "learning_rate": 0.0002813380984979486, |
| "loss": 9.3938, |
| "step": 8896, |
| "throughput": 12399.861320612416 |
| }, |
| { |
| "epoch": 0.13993840077116593, |
| "grad_norm": 0.10481631755828857, |
| "learning_rate": 0.00028119877806821557, |
| "loss": 9.3936, |
| "step": 8928, |
| "throughput": 12399.387348726712 |
| }, |
| { |
| "epoch": 0.14043997210009482, |
| "grad_norm": 0.09842522442340851, |
| "learning_rate": 0.00028105897843437746, |
| "loss": 9.4058, |
| "step": 8960, |
| "throughput": 12399.916140995505 |
| }, |
| { |
| "epoch": 0.14094154342902374, |
| "grad_norm": 0.10943567752838135, |
| "learning_rate": 0.0002809187001729683, |
| "loss": 9.4116, |
| "step": 8992, |
| "throughput": 12399.814656388015 |
| }, |
| { |
| "epoch": 0.14144311475795265, |
| "grad_norm": 0.10182485729455948, |
| "learning_rate": 0.00028077794386249604, |
| "loss": 9.3873, |
| "step": 9024, |
| "throughput": 12400.67111512373 |
| }, |
| { |
| "epoch": 0.14194468608688157, |
| "grad_norm": 0.11439745873212814, |
| "learning_rate": 0.0002806367100834401, |
| "loss": 9.3823, |
| "step": 9056, |
| "throughput": 12401.250114755268 |
| }, |
| { |
| "epoch": 0.14244625741581046, |
| "grad_norm": 0.10296665132045746, |
| "learning_rate": 0.00028049499941824906, |
| "loss": 9.3898, |
| "step": 9088, |
| "throughput": 12402.11882297528 |
| }, |
| { |
| "epoch": 0.14294782874473938, |
| "grad_norm": 0.10108328610658646, |
| "learning_rate": 0.0002803528124513382, |
| "loss": 9.3674, |
| "step": 9120, |
| "throughput": 12402.854530836656 |
| }, |
| { |
| "epoch": 0.1434494000736683, |
| "grad_norm": 0.10175160318613052, |
| "learning_rate": 0.00028021014976908676, |
| "loss": 9.3948, |
| "step": 9152, |
| "throughput": 12403.821189917853 |
| }, |
| { |
| "epoch": 0.1439509714025972, |
| "grad_norm": 0.09991069883108139, |
| "learning_rate": 0.0002800670119598363, |
| "loss": 9.3585, |
| "step": 9184, |
| "throughput": 12404.386932082074 |
| }, |
| { |
| "epoch": 0.1444525427315261, |
| "grad_norm": 0.10071180760860443, |
| "learning_rate": 0.0002799233996138874, |
| "loss": 9.3922, |
| "step": 9216, |
| "throughput": 12404.620936018224 |
| }, |
| { |
| "epoch": 0.14495411406045502, |
| "grad_norm": 0.09633713215589523, |
| "learning_rate": 0.00027977931332349786, |
| "loss": 9.3715, |
| "step": 9248, |
| "throughput": 12404.222577241006 |
| }, |
| { |
| "epoch": 0.14545568538938394, |
| "grad_norm": 0.09819021821022034, |
| "learning_rate": 0.00027963475368288006, |
| "loss": 9.3892, |
| "step": 9280, |
| "throughput": 12404.729388610622 |
| }, |
| { |
| "epoch": 0.14595725671831283, |
| "grad_norm": 0.10015096515417099, |
| "learning_rate": 0.00027948972128819823, |
| "loss": 9.367, |
| "step": 9312, |
| "throughput": 12404.946783085732 |
| }, |
| { |
| "epoch": 0.14645882804724175, |
| "grad_norm": 0.0994173064827919, |
| "learning_rate": 0.0002793442167375665, |
| "loss": 9.3612, |
| "step": 9344, |
| "throughput": 12405.370142775204 |
| }, |
| { |
| "epoch": 0.14696039937617067, |
| "grad_norm": 0.10099942237138748, |
| "learning_rate": 0.0002791982406310461, |
| "loss": 9.3731, |
| "step": 9376, |
| "throughput": 12405.942822251614 |
| }, |
| { |
| "epoch": 0.14746197070509956, |
| "grad_norm": 0.10506460070610046, |
| "learning_rate": 0.0002790517935706428, |
| "loss": 9.3701, |
| "step": 9408, |
| "throughput": 12406.739091407724 |
| }, |
| { |
| "epoch": 0.14796354203402848, |
| "grad_norm": 0.10151456296443939, |
| "learning_rate": 0.00027890487616030475, |
| "loss": 9.3723, |
| "step": 9440, |
| "throughput": 12407.6516964536 |
| }, |
| { |
| "epoch": 0.1484651133629574, |
| "grad_norm": 0.10469347983598709, |
| "learning_rate": 0.0002787574890059199, |
| "loss": 9.3405, |
| "step": 9472, |
| "throughput": 12408.330275954717 |
| }, |
| { |
| "epoch": 0.1489666846918863, |
| "grad_norm": 0.09777528792619705, |
| "learning_rate": 0.0002786096327153131, |
| "loss": 9.3857, |
| "step": 9504, |
| "throughput": 12409.100735501179 |
| }, |
| { |
| "epoch": 0.1494682560208152, |
| "grad_norm": 0.10799805074930191, |
| "learning_rate": 0.00027846130789824437, |
| "loss": 9.3455, |
| "step": 9536, |
| "throughput": 12409.336452258403 |
| }, |
| { |
| "epoch": 0.14996982734974412, |
| "grad_norm": 0.10517607629299164, |
| "learning_rate": 0.00027831251516640553, |
| "loss": 9.3661, |
| "step": 9568, |
| "throughput": 12409.25003662909 |
| }, |
| { |
| "epoch": 0.15047139867867304, |
| "grad_norm": 0.10305366665124893, |
| "learning_rate": 0.00027816325513341835, |
| "loss": 9.3567, |
| "step": 9600, |
| "throughput": 12409.543587883387 |
| }, |
| { |
| "epoch": 0.15097297000760193, |
| "grad_norm": 0.1012721136212349, |
| "learning_rate": 0.0002780135284148315, |
| "loss": 9.3639, |
| "step": 9632, |
| "throughput": 12409.81051181453 |
| }, |
| { |
| "epoch": 0.15147454133653085, |
| "grad_norm": 0.10102220624685287, |
| "learning_rate": 0.00027786333562811855, |
| "loss": 9.3481, |
| "step": 9664, |
| "throughput": 12409.927072305443 |
| }, |
| { |
| "epoch": 0.15197611266545977, |
| "grad_norm": 0.10052553564310074, |
| "learning_rate": 0.00027771267739267494, |
| "loss": 9.3439, |
| "step": 9696, |
| "throughput": 12410.603156199979 |
| }, |
| { |
| "epoch": 0.15247768399438866, |
| "grad_norm": 0.09986281394958496, |
| "learning_rate": 0.0002775615543298157, |
| "loss": 9.3327, |
| "step": 9728, |
| "throughput": 12411.311376912594 |
| }, |
| { |
| "epoch": 0.15297925532331758, |
| "grad_norm": 0.09924038499593735, |
| "learning_rate": 0.0002774099670627728, |
| "loss": 9.3511, |
| "step": 9760, |
| "throughput": 12412.067980367869 |
| }, |
| { |
| "epoch": 0.1534808266522465, |
| "grad_norm": 0.1034877747297287, |
| "learning_rate": 0.00027725791621669257, |
| "loss": 9.3601, |
| "step": 9792, |
| "throughput": 12412.759386683938 |
| }, |
| { |
| "epoch": 0.1539823979811754, |
| "grad_norm": 0.09982676804065704, |
| "learning_rate": 0.0002771054024186331, |
| "loss": 9.3615, |
| "step": 9824, |
| "throughput": 12413.450320974767 |
| }, |
| { |
| "epoch": 0.1544839693101043, |
| "grad_norm": 0.09577617049217224, |
| "learning_rate": 0.0002769524262975618, |
| "loss": 9.3295, |
| "step": 9856, |
| "throughput": 12413.697121302801 |
| }, |
| { |
| "epoch": 0.15498554063903322, |
| "grad_norm": 0.10734862834215164, |
| "learning_rate": 0.0002767989884843527, |
| "loss": 9.3307, |
| "step": 9888, |
| "throughput": 12413.508577803374 |
| }, |
| { |
| "epoch": 0.15548711196796214, |
| "grad_norm": 0.09929708391427994, |
| "learning_rate": 0.0002766450896117837, |
| "loss": 9.3416, |
| "step": 9920, |
| "throughput": 12413.928230848545 |
| }, |
| { |
| "epoch": 0.15598868329689103, |
| "grad_norm": 0.10370606929063797, |
| "learning_rate": 0.0002764907303145342, |
| "loss": 9.3548, |
| "step": 9952, |
| "throughput": 12414.38389159902 |
| }, |
| { |
| "epoch": 0.15649025462581995, |
| "grad_norm": 0.098115473985672, |
| "learning_rate": 0.00027633591122918244, |
| "loss": 9.3249, |
| "step": 9984, |
| "throughput": 12414.269640854896 |
| }, |
| { |
| "epoch": 0.15699182595474886, |
| "grad_norm": 0.11088874191045761, |
| "learning_rate": 0.0002761806329942028, |
| "loss": 9.3433, |
| "step": 10016, |
| "throughput": 12415.007216940608 |
| }, |
| { |
| "epoch": 0.15749339728367778, |
| "grad_norm": 0.09496035426855087, |
| "learning_rate": 0.0002760248962499632, |
| "loss": 9.3195, |
| "step": 10048, |
| "throughput": 12415.475000469298 |
| }, |
| { |
| "epoch": 0.15799496861260667, |
| "grad_norm": 0.1069183349609375, |
| "learning_rate": 0.0002758687016387223, |
| "loss": 9.3469, |
| "step": 10080, |
| "throughput": 12416.20782001634 |
| }, |
| { |
| "epoch": 0.1584965399415356, |
| "grad_norm": 0.10577341914176941, |
| "learning_rate": 0.0002757120498046273, |
| "loss": 9.3427, |
| "step": 10112, |
| "throughput": 12416.860885508157 |
| }, |
| { |
| "epoch": 0.1589981112704645, |
| "grad_norm": 0.11294721812009811, |
| "learning_rate": 0.00027555494139371077, |
| "loss": 9.3316, |
| "step": 10144, |
| "throughput": 12417.545013450359 |
| }, |
| { |
| "epoch": 0.1594996825993934, |
| "grad_norm": 0.10189155489206314, |
| "learning_rate": 0.0002753973770538882, |
| "loss": 9.3031, |
| "step": 10176, |
| "throughput": 12417.850483599095 |
| }, |
| { |
| "epoch": 0.16000125392832232, |
| "grad_norm": 0.10261227190494537, |
| "learning_rate": 0.00027523935743495553, |
| "loss": 9.2924, |
| "step": 10208, |
| "throughput": 12418.2050685847 |
| }, |
| { |
| "epoch": 0.16050282525725124, |
| "grad_norm": 0.11461346596479416, |
| "learning_rate": 0.00027508088318858604, |
| "loss": 9.3316, |
| "step": 10240, |
| "throughput": 12418.174694582778 |
| }, |
| { |
| "epoch": 0.16100439658618015, |
| "grad_norm": 0.10292674601078033, |
| "learning_rate": 0.000274921954968328, |
| "loss": 9.3192, |
| "step": 10272, |
| "throughput": 12416.573063562828 |
| }, |
| { |
| "epoch": 0.16150596791510904, |
| "grad_norm": 0.09348347783088684, |
| "learning_rate": 0.0002747625734296019, |
| "loss": 9.3187, |
| "step": 10304, |
| "throughput": 12416.975740478432 |
| }, |
| { |
| "epoch": 0.16200753924403796, |
| "grad_norm": 0.09840043634176254, |
| "learning_rate": 0.00027460273922969757, |
| "loss": 9.3455, |
| "step": 10336, |
| "throughput": 12417.054217394312 |
| }, |
| { |
| "epoch": 0.16250911057296688, |
| "grad_norm": 0.10484471917152405, |
| "learning_rate": 0.0002744424530277719, |
| "loss": 9.314, |
| "step": 10368, |
| "throughput": 12417.481807473956 |
| }, |
| { |
| "epoch": 0.16301068190189577, |
| "grad_norm": 0.09991072863340378, |
| "learning_rate": 0.0002742817154848455, |
| "loss": 9.3004, |
| "step": 10400, |
| "throughput": 12418.165294747014 |
| }, |
| { |
| "epoch": 0.1635122532308247, |
| "grad_norm": 0.10125566273927689, |
| "learning_rate": 0.00027412052726380053, |
| "loss": 9.3233, |
| "step": 10432, |
| "throughput": 12418.964350230903 |
| }, |
| { |
| "epoch": 0.1640138245597536, |
| "grad_norm": 0.09845706075429916, |
| "learning_rate": 0.00027395888902937777, |
| "loss": 9.3054, |
| "step": 10464, |
| "throughput": 12419.410394767716 |
| }, |
| { |
| "epoch": 0.16451539588868253, |
| "grad_norm": 0.09788595885038376, |
| "learning_rate": 0.0002737968014481737, |
| "loss": 9.3053, |
| "step": 10496, |
| "throughput": 12420.121766830405 |
| }, |
| { |
| "epoch": 0.16501696721761142, |
| "grad_norm": 0.10306710004806519, |
| "learning_rate": 0.000273634265188638, |
| "loss": 9.2992, |
| "step": 10528, |
| "throughput": 12420.205403133641 |
| }, |
| { |
| "epoch": 0.16551853854654033, |
| "grad_norm": 0.09703079611063004, |
| "learning_rate": 0.0002734712809210706, |
| "loss": 9.305, |
| "step": 10560, |
| "throughput": 12419.9598115143 |
| }, |
| { |
| "epoch": 0.16602010987546925, |
| "grad_norm": 0.09484507143497467, |
| "learning_rate": 0.00027330784931761925, |
| "loss": 9.2816, |
| "step": 10592, |
| "throughput": 12420.453727918748 |
| }, |
| { |
| "epoch": 0.16652168120439814, |
| "grad_norm": 0.10056675970554352, |
| "learning_rate": 0.0002731439710522763, |
| "loss": 9.2778, |
| "step": 10624, |
| "throughput": 12420.888924283547 |
| }, |
| { |
| "epoch": 0.16702325253332706, |
| "grad_norm": 0.11179500818252563, |
| "learning_rate": 0.00027297964680087617, |
| "loss": 9.2987, |
| "step": 10656, |
| "throughput": 12420.94204471036 |
| }, |
| { |
| "epoch": 0.16752482386225598, |
| "grad_norm": 0.0983198955655098, |
| "learning_rate": 0.0002728148772410926, |
| "loss": 9.306, |
| "step": 10688, |
| "throughput": 12421.60389960054 |
| }, |
| { |
| "epoch": 0.1680263951911849, |
| "grad_norm": 0.09311419725418091, |
| "learning_rate": 0.0002726496630524358, |
| "loss": 9.3227, |
| "step": 10720, |
| "throughput": 12422.161743529983 |
| }, |
| { |
| "epoch": 0.1685279665201138, |
| "grad_norm": 0.09853453934192657, |
| "learning_rate": 0.00027248400491624946, |
| "loss": 9.2681, |
| "step": 10752, |
| "throughput": 12422.810732317097 |
| }, |
| { |
| "epoch": 0.1690295378490427, |
| "grad_norm": 0.0933060273528099, |
| "learning_rate": 0.00027231790351570827, |
| "loss": 9.2915, |
| "step": 10784, |
| "throughput": 12423.395082393106 |
| }, |
| { |
| "epoch": 0.16953110917797162, |
| "grad_norm": 0.10200263559818268, |
| "learning_rate": 0.00027215135953581485, |
| "loss": 9.2958, |
| "step": 10816, |
| "throughput": 12423.998726700142 |
| }, |
| { |
| "epoch": 0.1700326805069005, |
| "grad_norm": 0.10002099722623825, |
| "learning_rate": 0.00027198437366339717, |
| "loss": 9.2777, |
| "step": 10848, |
| "throughput": 12424.287990361512 |
| }, |
| { |
| "epoch": 0.17053425183582943, |
| "grad_norm": 0.0953054279088974, |
| "learning_rate": 0.00027181694658710544, |
| "loss": 9.2702, |
| "step": 10880, |
| "throughput": 12423.905610047961 |
| }, |
| { |
| "epoch": 0.17103582316475835, |
| "grad_norm": 0.10247793793678284, |
| "learning_rate": 0.00027164907899740936, |
| "loss": 9.2791, |
| "step": 10912, |
| "throughput": 12424.41885749256 |
| }, |
| { |
| "epoch": 0.17153739449368727, |
| "grad_norm": 0.10717090964317322, |
| "learning_rate": 0.0002714807715865954, |
| "loss": 9.2969, |
| "step": 10944, |
| "throughput": 12424.8069628263 |
| }, |
| { |
| "epoch": 0.17203896582261616, |
| "grad_norm": 0.09374012798070908, |
| "learning_rate": 0.0002713120250487638, |
| "loss": 9.2734, |
| "step": 10976, |
| "throughput": 12424.6868270757 |
| }, |
| { |
| "epoch": 0.17254053715154508, |
| "grad_norm": 0.10755176097154617, |
| "learning_rate": 0.0002711428400798258, |
| "loss": 9.2656, |
| "step": 11008, |
| "throughput": 12425.323752751452 |
| }, |
| { |
| "epoch": 0.173042108480474, |
| "grad_norm": 0.10529007017612457, |
| "learning_rate": 0.00027097321737750075, |
| "loss": 9.2736, |
| "step": 11040, |
| "throughput": 12425.749055984017 |
| }, |
| { |
| "epoch": 0.17354367980940288, |
| "grad_norm": 0.09935403615236282, |
| "learning_rate": 0.00027080315764131316, |
| "loss": 9.2595, |
| "step": 11072, |
| "throughput": 12426.397228197524 |
| }, |
| { |
| "epoch": 0.1740452511383318, |
| "grad_norm": 0.09364161640405655, |
| "learning_rate": 0.0002706326615725898, |
| "loss": 9.2718, |
| "step": 11104, |
| "throughput": 12427.134187369495 |
| }, |
| { |
| "epoch": 0.17454682246726072, |
| "grad_norm": 0.10694800317287445, |
| "learning_rate": 0.0002704617298744571, |
| "loss": 9.2548, |
| "step": 11136, |
| "throughput": 12427.526446402135 |
| }, |
| { |
| "epoch": 0.17504839379618964, |
| "grad_norm": 0.09899157285690308, |
| "learning_rate": 0.00027029036325183775, |
| "loss": 9.2609, |
| "step": 11168, |
| "throughput": 12427.766299430596 |
| }, |
| { |
| "epoch": 0.17554996512511853, |
| "grad_norm": 0.09556271135807037, |
| "learning_rate": 0.0002701185624114483, |
| "loss": 9.2855, |
| "step": 11200, |
| "throughput": 12427.589754597111 |
| }, |
| { |
| "epoch": 0.17605153645404745, |
| "grad_norm": 0.09693208336830139, |
| "learning_rate": 0.0002699463280617959, |
| "loss": 9.2851, |
| "step": 11232, |
| "throughput": 12427.899615353563 |
| }, |
| { |
| "epoch": 0.17655310778297637, |
| "grad_norm": 0.10468527674674988, |
| "learning_rate": 0.00026977366091317554, |
| "loss": 9.251, |
| "step": 11264, |
| "throughput": 12428.405877491856 |
| }, |
| { |
| "epoch": 0.17705467911190526, |
| "grad_norm": 0.09517171233892441, |
| "learning_rate": 0.00026960056167766704, |
| "loss": 9.261, |
| "step": 11296, |
| "throughput": 12428.646470904001 |
| }, |
| { |
| "epoch": 0.17755625044083417, |
| "grad_norm": 0.10003001242876053, |
| "learning_rate": 0.0002694270310691321, |
| "loss": 9.2499, |
| "step": 11328, |
| "throughput": 12428.8394563157 |
| }, |
| { |
| "epoch": 0.1780578217697631, |
| "grad_norm": 0.09825020283460617, |
| "learning_rate": 0.0002692530698032116, |
| "loss": 9.2489, |
| "step": 11360, |
| "throughput": 12429.263224231341 |
| }, |
| { |
| "epoch": 0.178559393098692, |
| "grad_norm": 0.09543386101722717, |
| "learning_rate": 0.00026907867859732223, |
| "loss": 9.2502, |
| "step": 11392, |
| "throughput": 12429.864680876386 |
| }, |
| { |
| "epoch": 0.1790609644276209, |
| "grad_norm": 0.08976162225008011, |
| "learning_rate": 0.0002689038581706538, |
| "loss": 9.2548, |
| "step": 11424, |
| "throughput": 12430.568483071242 |
| }, |
| { |
| "epoch": 0.17956253575654982, |
| "grad_norm": 0.09635276347398758, |
| "learning_rate": 0.0002687286092441664, |
| "loss": 9.2324, |
| "step": 11456, |
| "throughput": 12430.97783219846 |
| }, |
| { |
| "epoch": 0.18006410708547874, |
| "grad_norm": 0.09948880970478058, |
| "learning_rate": 0.00026855293254058693, |
| "loss": 9.247, |
| "step": 11488, |
| "throughput": 12431.419926216979 |
| }, |
| { |
| "epoch": 0.18056567841440763, |
| "grad_norm": 0.09892695397138596, |
| "learning_rate": 0.0002683768287844068, |
| "loss": 9.2428, |
| "step": 11520, |
| "throughput": 12431.406197621318 |
| }, |
| { |
| "epoch": 0.18106724974333654, |
| "grad_norm": 0.08800537884235382, |
| "learning_rate": 0.0002682002987018783, |
| "loss": 9.2631, |
| "step": 11552, |
| "throughput": 12431.331997975556 |
| }, |
| { |
| "epoch": 0.18156882107226546, |
| "grad_norm": 0.09760425984859467, |
| "learning_rate": 0.00026802334302101214, |
| "loss": 9.2529, |
| "step": 11584, |
| "throughput": 12431.785048037085 |
| }, |
| { |
| "epoch": 0.18207039240119435, |
| "grad_norm": 0.09639564901590347, |
| "learning_rate": 0.000267845962471574, |
| "loss": 9.2467, |
| "step": 11616, |
| "throughput": 12432.096365632751 |
| }, |
| { |
| "epoch": 0.18257196373012327, |
| "grad_norm": 0.09691153466701508, |
| "learning_rate": 0.0002676681577850818, |
| "loss": 9.2362, |
| "step": 11648, |
| "throughput": 12432.214956023765 |
| }, |
| { |
| "epoch": 0.1830735350590522, |
| "grad_norm": 0.10566407442092896, |
| "learning_rate": 0.0002674899296948026, |
| "loss": 9.2356, |
| "step": 11680, |
| "throughput": 12432.750941160994 |
| }, |
| { |
| "epoch": 0.1835751063879811, |
| "grad_norm": 0.10178868472576141, |
| "learning_rate": 0.00026731127893574955, |
| "loss": 9.2464, |
| "step": 11712, |
| "throughput": 12433.210388907002 |
| }, |
| { |
| "epoch": 0.18407667771691, |
| "grad_norm": 0.09865662455558777, |
| "learning_rate": 0.00026713220624467894, |
| "loss": 9.258, |
| "step": 11744, |
| "throughput": 12433.810090653798 |
| }, |
| { |
| "epoch": 0.18457824904583892, |
| "grad_norm": 0.09568797051906586, |
| "learning_rate": 0.00026695271236008703, |
| "loss": 9.2356, |
| "step": 11776, |
| "throughput": 12434.359978227534 |
| }, |
| { |
| "epoch": 0.18507982037476783, |
| "grad_norm": 0.10666729509830475, |
| "learning_rate": 0.00026677279802220726, |
| "loss": 9.2451, |
| "step": 11808, |
| "throughput": 12434.619542280407 |
| }, |
| { |
| "epoch": 0.18558139170369672, |
| "grad_norm": 0.1080276370048523, |
| "learning_rate": 0.00026659246397300673, |
| "loss": 9.2341, |
| "step": 11840, |
| "throughput": 12434.90427034286 |
| }, |
| { |
| "epoch": 0.18608296303262564, |
| "grad_norm": 0.08994440734386444, |
| "learning_rate": 0.00026641171095618366, |
| "loss": 9.2243, |
| "step": 11872, |
| "throughput": 12434.770224584045 |
| }, |
| { |
| "epoch": 0.18658453436155456, |
| "grad_norm": 0.0883726105093956, |
| "learning_rate": 0.0002662305397171641, |
| "loss": 9.2367, |
| "step": 11904, |
| "throughput": 12435.450539027144 |
| }, |
| { |
| "epoch": 0.18708610569048348, |
| "grad_norm": 0.10415124893188477, |
| "learning_rate": 0.0002660489510030986, |
| "loss": 9.2432, |
| "step": 11936, |
| "throughput": 12435.823666373835 |
| }, |
| { |
| "epoch": 0.18758767701941237, |
| "grad_norm": 0.08748330175876617, |
| "learning_rate": 0.00026586694556285975, |
| "loss": 9.2405, |
| "step": 11968, |
| "throughput": 12435.880263397072 |
| }, |
| { |
| "epoch": 0.1880892483483413, |
| "grad_norm": 0.09719226509332657, |
| "learning_rate": 0.0002656845241470384, |
| "loss": 9.2218, |
| "step": 12000, |
| "throughput": 12436.268853279063 |
| }, |
| { |
| "epoch": 0.1885908196772702, |
| "grad_norm": 0.09952506422996521, |
| "learning_rate": 0.0002655016875079411, |
| "loss": 9.2282, |
| "step": 12032, |
| "throughput": 12436.61441153604 |
| }, |
| { |
| "epoch": 0.1890923910061991, |
| "grad_norm": 0.09944985061883926, |
| "learning_rate": 0.00026531843639958656, |
| "loss": 9.2178, |
| "step": 12064, |
| "throughput": 12437.222547862855 |
| }, |
| { |
| "epoch": 0.189593962335128, |
| "grad_norm": 0.09388846158981323, |
| "learning_rate": 0.00026513477157770303, |
| "loss": 9.2241, |
| "step": 12096, |
| "throughput": 12437.750289101668 |
| }, |
| { |
| "epoch": 0.19009553366405693, |
| "grad_norm": 0.09762909263372421, |
| "learning_rate": 0.0002649506937997248, |
| "loss": 9.2254, |
| "step": 12128, |
| "throughput": 12438.002763734487 |
| }, |
| { |
| "epoch": 0.19059710499298585, |
| "grad_norm": 0.09256457537412643, |
| "learning_rate": 0.00026476620382478896, |
| "loss": 9.2364, |
| "step": 12160, |
| "throughput": 12438.39101625529 |
| }, |
| { |
| "epoch": 0.19109867632191474, |
| "grad_norm": 0.09618276357650757, |
| "learning_rate": 0.0002645813024137329, |
| "loss": 9.2358, |
| "step": 12192, |
| "throughput": 12438.071607881528 |
| }, |
| { |
| "epoch": 0.19160024765084366, |
| "grad_norm": 0.09417402744293213, |
| "learning_rate": 0.00026439599032909055, |
| "loss": 9.235, |
| "step": 12224, |
| "throughput": 12438.644950872664 |
| }, |
| { |
| "epoch": 0.19210181897977258, |
| "grad_norm": 0.09499766677618027, |
| "learning_rate": 0.0002642102683350894, |
| "loss": 9.2321, |
| "step": 12256, |
| "throughput": 12439.038275554216 |
| }, |
| { |
| "epoch": 0.19260339030870147, |
| "grad_norm": 0.1137000098824501, |
| "learning_rate": 0.00026402413719764774, |
| "loss": 9.2159, |
| "step": 12288, |
| "throughput": 12439.351334000727 |
| }, |
| { |
| "epoch": 0.19310496163763038, |
| "grad_norm": 0.11453639715909958, |
| "learning_rate": 0.0002638375976843707, |
| "loss": 9.2178, |
| "step": 12320, |
| "throughput": 12437.731017940176 |
| }, |
| { |
| "epoch": 0.1936065329665593, |
| "grad_norm": 0.09753817319869995, |
| "learning_rate": 0.0002636506505645478, |
| "loss": 9.2044, |
| "step": 12352, |
| "throughput": 12438.039993313554 |
| }, |
| { |
| "epoch": 0.19410810429548822, |
| "grad_norm": 0.09776368737220764, |
| "learning_rate": 0.00026346329660914964, |
| "loss": 9.2191, |
| "step": 12384, |
| "throughput": 12438.682273817161 |
| }, |
| { |
| "epoch": 0.1946096756244171, |
| "grad_norm": 0.08489309996366501, |
| "learning_rate": 0.00026327553659082444, |
| "loss": 9.2244, |
| "step": 12416, |
| "throughput": 12439.226287196929 |
| }, |
| { |
| "epoch": 0.19511124695334603, |
| "grad_norm": 0.08857341855764389, |
| "learning_rate": 0.00026308737128389513, |
| "loss": 9.196, |
| "step": 12448, |
| "throughput": 12439.724146748655 |
| }, |
| { |
| "epoch": 0.19561281828227495, |
| "grad_norm": 0.08907376229763031, |
| "learning_rate": 0.0002628988014643558, |
| "loss": 9.2243, |
| "step": 12480, |
| "throughput": 12439.667975570137 |
| }, |
| { |
| "epoch": 0.19611438961120384, |
| "grad_norm": 0.09848618507385254, |
| "learning_rate": 0.00026270982790986916, |
| "loss": 9.2228, |
| "step": 12512, |
| "throughput": 12439.53419868086 |
| }, |
| { |
| "epoch": 0.19661596094013276, |
| "grad_norm": 0.09671392291784286, |
| "learning_rate": 0.00026252045139976254, |
| "loss": 9.2039, |
| "step": 12544, |
| "throughput": 12439.947746294905 |
| }, |
| { |
| "epoch": 0.19711753226906167, |
| "grad_norm": 0.10359849035739899, |
| "learning_rate": 0.00026233067271502536, |
| "loss": 9.1828, |
| "step": 12576, |
| "throughput": 12440.319779950294 |
| }, |
| { |
| "epoch": 0.1976191035979906, |
| "grad_norm": 0.09061454981565475, |
| "learning_rate": 0.0002621404926383054, |
| "loss": 9.2126, |
| "step": 12608, |
| "throughput": 12440.6969953609 |
| }, |
| { |
| "epoch": 0.19812067492691948, |
| "grad_norm": 0.09022842347621918, |
| "learning_rate": 0.0002619499119539059, |
| "loss": 9.1902, |
| "step": 12640, |
| "throughput": 12440.700680783257 |
| }, |
| { |
| "epoch": 0.1986222462558484, |
| "grad_norm": 0.0907374769449234, |
| "learning_rate": 0.0002617589314477821, |
| "loss": 9.1781, |
| "step": 12672, |
| "throughput": 12441.124311331203 |
| }, |
| { |
| "epoch": 0.19912381758477732, |
| "grad_norm": 0.09435463696718216, |
| "learning_rate": 0.0002615675519075383, |
| "loss": 9.1994, |
| "step": 12704, |
| "throughput": 12441.538612785944 |
| }, |
| { |
| "epoch": 0.1996253889137062, |
| "grad_norm": 0.09547092020511627, |
| "learning_rate": 0.00026137577412242415, |
| "loss": 9.1838, |
| "step": 12736, |
| "throughput": 12442.053947258997 |
| }, |
| { |
| "epoch": 0.20012696024263513, |
| "grad_norm": 0.09545017033815384, |
| "learning_rate": 0.00026118359888333193, |
| "loss": 9.1638, |
| "step": 12768, |
| "throughput": 12442.52808605685 |
| }, |
| { |
| "epoch": 0.20062853157156404, |
| "grad_norm": 0.10029665380716324, |
| "learning_rate": 0.00026099102698279276, |
| "loss": 9.1894, |
| "step": 12800, |
| "throughput": 12442.81989626998 |
| }, |
| { |
| "epoch": 0.20113010290049296, |
| "grad_norm": 0.0926876813173294, |
| "learning_rate": 0.0002607980592149739, |
| "loss": 9.1888, |
| "step": 12832, |
| "throughput": 12442.680258798211 |
| }, |
| { |
| "epoch": 0.20163167422942185, |
| "grad_norm": 0.08556065708398819, |
| "learning_rate": 0.00026060469637567484, |
| "loss": 9.1988, |
| "step": 12864, |
| "throughput": 12442.735682073666 |
| }, |
| { |
| "epoch": 0.20213324555835077, |
| "grad_norm": 0.10560698062181473, |
| "learning_rate": 0.0002604109392623246, |
| "loss": 9.2117, |
| "step": 12896, |
| "throughput": 12443.310352249038 |
| }, |
| { |
| "epoch": 0.2026348168872797, |
| "grad_norm": 0.09636469930410385, |
| "learning_rate": 0.00026021678867397803, |
| "loss": 9.1932, |
| "step": 12928, |
| "throughput": 12443.6837281359 |
| }, |
| { |
| "epoch": 0.20313638821620858, |
| "grad_norm": 0.0874796211719513, |
| "learning_rate": 0.00026002224541131274, |
| "loss": 9.1755, |
| "step": 12960, |
| "throughput": 12443.673899619098 |
| }, |
| { |
| "epoch": 0.2036379595451375, |
| "grad_norm": 0.09777245670557022, |
| "learning_rate": 0.00025982731027662575, |
| "loss": 9.1858, |
| "step": 12992, |
| "throughput": 12444.038423715197 |
| }, |
| { |
| "epoch": 0.20413953087406642, |
| "grad_norm": 0.08904954791069031, |
| "learning_rate": 0.00025963198407383015, |
| "loss": 9.1974, |
| "step": 13024, |
| "throughput": 12444.35839014486 |
| }, |
| { |
| "epoch": 0.20464110220299533, |
| "grad_norm": 0.09526413679122925, |
| "learning_rate": 0.0002594362676084517, |
| "loss": 9.1799, |
| "step": 13056, |
| "throughput": 12444.857251219728 |
| }, |
| { |
| "epoch": 0.20514267353192422, |
| "grad_norm": 0.08287783712148666, |
| "learning_rate": 0.0002592401616876258, |
| "loss": 9.183, |
| "step": 13088, |
| "throughput": 12445.341489344219 |
| }, |
| { |
| "epoch": 0.20564424486085314, |
| "grad_norm": 0.09923765808343887, |
| "learning_rate": 0.00025904366712009374, |
| "loss": 9.1939, |
| "step": 13120, |
| "throughput": 12445.612606772658 |
| }, |
| { |
| "epoch": 0.20614581618978206, |
| "grad_norm": 0.08840049058198929, |
| "learning_rate": 0.00025884678471619976, |
| "loss": 9.1789, |
| "step": 13152, |
| "throughput": 12445.759194472117 |
| }, |
| { |
| "epoch": 0.20664738751871095, |
| "grad_norm": 0.08951904624700546, |
| "learning_rate": 0.0002586495152878874, |
| "loss": 9.172, |
| "step": 13184, |
| "throughput": 12445.513086078448 |
| }, |
| { |
| "epoch": 0.20714895884763987, |
| "grad_norm": 0.09838932752609253, |
| "learning_rate": 0.0002584518596486965, |
| "loss": 9.1754, |
| "step": 13216, |
| "throughput": 12446.079319557672 |
| }, |
| { |
| "epoch": 0.2076505301765688, |
| "grad_norm": 0.09289150685071945, |
| "learning_rate": 0.00025825381861375936, |
| "loss": 9.1846, |
| "step": 13248, |
| "throughput": 12446.36823530712 |
| }, |
| { |
| "epoch": 0.2081521015054977, |
| "grad_norm": 0.09429288655519485, |
| "learning_rate": 0.00025805539299979794, |
| "loss": 9.1853, |
| "step": 13280, |
| "throughput": 12446.710311829484 |
| }, |
| { |
| "epoch": 0.2086536728344266, |
| "grad_norm": 0.09022123366594315, |
| "learning_rate": 0.0002578565836251199, |
| "loss": 9.176, |
| "step": 13312, |
| "throughput": 12446.692568537916 |
| }, |
| { |
| "epoch": 0.2091552441633555, |
| "grad_norm": 0.09260479360818863, |
| "learning_rate": 0.0002576573913096158, |
| "loss": 9.18, |
| "step": 13344, |
| "throughput": 12447.103754047945 |
| }, |
| { |
| "epoch": 0.20965681549228443, |
| "grad_norm": 0.09113609790802002, |
| "learning_rate": 0.00025745781687475534, |
| "loss": 9.1733, |
| "step": 13376, |
| "throughput": 12447.567055655258 |
| }, |
| { |
| "epoch": 0.21015838682121332, |
| "grad_norm": 0.10043779760599136, |
| "learning_rate": 0.000257257861143584, |
| "loss": 9.1684, |
| "step": 13408, |
| "throughput": 12447.914304533839 |
| }, |
| { |
| "epoch": 0.21065995815014224, |
| "grad_norm": 0.09665901213884354, |
| "learning_rate": 0.00025705752494071995, |
| "loss": 9.1682, |
| "step": 13440, |
| "throughput": 12448.32566896259 |
| }, |
| { |
| "epoch": 0.21116152947907116, |
| "grad_norm": 0.09273158758878708, |
| "learning_rate": 0.0002568568090923501, |
| "loss": 9.1623, |
| "step": 13472, |
| "throughput": 12448.45948722694 |
| }, |
| { |
| "epoch": 0.21166310080800008, |
| "grad_norm": 0.08880336582660675, |
| "learning_rate": 0.0002566557144262273, |
| "loss": 9.1719, |
| "step": 13504, |
| "throughput": 12448.16848694662 |
| }, |
| { |
| "epoch": 0.21216467213692897, |
| "grad_norm": 0.106996551156044, |
| "learning_rate": 0.00025645424177166663, |
| "loss": 9.1783, |
| "step": 13536, |
| "throughput": 12448.623930365138 |
| }, |
| { |
| "epoch": 0.21266624346585788, |
| "grad_norm": 0.08988650143146515, |
| "learning_rate": 0.0002562523919595418, |
| "loss": 9.1667, |
| "step": 13568, |
| "throughput": 12448.980666446922 |
| }, |
| { |
| "epoch": 0.2131678147947868, |
| "grad_norm": 0.08989129215478897, |
| "learning_rate": 0.0002560501658222821, |
| "loss": 9.1502, |
| "step": 13600, |
| "throughput": 12449.314149661397 |
| }, |
| { |
| "epoch": 0.2136693861237157, |
| "grad_norm": 0.09585653990507126, |
| "learning_rate": 0.0002558475641938686, |
| "loss": 9.1551, |
| "step": 13632, |
| "throughput": 12449.311532938784 |
| }, |
| { |
| "epoch": 0.2141709574526446, |
| "grad_norm": 0.09390981495380402, |
| "learning_rate": 0.00025564458790983114, |
| "loss": 9.1736, |
| "step": 13664, |
| "throughput": 12449.744463766221 |
| }, |
| { |
| "epoch": 0.21467252878157353, |
| "grad_norm": 0.09790906310081482, |
| "learning_rate": 0.0002554412378072445, |
| "loss": 9.1576, |
| "step": 13696, |
| "throughput": 12450.113208718572 |
| }, |
| { |
| "epoch": 0.21517410011050242, |
| "grad_norm": 0.09180324524641037, |
| "learning_rate": 0.0002552375147247251, |
| "loss": 9.1495, |
| "step": 13728, |
| "throughput": 12450.45926139864 |
| }, |
| { |
| "epoch": 0.21567567143943134, |
| "grad_norm": 0.09764540195465088, |
| "learning_rate": 0.0002550334195024275, |
| "loss": 9.1521, |
| "step": 13760, |
| "throughput": 12450.848037094125 |
| }, |
| { |
| "epoch": 0.21617724276836026, |
| "grad_norm": 0.09107381105422974, |
| "learning_rate": 0.00025482895298204096, |
| "loss": 9.1481, |
| "step": 13792, |
| "throughput": 12451.089328643548 |
| }, |
| { |
| "epoch": 0.21667881409728917, |
| "grad_norm": 0.09703461825847626, |
| "learning_rate": 0.0002546241160067861, |
| "loss": 9.1497, |
| "step": 13824, |
| "throughput": 12450.94018235877 |
| }, |
| { |
| "epoch": 0.21718038542621806, |
| "grad_norm": 0.08792301267385483, |
| "learning_rate": 0.00025441890942141124, |
| "loss": 9.1662, |
| "step": 13856, |
| "throughput": 12451.16430652669 |
| }, |
| { |
| "epoch": 0.21768195675514698, |
| "grad_norm": 0.09113834798336029, |
| "learning_rate": 0.00025421333407218884, |
| "loss": 9.1619, |
| "step": 13888, |
| "throughput": 12451.63478506473 |
| }, |
| { |
| "epoch": 0.2181835280840759, |
| "grad_norm": 0.08990354835987091, |
| "learning_rate": 0.0002540073908069124, |
| "loss": 9.1508, |
| "step": 13920, |
| "throughput": 12451.925579345352 |
| }, |
| { |
| "epoch": 0.2186850994130048, |
| "grad_norm": 0.09492233395576477, |
| "learning_rate": 0.0002538010804748924, |
| "loss": 9.1105, |
| "step": 13952, |
| "throughput": 12451.864245608442 |
| }, |
| { |
| "epoch": 0.2191866707419337, |
| "grad_norm": 0.08453084528446198, |
| "learning_rate": 0.0002535944039269533, |
| "loss": 9.1589, |
| "step": 13984, |
| "throughput": 12452.173408506707 |
| }, |
| { |
| "epoch": 0.21968824207086263, |
| "grad_norm": 0.08925742655992508, |
| "learning_rate": 0.0002533873620154299, |
| "loss": 9.1472, |
| "step": 14016, |
| "throughput": 12452.429342118676 |
| }, |
| { |
| "epoch": 0.22018981339979155, |
| "grad_norm": 0.09351540356874466, |
| "learning_rate": 0.0002531799555941635, |
| "loss": 9.1445, |
| "step": 14048, |
| "throughput": 12452.87988335345 |
| }, |
| { |
| "epoch": 0.22069138472872044, |
| "grad_norm": 0.09525007754564285, |
| "learning_rate": 0.00025297218551849885, |
| "loss": 9.122, |
| "step": 14080, |
| "throughput": 12453.234261015668 |
| }, |
| { |
| "epoch": 0.22119295605764935, |
| "grad_norm": 0.0959862768650055, |
| "learning_rate": 0.00025276405264528044, |
| "loss": 9.1476, |
| "step": 14112, |
| "throughput": 12453.426942170903 |
| }, |
| { |
| "epoch": 0.22169452738657827, |
| "grad_norm": 0.09031637012958527, |
| "learning_rate": 0.00025255555783284877, |
| "loss": 9.143, |
| "step": 14144, |
| "throughput": 12453.448582132878 |
| }, |
| { |
| "epoch": 0.22219609871550716, |
| "grad_norm": 0.09333668649196625, |
| "learning_rate": 0.0002523467019410371, |
| "loss": 9.1443, |
| "step": 14176, |
| "throughput": 12453.532394747865 |
| }, |
| { |
| "epoch": 0.22269767004443608, |
| "grad_norm": 0.09974166750907898, |
| "learning_rate": 0.00025213748583116776, |
| "loss": 9.1576, |
| "step": 14208, |
| "throughput": 12454.061305626105 |
| }, |
| { |
| "epoch": 0.223199241373365, |
| "grad_norm": 0.08883793652057648, |
| "learning_rate": 0.0002519279103660486, |
| "loss": 9.1129, |
| "step": 14240, |
| "throughput": 12454.288113899367 |
| }, |
| { |
| "epoch": 0.22370081270229392, |
| "grad_norm": 0.09600594639778137, |
| "learning_rate": 0.0002517179764099694, |
| "loss": 9.1099, |
| "step": 14272, |
| "throughput": 12454.587461563679 |
| }, |
| { |
| "epoch": 0.2242023840312228, |
| "grad_norm": 0.09509039670228958, |
| "learning_rate": 0.00025150768482869846, |
| "loss": 9.1359, |
| "step": 14304, |
| "throughput": 12454.547506360477 |
| }, |
| { |
| "epoch": 0.22470395536015172, |
| "grad_norm": 0.09394937753677368, |
| "learning_rate": 0.0002512970364894789, |
| "loss": 9.1322, |
| "step": 14336, |
| "throughput": 12454.915646449184 |
| }, |
| { |
| "epoch": 0.22520552668908064, |
| "grad_norm": 0.09316191077232361, |
| "learning_rate": 0.00025108603226102515, |
| "loss": 9.1384, |
| "step": 14368, |
| "throughput": 12453.864038449128 |
| }, |
| { |
| "epoch": 0.22570709801800953, |
| "grad_norm": 0.08423268049955368, |
| "learning_rate": 0.0002508746730135191, |
| "loss": 9.1312, |
| "step": 14400, |
| "throughput": 12454.192454638365 |
| }, |
| { |
| "epoch": 0.22620866934693845, |
| "grad_norm": 0.09922663122415543, |
| "learning_rate": 0.00025066295961860704, |
| "loss": 9.1184, |
| "step": 14432, |
| "throughput": 12454.558520463599 |
| }, |
| { |
| "epoch": 0.22671024067586737, |
| "grad_norm": 0.09510110318660736, |
| "learning_rate": 0.0002504508929493957, |
| "loss": 9.126, |
| "step": 14464, |
| "throughput": 12454.530903811134 |
| }, |
| { |
| "epoch": 0.2272118120047963, |
| "grad_norm": 0.09779904037714005, |
| "learning_rate": 0.00025023847388044846, |
| "loss": 9.1127, |
| "step": 14496, |
| "throughput": 12454.323381332644 |
| }, |
| { |
| "epoch": 0.22771338333372518, |
| "grad_norm": 0.09605950117111206, |
| "learning_rate": 0.0002500257032877823, |
| "loss": 9.1277, |
| "step": 14528, |
| "throughput": 12454.842419857887 |
| }, |
| { |
| "epoch": 0.2282149546626541, |
| "grad_norm": 0.09250783175230026, |
| "learning_rate": 0.0002498125820488639, |
| "loss": 9.1286, |
| "step": 14560, |
| "throughput": 12455.171467399261 |
| }, |
| { |
| "epoch": 0.22871652599158301, |
| "grad_norm": 0.08579394966363907, |
| "learning_rate": 0.00024959911104260565, |
| "loss": 9.1233, |
| "step": 14592, |
| "throughput": 12455.492864059066 |
| }, |
| { |
| "epoch": 0.2292180973205119, |
| "grad_norm": 0.0896739736199379, |
| "learning_rate": 0.00024938529114936273, |
| "loss": 9.1357, |
| "step": 14624, |
| "throughput": 12455.516121629098 |
| }, |
| { |
| "epoch": 0.22971966864944082, |
| "grad_norm": 0.09005829691886902, |
| "learning_rate": 0.000249171123250929, |
| "loss": 9.1294, |
| "step": 14656, |
| "throughput": 12455.94223097607 |
| }, |
| { |
| "epoch": 0.23022123997836974, |
| "grad_norm": 0.09095371514558792, |
| "learning_rate": 0.00024895660823053353, |
| "loss": 9.1181, |
| "step": 14688, |
| "throughput": 12456.204985354474 |
| }, |
| { |
| "epoch": 0.23072281130729866, |
| "grad_norm": 0.08464957028627396, |
| "learning_rate": 0.00024874174697283685, |
| "loss": 9.1398, |
| "step": 14720, |
| "throughput": 12456.506742235348 |
| }, |
| { |
| "epoch": 0.23122438263622755, |
| "grad_norm": 0.08647891134023666, |
| "learning_rate": 0.0002485265403639275, |
| "loss": 9.1181, |
| "step": 14752, |
| "throughput": 12456.886134556755 |
| }, |
| { |
| "epoch": 0.23172595396515647, |
| "grad_norm": 0.0868907943367958, |
| "learning_rate": 0.0002483109892913181, |
| "loss": 9.1373, |
| "step": 14784, |
| "throughput": 12456.926867132648 |
| }, |
| { |
| "epoch": 0.23222752529408539, |
| "grad_norm": 0.09411929547786713, |
| "learning_rate": 0.0002480950946439419, |
| "loss": 9.1187, |
| "step": 14816, |
| "throughput": 12456.728607751143 |
| }, |
| { |
| "epoch": 0.23272909662301428, |
| "grad_norm": 0.0950883999466896, |
| "learning_rate": 0.0002478788573121491, |
| "loss": 9.0984, |
| "step": 14848, |
| "throughput": 12457.22976486714 |
| }, |
| { |
| "epoch": 0.2332306679519432, |
| "grad_norm": 0.09563660621643066, |
| "learning_rate": 0.0002476622781877031, |
| "loss": 9.0984, |
| "step": 14880, |
| "throughput": 12457.659338375488 |
| }, |
| { |
| "epoch": 0.2337322392808721, |
| "grad_norm": 0.08693066984415054, |
| "learning_rate": 0.0002474453581637769, |
| "loss": 9.114, |
| "step": 14912, |
| "throughput": 12457.949642433958 |
| }, |
| { |
| "epoch": 0.23423381060980103, |
| "grad_norm": 0.08431732654571533, |
| "learning_rate": 0.00024722809813494933, |
| "loss": 9.116, |
| "step": 14944, |
| "throughput": 12458.053641101553 |
| }, |
| { |
| "epoch": 0.23473538193872992, |
| "grad_norm": 0.09093215316534042, |
| "learning_rate": 0.00024701049899720123, |
| "loss": 9.1089, |
| "step": 14976, |
| "throughput": 12458.21994265974 |
| }, |
| { |
| "epoch": 0.23523695326765884, |
| "grad_norm": 0.0873025506734848, |
| "learning_rate": 0.0002467925616479122, |
| "loss": 9.1335, |
| "step": 15008, |
| "throughput": 12458.469333257306 |
| }, |
| { |
| "epoch": 0.23573852459658776, |
| "grad_norm": 0.09324125945568085, |
| "learning_rate": 0.0002465742869858566, |
| "loss": 9.1183, |
| "step": 15040, |
| "throughput": 12458.901220612812 |
| }, |
| { |
| "epoch": 0.23624009592551665, |
| "grad_norm": 0.08744019269943237, |
| "learning_rate": 0.0002463556759111996, |
| "loss": 9.0946, |
| "step": 15072, |
| "throughput": 12459.200893969739 |
| }, |
| { |
| "epoch": 0.23674166725444556, |
| "grad_norm": 0.09340982139110565, |
| "learning_rate": 0.00024613672932549403, |
| "loss": 9.1079, |
| "step": 15104, |
| "throughput": 12459.360936978843 |
| }, |
| { |
| "epoch": 0.23724323858337448, |
| "grad_norm": 0.10181102901697159, |
| "learning_rate": 0.00024591744813167625, |
| "loss": 9.0957, |
| "step": 15136, |
| "throughput": 12458.99247469336 |
| }, |
| { |
| "epoch": 0.2377448099123034, |
| "grad_norm": 0.09072301536798477, |
| "learning_rate": 0.00024569783323406255, |
| "loss": 9.0942, |
| "step": 15168, |
| "throughput": 12459.357491939969 |
| }, |
| { |
| "epoch": 0.2382463812412323, |
| "grad_norm": 0.09112021327018738, |
| "learning_rate": 0.00024547788553834536, |
| "loss": 9.1048, |
| "step": 15200, |
| "throughput": 12459.768172440561 |
| }, |
| { |
| "epoch": 0.2387479525701612, |
| "grad_norm": 0.10005882382392883, |
| "learning_rate": 0.00024525760595158977, |
| "loss": 9.1169, |
| "step": 15232, |
| "throughput": 12460.056387703413 |
| }, |
| { |
| "epoch": 0.23924952389909013, |
| "grad_norm": 0.08876467496156693, |
| "learning_rate": 0.0002450369953822293, |
| "loss": 9.1045, |
| "step": 15264, |
| "throughput": 12460.229257340188 |
| }, |
| { |
| "epoch": 0.23975109522801902, |
| "grad_norm": 0.08295506238937378, |
| "learning_rate": 0.0002448160547400627, |
| "loss": 9.0954, |
| "step": 15296, |
| "throughput": 12460.40222242608 |
| }, |
| { |
| "epoch": 0.24025266655694794, |
| "grad_norm": 0.0876840129494667, |
| "learning_rate": 0.00024459478493624973, |
| "loss": 9.0972, |
| "step": 15328, |
| "throughput": 12460.757852817978 |
| }, |
| { |
| "epoch": 0.24075423788587685, |
| "grad_norm": 0.08689826726913452, |
| "learning_rate": 0.0002443731868833078, |
| "loss": 9.0995, |
| "step": 15360, |
| "throughput": 12461.081935768496 |
| }, |
| { |
| "epoch": 0.24125580921480577, |
| "grad_norm": 0.09742100536823273, |
| "learning_rate": 0.0002441512614951079, |
| "loss": 9.0689, |
| "step": 15392, |
| "throughput": 12461.290076223115 |
| }, |
| { |
| "epoch": 0.24175738054373466, |
| "grad_norm": 0.0885290801525116, |
| "learning_rate": 0.00024392900968687103, |
| "loss": 9.1068, |
| "step": 15424, |
| "throughput": 12461.528644102316 |
| }, |
| { |
| "epoch": 0.24225895187266358, |
| "grad_norm": 0.08928472548723221, |
| "learning_rate": 0.00024370643237516426, |
| "loss": 9.0912, |
| "step": 15456, |
| "throughput": 12461.374216765722 |
| }, |
| { |
| "epoch": 0.2427605232015925, |
| "grad_norm": 0.08863835781812668, |
| "learning_rate": 0.00024348353047789708, |
| "loss": 9.1112, |
| "step": 15488, |
| "throughput": 12461.407715970065 |
| }, |
| { |
| "epoch": 0.2432620945305214, |
| "grad_norm": 0.09474772959947586, |
| "learning_rate": 0.0002432603049143176, |
| "loss": 9.1056, |
| "step": 15520, |
| "throughput": 12461.8778387018 |
| }, |
| { |
| "epoch": 0.2437636658594503, |
| "grad_norm": 0.09183931350708008, |
| "learning_rate": 0.0002430367566050087, |
| "loss": 9.0976, |
| "step": 15552, |
| "throughput": 12462.158419903451 |
| }, |
| { |
| "epoch": 0.24426523718837923, |
| "grad_norm": 0.08852574229240417, |
| "learning_rate": 0.00024281288647188425, |
| "loss": 9.083, |
| "step": 15584, |
| "throughput": 12462.339616265517 |
| }, |
| { |
| "epoch": 0.24476680851730814, |
| "grad_norm": 0.08312542736530304, |
| "learning_rate": 0.00024258869543818535, |
| "loss": 9.0648, |
| "step": 15616, |
| "throughput": 12462.38442635059 |
| }, |
| { |
| "epoch": 0.24526837984623703, |
| "grad_norm": 0.08816740661859512, |
| "learning_rate": 0.00024236418442847652, |
| "loss": 9.1116, |
| "step": 15648, |
| "throughput": 12462.802347087081 |
| }, |
| { |
| "epoch": 0.24576995117516595, |
| "grad_norm": 0.08942883461713791, |
| "learning_rate": 0.0002421393543686418, |
| "loss": 9.1015, |
| "step": 15680, |
| "throughput": 12463.01906254443 |
| }, |
| { |
| "epoch": 0.24627152250409487, |
| "grad_norm": 0.09535627067089081, |
| "learning_rate": 0.00024191420618588103, |
| "loss": 9.0871, |
| "step": 15712, |
| "throughput": 12463.308121069998 |
| }, |
| { |
| "epoch": 0.24677309383302376, |
| "grad_norm": 0.08258014917373657, |
| "learning_rate": 0.000241688740808706, |
| "loss": 9.0858, |
| "step": 15744, |
| "throughput": 12463.654375533093 |
| }, |
| { |
| "epoch": 0.24727466516195268, |
| "grad_norm": 0.09317824989557266, |
| "learning_rate": 0.0002414629591669366, |
| "loss": 9.0855, |
| "step": 15776, |
| "throughput": 12463.483273297135 |
| }, |
| { |
| "epoch": 0.2477762364908816, |
| "grad_norm": 0.09186193346977234, |
| "learning_rate": 0.0002412368621916969, |
| "loss": 9.0853, |
| "step": 15808, |
| "throughput": 12463.471119369466 |
| }, |
| { |
| "epoch": 0.2482778078198105, |
| "grad_norm": 0.08628609776496887, |
| "learning_rate": 0.0002410104508154116, |
| "loss": 9.0883, |
| "step": 15840, |
| "throughput": 12463.925664207842 |
| }, |
| { |
| "epoch": 0.2487793791487394, |
| "grad_norm": 0.08932186663150787, |
| "learning_rate": 0.00024078372597180183, |
| "loss": 9.0832, |
| "step": 15872, |
| "throughput": 12464.291830861448 |
| }, |
| { |
| "epoch": 0.24928095047766832, |
| "grad_norm": 0.09460990875959396, |
| "learning_rate": 0.00024055668859588157, |
| "loss": 9.0645, |
| "step": 15904, |
| "throughput": 12464.580911922092 |
| }, |
| { |
| "epoch": 0.24978252180659724, |
| "grad_norm": 0.0907793715596199, |
| "learning_rate": 0.0002403293396239536, |
| "loss": 9.076, |
| "step": 15936, |
| "throughput": 12464.736545075013 |
| }, |
| { |
| "epoch": 0.25028409313552613, |
| "grad_norm": 0.09719564020633698, |
| "learning_rate": 0.00024010167999360575, |
| "loss": 9.0965, |
| "step": 15968, |
| "throughput": 12464.852240415541 |
| }, |
| { |
| "epoch": 0.25078566446445505, |
| "grad_norm": 0.0805700495839119, |
| "learning_rate": 0.00023987371064370698, |
| "loss": 9.0711, |
| "step": 16000, |
| "throughput": 12465.15117822591 |
| }, |
| { |
| "epoch": 0.25128723579338397, |
| "grad_norm": 0.09074220061302185, |
| "learning_rate": 0.00023964543251440363, |
| "loss": 9.0491, |
| "step": 16032, |
| "throughput": 12465.322025815385 |
| }, |
| { |
| "epoch": 0.2517888071223129, |
| "grad_norm": 0.09387928247451782, |
| "learning_rate": 0.00023941684654711534, |
| "loss": 9.0807, |
| "step": 16064, |
| "throughput": 12465.66815792879 |
| }, |
| { |
| "epoch": 0.2522903784512418, |
| "grad_norm": 0.08193206787109375, |
| "learning_rate": 0.0002391879536845313, |
| "loss": 9.0775, |
| "step": 16096, |
| "throughput": 12465.936712337047 |
| }, |
| { |
| "epoch": 0.25279194978017067, |
| "grad_norm": 0.08214768022298813, |
| "learning_rate": 0.0002389587548706064, |
| "loss": 9.062, |
| "step": 16128, |
| "throughput": 12465.487703177709 |
| }, |
| { |
| "epoch": 0.2532935211090996, |
| "grad_norm": 0.08649898320436478, |
| "learning_rate": 0.0002387292510505572, |
| "loss": 9.0575, |
| "step": 16160, |
| "throughput": 12465.917880215575 |
| }, |
| { |
| "epoch": 0.2537950924380285, |
| "grad_norm": 0.09687966853380203, |
| "learning_rate": 0.00023849944317085812, |
| "loss": 9.0867, |
| "step": 16192, |
| "throughput": 12466.294707813418 |
| }, |
| { |
| "epoch": 0.2542966637669574, |
| "grad_norm": 0.08462590724229813, |
| "learning_rate": 0.0002382693321792376, |
| "loss": 9.0744, |
| "step": 16224, |
| "throughput": 12466.617902711769 |
| }, |
| { |
| "epoch": 0.25479823509588634, |
| "grad_norm": 0.08881812542676926, |
| "learning_rate": 0.00023803891902467406, |
| "loss": 9.0858, |
| "step": 16256, |
| "throughput": 12466.848251406545 |
| }, |
| { |
| "epoch": 0.25529980642481526, |
| "grad_norm": 0.08228327333927155, |
| "learning_rate": 0.0002378082046573919, |
| "loss": 9.057, |
| "step": 16288, |
| "throughput": 12466.895905251296 |
| }, |
| { |
| "epoch": 0.2558013777537442, |
| "grad_norm": 0.0878542885184288, |
| "learning_rate": 0.00023757719002885793, |
| "loss": 9.0545, |
| "step": 16320, |
| "throughput": 12467.12757205178 |
| }, |
| { |
| "epoch": 0.25630294908267304, |
| "grad_norm": 0.0913226455450058, |
| "learning_rate": 0.00023734587609177725, |
| "loss": 9.0711, |
| "step": 16352, |
| "throughput": 12467.356317805159 |
| }, |
| { |
| "epoch": 0.25680452041160196, |
| "grad_norm": 0.09229837357997894, |
| "learning_rate": 0.000237114263800089, |
| "loss": 9.0747, |
| "step": 16384, |
| "throughput": 12467.666892843608 |
| }, |
| { |
| "epoch": 0.2573060917405309, |
| "grad_norm": 0.08693083375692368, |
| "learning_rate": 0.0002368823541089632, |
| "loss": 9.0735, |
| "step": 16416, |
| "throughput": 12466.558353855355 |
| }, |
| { |
| "epoch": 0.2578076630694598, |
| "grad_norm": 0.08696688711643219, |
| "learning_rate": 0.00023665014797479602, |
| "loss": 9.0649, |
| "step": 16448, |
| "throughput": 12466.40559245942 |
| }, |
| { |
| "epoch": 0.2583092343983887, |
| "grad_norm": 0.08479771018028259, |
| "learning_rate": 0.00023641764635520617, |
| "loss": 9.0544, |
| "step": 16480, |
| "throughput": 12466.56688086586 |
| }, |
| { |
| "epoch": 0.2588108057273176, |
| "grad_norm": 0.08818965405225754, |
| "learning_rate": 0.0002361848502090311, |
| "loss": 9.0554, |
| "step": 16512, |
| "throughput": 12466.926689103695 |
| }, |
| { |
| "epoch": 0.25931237705624655, |
| "grad_norm": 0.08338773995637894, |
| "learning_rate": 0.0002359517604963228, |
| "loss": 9.0725, |
| "step": 16544, |
| "throughput": 12467.23486422751 |
| }, |
| { |
| "epoch": 0.2598139483851754, |
| "grad_norm": 0.0901360809803009, |
| "learning_rate": 0.0002357183781783439, |
| "loss": 9.0556, |
| "step": 16576, |
| "throughput": 12467.402090050127 |
| }, |
| { |
| "epoch": 0.2603155197141043, |
| "grad_norm": 0.085529625415802, |
| "learning_rate": 0.0002354847042175638, |
| "loss": 9.0426, |
| "step": 16608, |
| "throughput": 12467.666467634639 |
| }, |
| { |
| "epoch": 0.26081709104303324, |
| "grad_norm": 0.08443993330001831, |
| "learning_rate": 0.0002352507395776546, |
| "loss": 9.0501, |
| "step": 16640, |
| "throughput": 12467.807254167117 |
| }, |
| { |
| "epoch": 0.26131866237196216, |
| "grad_norm": 0.08707763254642487, |
| "learning_rate": 0.00023501648522348715, |
| "loss": 9.0651, |
| "step": 16672, |
| "throughput": 12468.114144258147 |
| }, |
| { |
| "epoch": 0.2618202337008911, |
| "grad_norm": 0.09138821065425873, |
| "learning_rate": 0.0002347819421211271, |
| "loss": 9.0681, |
| "step": 16704, |
| "throughput": 12468.226462686382 |
| }, |
| { |
| "epoch": 0.26232180502982, |
| "grad_norm": 0.0871325135231018, |
| "learning_rate": 0.00023454711123783092, |
| "loss": 9.0517, |
| "step": 16736, |
| "throughput": 12468.42955905754 |
| }, |
| { |
| "epoch": 0.2628233763587489, |
| "grad_norm": 0.08815670013427734, |
| "learning_rate": 0.00023431199354204192, |
| "loss": 9.0533, |
| "step": 16768, |
| "throughput": 12468.204008980289 |
| }, |
| { |
| "epoch": 0.2633249476876778, |
| "grad_norm": 0.09001166373491287, |
| "learning_rate": 0.00023407659000338607, |
| "loss": 9.0623, |
| "step": 16800, |
| "throughput": 12468.376115452253 |
| }, |
| { |
| "epoch": 0.2638265190166067, |
| "grad_norm": 0.09031942486763, |
| "learning_rate": 0.00023384090159266833, |
| "loss": 9.0574, |
| "step": 16832, |
| "throughput": 12468.806218505566 |
| }, |
| { |
| "epoch": 0.2643280903455356, |
| "grad_norm": 0.08624611794948578, |
| "learning_rate": 0.00023360492928186838, |
| "loss": 9.0482, |
| "step": 16864, |
| "throughput": 12469.183607749183 |
| }, |
| { |
| "epoch": 0.26482966167446453, |
| "grad_norm": 0.09370267391204834, |
| "learning_rate": 0.00023336867404413674, |
| "loss": 9.0566, |
| "step": 16896, |
| "throughput": 12469.430245980588 |
| }, |
| { |
| "epoch": 0.26533123300339345, |
| "grad_norm": 0.08853529393672943, |
| "learning_rate": 0.0002331321368537907, |
| "loss": 9.0371, |
| "step": 16928, |
| "throughput": 12469.541779301038 |
| }, |
| { |
| "epoch": 0.26583280433232237, |
| "grad_norm": 0.08989576250314713, |
| "learning_rate": 0.0002328953186863103, |
| "loss": 9.0451, |
| "step": 16960, |
| "throughput": 12469.598915485374 |
| }, |
| { |
| "epoch": 0.2663343756612513, |
| "grad_norm": 0.08611343055963516, |
| "learning_rate": 0.00023265822051833442, |
| "loss": 9.056, |
| "step": 16992, |
| "throughput": 12469.884718462825 |
| }, |
| { |
| "epoch": 0.26683594699018015, |
| "grad_norm": 0.0905052199959755, |
| "learning_rate": 0.00023242084332765662, |
| "loss": 9.0236, |
| "step": 17024, |
| "throughput": 12469.978079412462 |
| }, |
| { |
| "epoch": 0.26733751831910907, |
| "grad_norm": 0.08522782474756241, |
| "learning_rate": 0.0002321831880932211, |
| "loss": 9.0232, |
| "step": 17056, |
| "throughput": 12470.208961711229 |
| }, |
| { |
| "epoch": 0.267839089648038, |
| "grad_norm": 0.08784380555152893, |
| "learning_rate": 0.00023194525579511876, |
| "loss": 9.0504, |
| "step": 17088, |
| "throughput": 12470.301773323268 |
| }, |
| { |
| "epoch": 0.2683406609769669, |
| "grad_norm": 0.08720999956130981, |
| "learning_rate": 0.00023170704741458308, |
| "loss": 9.0575, |
| "step": 17120, |
| "throughput": 12470.064606079304 |
| }, |
| { |
| "epoch": 0.2688422323058958, |
| "grad_norm": 0.09138575941324234, |
| "learning_rate": 0.00023146856393398615, |
| "loss": 9.0648, |
| "step": 17152, |
| "throughput": 12470.468089389984 |
| }, |
| { |
| "epoch": 0.26934380363482474, |
| "grad_norm": 0.0843966007232666, |
| "learning_rate": 0.0002312298063368346, |
| "loss": 9.0507, |
| "step": 17184, |
| "throughput": 12470.801132714467 |
| }, |
| { |
| "epoch": 0.26984537496375366, |
| "grad_norm": 0.08482661098241806, |
| "learning_rate": 0.00023099077560776536, |
| "loss": 9.045, |
| "step": 17216, |
| "throughput": 12471.096914700734 |
| }, |
| { |
| "epoch": 0.2703469462926825, |
| "grad_norm": 0.08337967842817307, |
| "learning_rate": 0.00023075147273254195, |
| "loss": 9.0286, |
| "step": 17248, |
| "throughput": 12471.145123746845 |
| }, |
| { |
| "epoch": 0.27084851762161144, |
| "grad_norm": 0.09060463309288025, |
| "learning_rate": 0.0002305118986980501, |
| "loss": 9.0553, |
| "step": 17280, |
| "throughput": 12471.393467544609 |
| }, |
| { |
| "epoch": 0.27135008895054036, |
| "grad_norm": 0.08552798628807068, |
| "learning_rate": 0.00023027205449229388, |
| "loss": 9.0473, |
| "step": 17312, |
| "throughput": 12471.505573657802 |
| }, |
| { |
| "epoch": 0.2718516602794693, |
| "grad_norm": 0.09061522781848907, |
| "learning_rate": 0.00023003194110439145, |
| "loss": 9.0404, |
| "step": 17344, |
| "throughput": 12471.799272716305 |
| }, |
| { |
| "epoch": 0.2723532316083982, |
| "grad_norm": 0.08229053765535355, |
| "learning_rate": 0.00022979155952457118, |
| "loss": 9.0418, |
| "step": 17376, |
| "throughput": 12471.917751952064 |
| }, |
| { |
| "epoch": 0.2728548029373271, |
| "grad_norm": 0.08962202817201614, |
| "learning_rate": 0.00022955091074416733, |
| "loss": 9.0345, |
| "step": 17408, |
| "throughput": 12472.004623410867 |
| }, |
| { |
| "epoch": 0.27335637426625603, |
| "grad_norm": 0.07952834665775299, |
| "learning_rate": 0.0002293099957556163, |
| "loss": 9.0516, |
| "step": 17440, |
| "throughput": 12471.662820281383 |
| }, |
| { |
| "epoch": 0.2738579455951849, |
| "grad_norm": 0.08492957800626755, |
| "learning_rate": 0.00022906881555245212, |
| "loss": 9.0019, |
| "step": 17472, |
| "throughput": 12472.05627389257 |
| }, |
| { |
| "epoch": 0.2743595169241138, |
| "grad_norm": 0.08579554408788681, |
| "learning_rate": 0.0002288273711293028, |
| "loss": 9.013, |
| "step": 17504, |
| "throughput": 12472.39775437576 |
| }, |
| { |
| "epoch": 0.27486108825304273, |
| "grad_norm": 0.0875297486782074, |
| "learning_rate": 0.00022858566348188568, |
| "loss": 9.055, |
| "step": 17536, |
| "throughput": 12472.693638964387 |
| }, |
| { |
| "epoch": 0.27536265958197165, |
| "grad_norm": 0.08446948975324631, |
| "learning_rate": 0.00022834369360700394, |
| "loss": 9.0329, |
| "step": 17568, |
| "throughput": 12472.90183148617 |
| }, |
| { |
| "epoch": 0.27586423091090057, |
| "grad_norm": 0.08951190859079361, |
| "learning_rate": 0.00022810146250254196, |
| "loss": 9.0438, |
| "step": 17600, |
| "throughput": 12473.13702330547 |
| }, |
| { |
| "epoch": 0.2763658022398295, |
| "grad_norm": 0.08349533379077911, |
| "learning_rate": 0.00022785897116746166, |
| "loss": 9.014, |
| "step": 17632, |
| "throughput": 12473.249106840682 |
| }, |
| { |
| "epoch": 0.2768673735687584, |
| "grad_norm": 0.08524182438850403, |
| "learning_rate": 0.00022761622060179793, |
| "loss": 9.0231, |
| "step": 17664, |
| "throughput": 12473.447528872795 |
| }, |
| { |
| "epoch": 0.27736894489768726, |
| "grad_norm": 0.0913608968257904, |
| "learning_rate": 0.00022737321180665488, |
| "loss": 9.0312, |
| "step": 17696, |
| "throughput": 12473.66558932742 |
| }, |
| { |
| "epoch": 0.2778705162266162, |
| "grad_norm": 0.09343055635690689, |
| "learning_rate": 0.00022712994578420143, |
| "loss": 9.0572, |
| "step": 17728, |
| "throughput": 12473.783745762661 |
| }, |
| { |
| "epoch": 0.2783720875555451, |
| "grad_norm": 0.09041980654001236, |
| "learning_rate": 0.00022688642353766746, |
| "loss": 9.0042, |
| "step": 17760, |
| "throughput": 12473.63779914607 |
| }, |
| { |
| "epoch": 0.278873658884474, |
| "grad_norm": 0.08400727808475494, |
| "learning_rate": 0.00022664264607133937, |
| "loss": 9.0289, |
| "step": 17792, |
| "throughput": 12473.776377954498 |
| }, |
| { |
| "epoch": 0.27937523021340294, |
| "grad_norm": 0.08442512899637222, |
| "learning_rate": 0.00022639861439055617, |
| "loss": 9.0285, |
| "step": 17824, |
| "throughput": 12474.12696957615 |
| }, |
| { |
| "epoch": 0.27987680154233185, |
| "grad_norm": 0.08340003341436386, |
| "learning_rate": 0.00022615432950170528, |
| "loss": 9.0122, |
| "step": 17856, |
| "throughput": 12474.5182505348 |
| }, |
| { |
| "epoch": 0.2803783728712608, |
| "grad_norm": 0.0845126211643219, |
| "learning_rate": 0.00022590979241221825, |
| "loss": 9.0138, |
| "step": 17888, |
| "throughput": 12474.690420465082 |
| }, |
| { |
| "epoch": 0.28087994420018964, |
| "grad_norm": 0.09434281289577484, |
| "learning_rate": 0.00022566500413056677, |
| "loss": 9.0015, |
| "step": 17920, |
| "throughput": 12474.861393165009 |
| }, |
| { |
| "epoch": 0.28138151552911855, |
| "grad_norm": 0.08486516028642654, |
| "learning_rate": 0.00022541996566625841, |
| "loss": 9.0289, |
| "step": 17952, |
| "throughput": 12474.946996187711 |
| }, |
| { |
| "epoch": 0.28188308685804747, |
| "grad_norm": 0.08841805905103683, |
| "learning_rate": 0.00022517467802983266, |
| "loss": 9.0236, |
| "step": 17984, |
| "throughput": 12475.122543622343 |
| }, |
| { |
| "epoch": 0.2823846581869764, |
| "grad_norm": 0.08873917162418365, |
| "learning_rate": 0.0002249291422328563, |
| "loss": 9.0198, |
| "step": 18016, |
| "throughput": 12475.351462062104 |
| }, |
| { |
| "epoch": 0.2828862295159053, |
| "grad_norm": 0.08457177132368088, |
| "learning_rate": 0.00022468335928791977, |
| "loss": 9.0135, |
| "step": 18048, |
| "throughput": 12475.472738775841 |
| }, |
| { |
| "epoch": 0.2833878008448342, |
| "grad_norm": 0.08301544189453125, |
| "learning_rate": 0.00022443733020863262, |
| "loss": 9.0039, |
| "step": 18080, |
| "throughput": 12475.459476439604 |
| }, |
| { |
| "epoch": 0.28388937217376314, |
| "grad_norm": 0.08134116977453232, |
| "learning_rate": 0.00022419105600961955, |
| "loss": 9.009, |
| "step": 18112, |
| "throughput": 12475.444797157037 |
| }, |
| { |
| "epoch": 0.284390943502692, |
| "grad_norm": 0.091349758207798, |
| "learning_rate": 0.00022394453770651607, |
| "loss": 9.0186, |
| "step": 18144, |
| "throughput": 12475.755194761135 |
| }, |
| { |
| "epoch": 0.2848925148316209, |
| "grad_norm": 0.08693146705627441, |
| "learning_rate": 0.00022369777631596436, |
| "loss": 8.9856, |
| "step": 18176, |
| "throughput": 12476.1312937397 |
| }, |
| { |
| "epoch": 0.28539408616054984, |
| "grad_norm": 0.09362078458070755, |
| "learning_rate": 0.00022345077285560914, |
| "loss": 9.0205, |
| "step": 18208, |
| "throughput": 12476.409711464714 |
| }, |
| { |
| "epoch": 0.28589565748947876, |
| "grad_norm": 0.0814775675535202, |
| "learning_rate": 0.00022320352834409343, |
| "loss": 9.0158, |
| "step": 18240, |
| "throughput": 12476.478521129118 |
| }, |
| { |
| "epoch": 0.2863972288184077, |
| "grad_norm": 0.09202456474304199, |
| "learning_rate": 0.0002229560438010543, |
| "loss": 8.986, |
| "step": 18272, |
| "throughput": 12476.62019737783 |
| }, |
| { |
| "epoch": 0.2868988001473366, |
| "grad_norm": 0.08562493324279785, |
| "learning_rate": 0.00022270832024711882, |
| "loss": 9.027, |
| "step": 18304, |
| "throughput": 12476.907886201338 |
| }, |
| { |
| "epoch": 0.2874003714762655, |
| "grad_norm": 0.08332978188991547, |
| "learning_rate": 0.00022246035870389952, |
| "loss": 8.997, |
| "step": 18336, |
| "throughput": 12477.075444601718 |
| }, |
| { |
| "epoch": 0.2879019428051944, |
| "grad_norm": 0.09485689550638199, |
| "learning_rate": 0.00022221216019399067, |
| "loss": 9.0143, |
| "step": 18368, |
| "throughput": 12477.11013677757 |
| }, |
| { |
| "epoch": 0.2884035141341233, |
| "grad_norm": 0.08859565109014511, |
| "learning_rate": 0.00022196372574096357, |
| "loss": 9.0016, |
| "step": 18400, |
| "throughput": 12477.177361621938 |
| }, |
| { |
| "epoch": 0.2889050854630522, |
| "grad_norm": 0.08996104449033737, |
| "learning_rate": 0.00022171505636936272, |
| "loss": 9.0183, |
| "step": 18432, |
| "throughput": 12476.974357767254 |
| }, |
| { |
| "epoch": 0.28940665679198113, |
| "grad_norm": 0.08240412175655365, |
| "learning_rate": 0.00022146615310470125, |
| "loss": 9.0214, |
| "step": 18464, |
| "throughput": 12476.223451799962 |
| }, |
| { |
| "epoch": 0.28990822812091005, |
| "grad_norm": 0.08608371764421463, |
| "learning_rate": 0.0002212170169734571, |
| "loss": 9.0067, |
| "step": 18496, |
| "throughput": 12476.53300311954 |
| }, |
| { |
| "epoch": 0.29040979944983897, |
| "grad_norm": 0.0910928025841713, |
| "learning_rate": 0.0002209676490030683, |
| "loss": 8.9914, |
| "step": 18528, |
| "throughput": 12476.796098408216 |
| }, |
| { |
| "epoch": 0.2909113707787679, |
| "grad_norm": 0.08564506471157074, |
| "learning_rate": 0.0002207180502219291, |
| "loss": 9.0139, |
| "step": 18560, |
| "throughput": 12476.923207970885 |
| }, |
| { |
| "epoch": 0.29141294210769675, |
| "grad_norm": 0.09284878522157669, |
| "learning_rate": 0.00022046822165938565, |
| "loss": 8.9929, |
| "step": 18592, |
| "throughput": 12477.036495695165 |
| }, |
| { |
| "epoch": 0.29191451343662567, |
| "grad_norm": 0.08678165823221207, |
| "learning_rate": 0.00022021816434573168, |
| "loss": 8.9992, |
| "step": 18624, |
| "throughput": 12477.242414439332 |
| }, |
| { |
| "epoch": 0.2924160847655546, |
| "grad_norm": 0.08513201773166656, |
| "learning_rate": 0.0002199678793122043, |
| "loss": 9.0067, |
| "step": 18656, |
| "throughput": 12477.385674047086 |
| }, |
| { |
| "epoch": 0.2929176560944835, |
| "grad_norm": 0.08333751559257507, |
| "learning_rate": 0.0002197173675909797, |
| "loss": 8.9823, |
| "step": 18688, |
| "throughput": 12477.468328011044 |
| }, |
| { |
| "epoch": 0.2934192274234124, |
| "grad_norm": 0.08798499405384064, |
| "learning_rate": 0.00021946663021516895, |
| "loss": 9.0078, |
| "step": 18720, |
| "throughput": 12477.499937727878 |
| }, |
| { |
| "epoch": 0.29392079875234134, |
| "grad_norm": 0.0856068879365921, |
| "learning_rate": 0.0002192156682188138, |
| "loss": 9.0173, |
| "step": 18752, |
| "throughput": 12477.421765774649 |
| }, |
| { |
| "epoch": 0.29442237008127026, |
| "grad_norm": 0.08265390992164612, |
| "learning_rate": 0.00021896448263688224, |
| "loss": 8.9792, |
| "step": 18784, |
| "throughput": 12477.603668330741 |
| }, |
| { |
| "epoch": 0.2949239414101991, |
| "grad_norm": 0.08359767496585846, |
| "learning_rate": 0.00021871307450526428, |
| "loss": 8.9992, |
| "step": 18816, |
| "throughput": 12477.905816630331 |
| }, |
| { |
| "epoch": 0.29542551273912804, |
| "grad_norm": 0.08933964371681213, |
| "learning_rate": 0.00021846144486076794, |
| "loss": 8.9771, |
| "step": 18848, |
| "throughput": 12478.262695014355 |
| }, |
| { |
| "epoch": 0.29592708406805696, |
| "grad_norm": 0.08512786775827408, |
| "learning_rate": 0.00021820959474111448, |
| "loss": 8.9957, |
| "step": 18880, |
| "throughput": 12478.297900233913 |
| }, |
| { |
| "epoch": 0.2964286553969859, |
| "grad_norm": 0.08374758064746857, |
| "learning_rate": 0.00021795752518493462, |
| "loss": 8.9974, |
| "step": 18912, |
| "throughput": 12478.560628901585 |
| }, |
| { |
| "epoch": 0.2969302267259148, |
| "grad_norm": 0.09408605843782425, |
| "learning_rate": 0.0002177052372317639, |
| "loss": 8.9884, |
| "step": 18944, |
| "throughput": 12478.682912323291 |
| }, |
| { |
| "epoch": 0.2974317980548437, |
| "grad_norm": 0.08023850619792938, |
| "learning_rate": 0.00021745273192203871, |
| "loss": 8.9801, |
| "step": 18976, |
| "throughput": 12478.841125057868 |
| }, |
| { |
| "epoch": 0.2979333693837726, |
| "grad_norm": 0.08267463743686676, |
| "learning_rate": 0.00021720001029709152, |
| "loss": 8.9955, |
| "step": 19008, |
| "throughput": 12479.080966465785 |
| }, |
| { |
| "epoch": 0.2984349407127015, |
| "grad_norm": 0.08654811233282089, |
| "learning_rate": 0.00021694707339914722, |
| "loss": 8.9867, |
| "step": 19040, |
| "throughput": 12478.997052386876 |
| }, |
| { |
| "epoch": 0.2989365120416304, |
| "grad_norm": 0.08444288372993469, |
| "learning_rate": 0.00021669392227131816, |
| "loss": 8.9951, |
| "step": 19072, |
| "throughput": 12478.872801380136 |
| }, |
| { |
| "epoch": 0.2994380833705593, |
| "grad_norm": 0.08606360852718353, |
| "learning_rate": 0.0002164405579576005, |
| "loss": 9.0, |
| "step": 19104, |
| "throughput": 12479.01897110598 |
| }, |
| { |
| "epoch": 0.29993965469948825, |
| "grad_norm": 0.08675479143857956, |
| "learning_rate": 0.0002161869815028694, |
| "loss": 8.985, |
| "step": 19136, |
| "throughput": 12479.319002499655 |
| }, |
| { |
| "epoch": 0.30044122602841716, |
| "grad_norm": 0.08233457058668137, |
| "learning_rate": 0.00021593319395287483, |
| "loss": 8.9862, |
| "step": 19168, |
| "throughput": 12479.665124742653 |
| }, |
| { |
| "epoch": 0.3009427973573461, |
| "grad_norm": 0.0809185728430748, |
| "learning_rate": 0.0002156791963542374, |
| "loss": 8.9818, |
| "step": 19200, |
| "throughput": 12479.90954284929 |
| }, |
| { |
| "epoch": 0.30144436868627494, |
| "grad_norm": 0.08249987661838531, |
| "learning_rate": 0.00021542498975444404, |
| "loss": 8.97, |
| "step": 19232, |
| "throughput": 12479.959052127691 |
| }, |
| { |
| "epoch": 0.30194594001520386, |
| "grad_norm": 0.0827348455786705, |
| "learning_rate": 0.0002151705752018435, |
| "loss": 8.9766, |
| "step": 19264, |
| "throughput": 12480.163904790157 |
| }, |
| { |
| "epoch": 0.3024475113441328, |
| "grad_norm": 0.08294457942247391, |
| "learning_rate": 0.0002149159537456421, |
| "loss": 8.9846, |
| "step": 19296, |
| "throughput": 12480.258854661215 |
| }, |
| { |
| "epoch": 0.3029490826730617, |
| "grad_norm": 0.08031655102968216, |
| "learning_rate": 0.00021466112643589948, |
| "loss": 8.9328, |
| "step": 19328, |
| "throughput": 12480.488916400263 |
| }, |
| { |
| "epoch": 0.3034506540019906, |
| "grad_norm": 0.08047141134738922, |
| "learning_rate": 0.00021440609432352427, |
| "loss": 8.997, |
| "step": 19360, |
| "throughput": 12480.4834522581 |
| }, |
| { |
| "epoch": 0.30395222533091953, |
| "grad_norm": 0.08190543204545975, |
| "learning_rate": 0.00021415085846026961, |
| "loss": 8.9921, |
| "step": 19392, |
| "throughput": 12480.460116763796 |
| }, |
| { |
| "epoch": 0.30445379665984845, |
| "grad_norm": 0.08133073151111603, |
| "learning_rate": 0.00021389541989872904, |
| "loss": 8.9605, |
| "step": 19424, |
| "throughput": 12480.449168736688 |
| }, |
| { |
| "epoch": 0.3049553679887773, |
| "grad_norm": 0.08820736408233643, |
| "learning_rate": 0.00021363977969233186, |
| "loss": 8.9873, |
| "step": 19456, |
| "throughput": 12480.73539843842 |
| }, |
| { |
| "epoch": 0.30545693931770623, |
| "grad_norm": 0.08675903081893921, |
| "learning_rate": 0.000213383938895339, |
| "loss": 8.9809, |
| "step": 19488, |
| "throughput": 12481.072723554666 |
| }, |
| { |
| "epoch": 0.30595851064663515, |
| "grad_norm": 0.08426385372877121, |
| "learning_rate": 0.00021312789856283885, |
| "loss": 8.9639, |
| "step": 19520, |
| "throughput": 12481.321765084207 |
| }, |
| { |
| "epoch": 0.30646008197556407, |
| "grad_norm": 0.08889193087816238, |
| "learning_rate": 0.0002128716597507423, |
| "loss": 8.9951, |
| "step": 19552, |
| "throughput": 12481.474794690199 |
| }, |
| { |
| "epoch": 0.306961653304493, |
| "grad_norm": 0.08428184688091278, |
| "learning_rate": 0.00021261522351577906, |
| "loss": 8.9918, |
| "step": 19584, |
| "throughput": 12481.585242922429 |
| }, |
| { |
| "epoch": 0.3074632246334219, |
| "grad_norm": 0.08475416898727417, |
| "learning_rate": 0.00021235859091549294, |
| "loss": 8.987, |
| "step": 19616, |
| "throughput": 12481.706000617602 |
| }, |
| { |
| "epoch": 0.3079647959623508, |
| "grad_norm": 0.07965004444122314, |
| "learning_rate": 0.0002121017630082375, |
| "loss": 8.9772, |
| "step": 19648, |
| "throughput": 12481.899747104279 |
| }, |
| { |
| "epoch": 0.3084663672912797, |
| "grad_norm": 0.07988788187503815, |
| "learning_rate": 0.0002118447408531718, |
| "loss": 8.9472, |
| "step": 19680, |
| "throughput": 12481.973442574455 |
| }, |
| { |
| "epoch": 0.3089679386202086, |
| "grad_norm": 0.08512098342180252, |
| "learning_rate": 0.00021158752551025603, |
| "loss": 8.9638, |
| "step": 19712, |
| "throughput": 12481.8821989218 |
| }, |
| { |
| "epoch": 0.3094695099491375, |
| "grad_norm": 0.08206585049629211, |
| "learning_rate": 0.0002113301180402469, |
| "loss": 8.9916, |
| "step": 19744, |
| "throughput": 12481.923126716347 |
| }, |
| { |
| "epoch": 0.30997108127806644, |
| "grad_norm": 0.08557426184415817, |
| "learning_rate": 0.0002110725195046937, |
| "loss": 8.9807, |
| "step": 19776, |
| "throughput": 12482.157295879151 |
| }, |
| { |
| "epoch": 0.31047265260699536, |
| "grad_norm": 0.08954965323209763, |
| "learning_rate": 0.00021081473096593348, |
| "loss": 8.976, |
| "step": 19808, |
| "throughput": 12482.426387632684 |
| }, |
| { |
| "epoch": 0.3109742239359243, |
| "grad_norm": 0.08191045373678207, |
| "learning_rate": 0.000210556753487087, |
| "loss": 8.968, |
| "step": 19840, |
| "throughput": 12482.767802868539 |
| }, |
| { |
| "epoch": 0.3114757952648532, |
| "grad_norm": 0.083841472864151, |
| "learning_rate": 0.00021029858813205408, |
| "loss": 8.9452, |
| "step": 19872, |
| "throughput": 12482.79312026922 |
| }, |
| { |
| "epoch": 0.31197736659378206, |
| "grad_norm": 0.08564392477273941, |
| "learning_rate": 0.00021004023596550946, |
| "loss": 8.9712, |
| "step": 19904, |
| "throughput": 12482.98858331792 |
| }, |
| { |
| "epoch": 0.312478937922711, |
| "grad_norm": 0.08598575741052628, |
| "learning_rate": 0.00020978169805289823, |
| "loss": 8.9772, |
| "step": 19936, |
| "throughput": 12483.168994339207 |
| }, |
| { |
| "epoch": 0.3129805092516399, |
| "grad_norm": 0.08651523292064667, |
| "learning_rate": 0.0002095229754604315, |
| "loss": 8.9647, |
| "step": 19968, |
| "throughput": 12483.301932936576 |
| }, |
| { |
| "epoch": 0.3134820805805688, |
| "grad_norm": 0.08066987246274948, |
| "learning_rate": 0.00020926406925508202, |
| "loss": 8.9706, |
| "step": 20000, |
| "throughput": 12483.468640673438 |
| }, |
| { |
| "epoch": 0.31398365190949773, |
| "grad_norm": 0.08455055952072144, |
| "learning_rate": 0.00020900498050457973, |
| "loss": 8.9811, |
| "step": 20032, |
| "throughput": 12483.389454275892 |
| }, |
| { |
| "epoch": 0.31448522323842665, |
| "grad_norm": 0.07816529273986816, |
| "learning_rate": 0.0002087457102774074, |
| "loss": 8.9669, |
| "step": 20064, |
| "throughput": 12483.280268234008 |
| }, |
| { |
| "epoch": 0.31498679456735557, |
| "grad_norm": 0.0842684879899025, |
| "learning_rate": 0.00020848625964279622, |
| "loss": 8.9567, |
| "step": 20096, |
| "throughput": 12483.441512925816 |
| }, |
| { |
| "epoch": 0.31548836589628443, |
| "grad_norm": 0.0792151615023613, |
| "learning_rate": 0.0002082266296707214, |
| "loss": 8.9627, |
| "step": 20128, |
| "throughput": 12483.70604001735 |
| }, |
| { |
| "epoch": 0.31598993722521335, |
| "grad_norm": 0.08477991074323654, |
| "learning_rate": 0.0002079668214318977, |
| "loss": 8.9572, |
| "step": 20160, |
| "throughput": 12484.02449720299 |
| }, |
| { |
| "epoch": 0.31649150855414226, |
| "grad_norm": 0.08497337996959686, |
| "learning_rate": 0.00020770683599777507, |
| "loss": 8.9547, |
| "step": 20192, |
| "throughput": 12484.258667934406 |
| }, |
| { |
| "epoch": 0.3169930798830712, |
| "grad_norm": 0.07823009788990021, |
| "learning_rate": 0.0002074466744405342, |
| "loss": 8.9537, |
| "step": 20224, |
| "throughput": 12484.284754838114 |
| }, |
| { |
| "epoch": 0.3174946512120001, |
| "grad_norm": 0.08207400888204575, |
| "learning_rate": 0.00020718633783308214, |
| "loss": 8.945, |
| "step": 20256, |
| "throughput": 12484.460604663742 |
| }, |
| { |
| "epoch": 0.317996222540929, |
| "grad_norm": 0.08717039227485657, |
| "learning_rate": 0.00020692582724904778, |
| "loss": 8.957, |
| "step": 20288, |
| "throughput": 12484.521166834636 |
| }, |
| { |
| "epoch": 0.31849779386985794, |
| "grad_norm": 0.08661855012178421, |
| "learning_rate": 0.00020666514376277762, |
| "loss": 8.9567, |
| "step": 20320, |
| "throughput": 12484.745023419275 |
| }, |
| { |
| "epoch": 0.3189993651987868, |
| "grad_norm": 0.08689385652542114, |
| "learning_rate": 0.00020640428844933108, |
| "loss": 8.9604, |
| "step": 20352, |
| "throughput": 12484.6409939241 |
| }, |
| { |
| "epoch": 0.3195009365277157, |
| "grad_norm": 0.08753615617752075, |
| "learning_rate": 0.00020614326238447623, |
| "loss": 8.9588, |
| "step": 20384, |
| "throughput": 12484.499387366826 |
| }, |
| { |
| "epoch": 0.32000250785664464, |
| "grad_norm": 0.08454905450344086, |
| "learning_rate": 0.0002058820666446854, |
| "loss": 8.9554, |
| "step": 20416, |
| "throughput": 12484.73143159062 |
| }, |
| { |
| "epoch": 0.32050407918557355, |
| "grad_norm": 0.08608614653348923, |
| "learning_rate": 0.00020562070230713058, |
| "loss": 8.9639, |
| "step": 20448, |
| "throughput": 12484.917610846744 |
| }, |
| { |
| "epoch": 0.32100565051450247, |
| "grad_norm": 0.08359532803297043, |
| "learning_rate": 0.00020535917044967899, |
| "loss": 8.9463, |
| "step": 20480, |
| "throughput": 12485.221669401204 |
| }, |
| { |
| "epoch": 0.3215072218434314, |
| "grad_norm": 0.09456279873847961, |
| "learning_rate": 0.00020509747215088887, |
| "loss": 8.952, |
| "step": 20512, |
| "throughput": 12484.477850075247 |
| }, |
| { |
| "epoch": 0.3220087931723603, |
| "grad_norm": 0.08295111358165741, |
| "learning_rate": 0.00020483560849000475, |
| "loss": 8.9512, |
| "step": 20544, |
| "throughput": 12484.502256651174 |
| }, |
| { |
| "epoch": 0.32251036450128917, |
| "grad_norm": 0.08061188459396362, |
| "learning_rate": 0.00020457358054695317, |
| "loss": 8.9436, |
| "step": 20576, |
| "throughput": 12484.75864322967 |
| }, |
| { |
| "epoch": 0.3230119358302181, |
| "grad_norm": 0.08765893429517746, |
| "learning_rate": 0.00020431138940233808, |
| "loss": 8.9409, |
| "step": 20608, |
| "throughput": 12484.795386174996 |
| }, |
| { |
| "epoch": 0.323513507159147, |
| "grad_norm": 0.08479173481464386, |
| "learning_rate": 0.00020404903613743664, |
| "loss": 8.9421, |
| "step": 20640, |
| "throughput": 12484.957643859123 |
| }, |
| { |
| "epoch": 0.3240150784880759, |
| "grad_norm": 0.08192974328994751, |
| "learning_rate": 0.0002037865218341944, |
| "loss": 8.951, |
| "step": 20672, |
| "throughput": 12485.044667605318 |
| }, |
| { |
| "epoch": 0.32451664981700484, |
| "grad_norm": 0.08531540632247925, |
| "learning_rate": 0.00020352384757522113, |
| "loss": 8.9339, |
| "step": 20704, |
| "throughput": 12484.871636248427 |
| }, |
| { |
| "epoch": 0.32501822114593376, |
| "grad_norm": 0.0843503326177597, |
| "learning_rate": 0.00020326101444378633, |
| "loss": 8.9596, |
| "step": 20736, |
| "throughput": 12484.93287006955 |
| }, |
| { |
| "epoch": 0.3255197924748627, |
| "grad_norm": 0.08475443720817566, |
| "learning_rate": 0.0002029980235238145, |
| "loss": 8.9572, |
| "step": 20768, |
| "throughput": 12485.100293172296 |
| }, |
| { |
| "epoch": 0.32602136380379154, |
| "grad_norm": 0.09080865979194641, |
| "learning_rate": 0.0002027348758998811, |
| "loss": 8.9502, |
| "step": 20800, |
| "throughput": 12485.411867627947 |
| }, |
| { |
| "epoch": 0.32652293513272046, |
| "grad_norm": 0.08043165504932404, |
| "learning_rate": 0.0002024715726572076, |
| "loss": 8.9619, |
| "step": 20832, |
| "throughput": 12485.646951030745 |
| }, |
| { |
| "epoch": 0.3270245064616494, |
| "grad_norm": 0.0829484760761261, |
| "learning_rate": 0.0002022081148816574, |
| "loss": 8.9353, |
| "step": 20864, |
| "throughput": 12485.772787112774 |
| }, |
| { |
| "epoch": 0.3275260777905783, |
| "grad_norm": 0.09577605128288269, |
| "learning_rate": 0.0002019445036597312, |
| "loss": 8.9345, |
| "step": 20896, |
| "throughput": 12485.953043466774 |
| }, |
| { |
| "epoch": 0.3280276491195072, |
| "grad_norm": 0.08556250482797623, |
| "learning_rate": 0.00020168074007856232, |
| "loss": 8.9435, |
| "step": 20928, |
| "throughput": 12486.017639406224 |
| }, |
| { |
| "epoch": 0.32852922044843613, |
| "grad_norm": 0.08058907091617584, |
| "learning_rate": 0.00020141682522591272, |
| "loss": 8.9289, |
| "step": 20960, |
| "throughput": 12486.170992647689 |
| }, |
| { |
| "epoch": 0.32903079177736505, |
| "grad_norm": 0.08227542787790298, |
| "learning_rate": 0.0002011527601901679, |
| "loss": 8.9489, |
| "step": 20992, |
| "throughput": 12486.233883077815 |
| }, |
| { |
| "epoch": 0.3295323631062939, |
| "grad_norm": 0.08085721731185913, |
| "learning_rate": 0.00020088854606033292, |
| "loss": 8.9545, |
| "step": 21024, |
| "throughput": 12486.051737341617 |
| }, |
| { |
| "epoch": 0.33003393443522283, |
| "grad_norm": 0.09055308252573013, |
| "learning_rate": 0.00020062418392602767, |
| "loss": 8.9369, |
| "step": 21056, |
| "throughput": 12486.145621966194 |
| }, |
| { |
| "epoch": 0.33053550576415175, |
| "grad_norm": 0.07927481830120087, |
| "learning_rate": 0.00020035967487748226, |
| "loss": 8.9486, |
| "step": 21088, |
| "throughput": 12486.34457992798 |
| }, |
| { |
| "epoch": 0.33103707709308067, |
| "grad_norm": 0.0844564139842987, |
| "learning_rate": 0.00020009502000553286, |
| "loss": 8.9383, |
| "step": 21120, |
| "throughput": 12486.611307376023 |
| }, |
| { |
| "epoch": 0.3315386484220096, |
| "grad_norm": 0.07819940149784088, |
| "learning_rate": 0.00019983022040161692, |
| "loss": 8.9262, |
| "step": 21152, |
| "throughput": 12486.924114265516 |
| }, |
| { |
| "epoch": 0.3320402197509385, |
| "grad_norm": 0.08591725677251816, |
| "learning_rate": 0.00019956527715776887, |
| "loss": 8.9463, |
| "step": 21184, |
| "throughput": 12487.161038107502 |
| }, |
| { |
| "epoch": 0.3325417910798674, |
| "grad_norm": 0.07994264364242554, |
| "learning_rate": 0.0001993001913666153, |
| "loss": 8.9448, |
| "step": 21216, |
| "throughput": 12487.150532210248 |
| }, |
| { |
| "epoch": 0.3330433624087963, |
| "grad_norm": 0.08380083739757538, |
| "learning_rate": 0.00019903496412137093, |
| "loss": 8.9406, |
| "step": 21248, |
| "throughput": 12487.258940781852 |
| }, |
| { |
| "epoch": 0.3335449337377252, |
| "grad_norm": 0.0842265859246254, |
| "learning_rate": 0.00019876959651583362, |
| "loss": 8.9426, |
| "step": 21280, |
| "throughput": 12487.332584155203 |
| }, |
| { |
| "epoch": 0.3340465050666541, |
| "grad_norm": 0.08240363746881485, |
| "learning_rate": 0.00019850408964438023, |
| "loss": 8.9497, |
| "step": 21312, |
| "throughput": 12487.40224732748 |
| }, |
| { |
| "epoch": 0.33454807639558304, |
| "grad_norm": 0.09238409996032715, |
| "learning_rate": 0.00019823844460196177, |
| "loss": 8.944, |
| "step": 21344, |
| "throughput": 12487.584956361457 |
| }, |
| { |
| "epoch": 0.33504964772451196, |
| "grad_norm": 0.08725325018167496, |
| "learning_rate": 0.00019797266248409932, |
| "loss": 8.918, |
| "step": 21376, |
| "throughput": 12487.304178496872 |
| }, |
| { |
| "epoch": 0.3355512190534409, |
| "grad_norm": 0.08461218327283859, |
| "learning_rate": 0.000197706744386879, |
| "loss": 8.9337, |
| "step": 21408, |
| "throughput": 12487.563503986132 |
| }, |
| { |
| "epoch": 0.3360527903823698, |
| "grad_norm": 0.08346909284591675, |
| "learning_rate": 0.00019744069140694795, |
| "loss": 8.9519, |
| "step": 21440, |
| "throughput": 12487.72316076954 |
| }, |
| { |
| "epoch": 0.33655436171129866, |
| "grad_norm": 0.08190145343542099, |
| "learning_rate": 0.00019717450464150935, |
| "loss": 8.9081, |
| "step": 21472, |
| "throughput": 12488.030838551273 |
| }, |
| { |
| "epoch": 0.3370559330402276, |
| "grad_norm": 0.07997757941484451, |
| "learning_rate": 0.00019690818518831827, |
| "loss": 8.9402, |
| "step": 21504, |
| "throughput": 12488.24650412556 |
| }, |
| { |
| "epoch": 0.3375575043691565, |
| "grad_norm": 0.08381337672472, |
| "learning_rate": 0.0001966417341456769, |
| "loss": 8.9227, |
| "step": 21536, |
| "throughput": 12488.279142058242 |
| }, |
| { |
| "epoch": 0.3380590756980854, |
| "grad_norm": 0.08766081184148788, |
| "learning_rate": 0.0001963751526124301, |
| "loss": 8.9325, |
| "step": 21568, |
| "throughput": 12488.457524807016 |
| }, |
| { |
| "epoch": 0.3385606470270143, |
| "grad_norm": 0.08199552446603775, |
| "learning_rate": 0.00019610844168796096, |
| "loss": 8.9479, |
| "step": 21600, |
| "throughput": 12488.495104920967 |
| }, |
| { |
| "epoch": 0.33906221835594325, |
| "grad_norm": 0.08472704142332077, |
| "learning_rate": 0.0001958416024721861, |
| "loss": 8.9206, |
| "step": 21632, |
| "throughput": 12488.592639697072 |
| }, |
| { |
| "epoch": 0.33956378968487216, |
| "grad_norm": 0.09208200871944427, |
| "learning_rate": 0.00019557463606555118, |
| "loss": 8.9175, |
| "step": 21664, |
| "throughput": 12488.699079971992 |
| }, |
| { |
| "epoch": 0.340065361013801, |
| "grad_norm": 0.08901580423116684, |
| "learning_rate": 0.0001953075435690266, |
| "loss": 8.9482, |
| "step": 21696, |
| "throughput": 12488.521563231709 |
| }, |
| { |
| "epoch": 0.34056693234272994, |
| "grad_norm": 0.08557577431201935, |
| "learning_rate": 0.0001950403260841024, |
| "loss": 8.9027, |
| "step": 21728, |
| "throughput": 12488.646320404407 |
| }, |
| { |
| "epoch": 0.34106850367165886, |
| "grad_norm": 0.08255859464406967, |
| "learning_rate": 0.0001947729847127845, |
| "loss": 8.9463, |
| "step": 21760, |
| "throughput": 12488.833395405463 |
| }, |
| { |
| "epoch": 0.3415700750005878, |
| "grad_norm": 0.08207535743713379, |
| "learning_rate": 0.00019450552055758934, |
| "loss": 8.91, |
| "step": 21792, |
| "throughput": 12489.129996873391 |
| }, |
| { |
| "epoch": 0.3420716463295167, |
| "grad_norm": 0.0815618485212326, |
| "learning_rate": 0.00019423793472153996, |
| "loss": 8.9103, |
| "step": 21824, |
| "throughput": 12489.336254605087 |
| }, |
| { |
| "epoch": 0.3425732176584456, |
| "grad_norm": 0.08508460223674774, |
| "learning_rate": 0.0001939702283081611, |
| "loss": 8.9172, |
| "step": 21856, |
| "throughput": 12489.435337129702 |
| }, |
| { |
| "epoch": 0.34307478898737453, |
| "grad_norm": 0.07797224074602127, |
| "learning_rate": 0.00019370240242147488, |
| "loss": 8.93, |
| "step": 21888, |
| "throughput": 12489.610602421475 |
| }, |
| { |
| "epoch": 0.3435763603163034, |
| "grad_norm": 0.08201367408037186, |
| "learning_rate": 0.000193434458165996, |
| "loss": 8.937, |
| "step": 21920, |
| "throughput": 12489.67687894913 |
| }, |
| { |
| "epoch": 0.3440779316452323, |
| "grad_norm": 0.07757963240146637, |
| "learning_rate": 0.00019316639664672733, |
| "loss": 8.9071, |
| "step": 21952, |
| "throughput": 12489.828498519091 |
| }, |
| { |
| "epoch": 0.34457950297416123, |
| "grad_norm": 0.08153794705867767, |
| "learning_rate": 0.00019289821896915544, |
| "loss": 8.9108, |
| "step": 21984, |
| "throughput": 12489.855986021807 |
| }, |
| { |
| "epoch": 0.34508107430309015, |
| "grad_norm": 0.08086151629686356, |
| "learning_rate": 0.00019262992623924585, |
| "loss": 8.92, |
| "step": 22016, |
| "throughput": 12489.738904543394 |
| }, |
| { |
| "epoch": 0.34558264563201907, |
| "grad_norm": 0.08799968659877777, |
| "learning_rate": 0.00019236151956343852, |
| "loss": 8.8953, |
| "step": 22048, |
| "throughput": 12489.764779282503 |
| }, |
| { |
| "epoch": 0.346084216960948, |
| "grad_norm": 0.08785464614629745, |
| "learning_rate": 0.00019209300004864341, |
| "loss": 8.9214, |
| "step": 22080, |
| "throughput": 12489.914629776307 |
| }, |
| { |
| "epoch": 0.3465857882898769, |
| "grad_norm": 0.08560092747211456, |
| "learning_rate": 0.00019182436880223585, |
| "loss": 8.9188, |
| "step": 22112, |
| "throughput": 12490.212450694407 |
| }, |
| { |
| "epoch": 0.34708735961880577, |
| "grad_norm": 0.08484335988759995, |
| "learning_rate": 0.00019155562693205178, |
| "loss": 8.9132, |
| "step": 22144, |
| "throughput": 12490.511833539385 |
| }, |
| { |
| "epoch": 0.3475889309477347, |
| "grad_norm": 0.08568539470434189, |
| "learning_rate": 0.00019128677554638355, |
| "loss": 8.8883, |
| "step": 22176, |
| "throughput": 12490.672564495928 |
| }, |
| { |
| "epoch": 0.3480905022766636, |
| "grad_norm": 0.08342450857162476, |
| "learning_rate": 0.0001910178157539751, |
| "loss": 8.9109, |
| "step": 22208, |
| "throughput": 12490.765383820944 |
| }, |
| { |
| "epoch": 0.3485920736055925, |
| "grad_norm": 0.08115052431821823, |
| "learning_rate": 0.00019074874866401733, |
| "loss": 8.9371, |
| "step": 22240, |
| "throughput": 12490.853232384366 |
| }, |
| { |
| "epoch": 0.34909364493452144, |
| "grad_norm": 0.08548019826412201, |
| "learning_rate": 0.00019047957538614375, |
| "loss": 8.9111, |
| "step": 22272, |
| "throughput": 12490.9313045578 |
| }, |
| { |
| "epoch": 0.34959521626345036, |
| "grad_norm": 0.08596168458461761, |
| "learning_rate": 0.00019021029703042576, |
| "loss": 8.9106, |
| "step": 22304, |
| "throughput": 12490.990787166018 |
| }, |
| { |
| "epoch": 0.3500967875923793, |
| "grad_norm": 0.0818779468536377, |
| "learning_rate": 0.0001899409147073681, |
| "loss": 8.9162, |
| "step": 22336, |
| "throughput": 12490.918567562465 |
| }, |
| { |
| "epoch": 0.35059835892130814, |
| "grad_norm": 0.09088233858346939, |
| "learning_rate": 0.0001896714295279043, |
| "loss": 8.9048, |
| "step": 22368, |
| "throughput": 12490.87804323234 |
| }, |
| { |
| "epoch": 0.35109993025023706, |
| "grad_norm": 0.08182457089424133, |
| "learning_rate": 0.00018940184260339194, |
| "loss": 8.9223, |
| "step": 22400, |
| "throughput": 12491.16283867804 |
| }, |
| { |
| "epoch": 0.351601501579166, |
| "grad_norm": 0.0823916345834732, |
| "learning_rate": 0.00018913215504560838, |
| "loss": 8.9362, |
| "step": 22432, |
| "throughput": 12491.302279886577 |
| }, |
| { |
| "epoch": 0.3521030729080949, |
| "grad_norm": 0.0841815397143364, |
| "learning_rate": 0.0001888623679667459, |
| "loss": 8.9064, |
| "step": 22464, |
| "throughput": 12491.589846482098 |
| }, |
| { |
| "epoch": 0.3526046442370238, |
| "grad_norm": 0.08144458383321762, |
| "learning_rate": 0.00018859248247940722, |
| "loss": 8.9139, |
| "step": 22496, |
| "throughput": 12491.790941636938 |
| }, |
| { |
| "epoch": 0.35310621556595273, |
| "grad_norm": 0.08848226815462112, |
| "learning_rate": 0.0001883224996966008, |
| "loss": 8.9261, |
| "step": 22528, |
| "throughput": 12491.801249014014 |
| }, |
| { |
| "epoch": 0.35360778689488165, |
| "grad_norm": 0.08245450258255005, |
| "learning_rate": 0.00018805242073173653, |
| "loss": 8.9141, |
| "step": 22560, |
| "throughput": 12491.01421133125 |
| }, |
| { |
| "epoch": 0.3541093582238105, |
| "grad_norm": 0.0856962725520134, |
| "learning_rate": 0.00018778224669862087, |
| "loss": 8.9172, |
| "step": 22592, |
| "throughput": 12491.096014213206 |
| }, |
| { |
| "epoch": 0.35461092955273943, |
| "grad_norm": 0.08765954524278641, |
| "learning_rate": 0.0001875119787114523, |
| "loss": 8.8969, |
| "step": 22624, |
| "throughput": 12491.141670027499 |
| }, |
| { |
| "epoch": 0.35511250088166835, |
| "grad_norm": 0.08133430778980255, |
| "learning_rate": 0.00018724161788481676, |
| "loss": 8.909, |
| "step": 22656, |
| "throughput": 12491.235690121868 |
| }, |
| { |
| "epoch": 0.35561407221059727, |
| "grad_norm": 0.07738807797431946, |
| "learning_rate": 0.00018697116533368316, |
| "loss": 8.8928, |
| "step": 22688, |
| "throughput": 12491.025826665942 |
| }, |
| { |
| "epoch": 0.3561156435395262, |
| "grad_norm": 0.08607741445302963, |
| "learning_rate": 0.00018670062217339867, |
| "loss": 8.9262, |
| "step": 22720, |
| "throughput": 12491.256184899139 |
| }, |
| { |
| "epoch": 0.3566172148684551, |
| "grad_norm": 0.08493805676698685, |
| "learning_rate": 0.0001864299895196839, |
| "loss": 8.9354, |
| "step": 22752, |
| "throughput": 12491.3963119095 |
| }, |
| { |
| "epoch": 0.357118786197384, |
| "grad_norm": 0.08784986287355423, |
| "learning_rate": 0.00018615926848862893, |
| "loss": 8.9295, |
| "step": 22784, |
| "throughput": 12491.676705107877 |
| }, |
| { |
| "epoch": 0.3576203575263129, |
| "grad_norm": 0.08774597197771072, |
| "learning_rate": 0.00018588846019668793, |
| "loss": 8.8907, |
| "step": 22816, |
| "throughput": 12491.886952645285 |
| }, |
| { |
| "epoch": 0.3581219288552418, |
| "grad_norm": 0.08723000437021255, |
| "learning_rate": 0.00018561756576067524, |
| "loss": 8.8931, |
| "step": 22848, |
| "throughput": 12491.986990443083 |
| }, |
| { |
| "epoch": 0.3586235001841707, |
| "grad_norm": 0.0842309296131134, |
| "learning_rate": 0.0001853465862977602, |
| "loss": 8.8862, |
| "step": 22880, |
| "throughput": 12492.07097622102 |
| }, |
| { |
| "epoch": 0.35912507151309964, |
| "grad_norm": 0.08362879604101181, |
| "learning_rate": 0.00018507552292546295, |
| "loss": 8.8787, |
| "step": 22912, |
| "throughput": 12492.275117711019 |
| }, |
| { |
| "epoch": 0.35962664284202855, |
| "grad_norm": 0.08330900967121124, |
| "learning_rate": 0.00018480437676164968, |
| "loss": 8.8744, |
| "step": 22944, |
| "throughput": 12492.206883242661 |
| }, |
| { |
| "epoch": 0.3601282141709575, |
| "grad_norm": 0.08307932317256927, |
| "learning_rate": 0.00018453314892452795, |
| "loss": 8.9092, |
| "step": 22976, |
| "throughput": 12492.343599424185 |
| }, |
| { |
| "epoch": 0.36062978549988634, |
| "grad_norm": 0.07551781088113785, |
| "learning_rate": 0.00018426184053264215, |
| "loss": 8.9058, |
| "step": 23008, |
| "throughput": 12492.164113570247 |
| }, |
| { |
| "epoch": 0.36113135682881525, |
| "grad_norm": 0.0835670456290245, |
| "learning_rate": 0.0001839904527048689, |
| "loss": 8.9104, |
| "step": 23040, |
| "throughput": 12492.282754142729 |
| }, |
| { |
| "epoch": 0.36163292815774417, |
| "grad_norm": 0.07876232266426086, |
| "learning_rate": 0.0001837189865604124, |
| "loss": 8.8814, |
| "step": 23072, |
| "throughput": 12492.42812864148 |
| }, |
| { |
| "epoch": 0.3621344994866731, |
| "grad_norm": 0.08294995129108429, |
| "learning_rate": 0.00018344744321879987, |
| "loss": 8.9042, |
| "step": 23104, |
| "throughput": 12492.70628042669 |
| }, |
| { |
| "epoch": 0.362636070815602, |
| "grad_norm": 0.08761091530323029, |
| "learning_rate": 0.0001831758237998768, |
| "loss": 8.8812, |
| "step": 23136, |
| "throughput": 12492.979705813483 |
| }, |
| { |
| "epoch": 0.3631376421445309, |
| "grad_norm": 0.08298665285110474, |
| "learning_rate": 0.00018290412942380252, |
| "loss": 8.8642, |
| "step": 23168, |
| "throughput": 12493.119589836599 |
| }, |
| { |
| "epoch": 0.36363921347345984, |
| "grad_norm": 0.08473809063434601, |
| "learning_rate": 0.00018263236121104543, |
| "loss": 8.8885, |
| "step": 23200, |
| "throughput": 12493.119772385908 |
| }, |
| { |
| "epoch": 0.3641407848023887, |
| "grad_norm": 0.0838628038764, |
| "learning_rate": 0.00018236052028237847, |
| "loss": 8.8879, |
| "step": 23232, |
| "throughput": 12493.347540635039 |
| }, |
| { |
| "epoch": 0.3646423561313176, |
| "grad_norm": 0.08237037807703018, |
| "learning_rate": 0.0001820886077588744, |
| "loss": 8.891, |
| "step": 23264, |
| "throughput": 12493.378674750831 |
| }, |
| { |
| "epoch": 0.36514392746024654, |
| "grad_norm": 0.08084205538034439, |
| "learning_rate": 0.00018181662476190127, |
| "loss": 8.906, |
| "step": 23296, |
| "throughput": 12493.29387889249 |
| }, |
| { |
| "epoch": 0.36564549878917546, |
| "grad_norm": 0.07574167102575302, |
| "learning_rate": 0.00018154457241311773, |
| "loss": 8.8886, |
| "step": 23328, |
| "throughput": 12493.272845143263 |
| }, |
| { |
| "epoch": 0.3661470701181044, |
| "grad_norm": 0.07789778709411621, |
| "learning_rate": 0.00018127245183446858, |
| "loss": 8.8813, |
| "step": 23360, |
| "throughput": 12493.230559544294 |
| }, |
| { |
| "epoch": 0.3666486414470333, |
| "grad_norm": 0.08263697475194931, |
| "learning_rate": 0.00018100026414817987, |
| "loss": 8.8993, |
| "step": 23392, |
| "throughput": 12493.44952418477 |
| }, |
| { |
| "epoch": 0.3671502127759622, |
| "grad_norm": 0.08247614651918411, |
| "learning_rate": 0.00018072801047675432, |
| "loss": 8.8895, |
| "step": 23424, |
| "throughput": 12493.643180969875 |
| }, |
| { |
| "epoch": 0.3676517841048911, |
| "grad_norm": 0.08045326173305511, |
| "learning_rate": 0.00018045569194296697, |
| "loss": 8.8869, |
| "step": 23456, |
| "throughput": 12493.910386986438 |
| }, |
| { |
| "epoch": 0.36815335543382, |
| "grad_norm": 0.08219939470291138, |
| "learning_rate": 0.00018018330966986022, |
| "loss": 8.8781, |
| "step": 23488, |
| "throughput": 12494.108872504368 |
| }, |
| { |
| "epoch": 0.3686549267627489, |
| "grad_norm": 0.0800880491733551, |
| "learning_rate": 0.00017991086478073943, |
| "loss": 8.9001, |
| "step": 23520, |
| "throughput": 12494.105943629875 |
| }, |
| { |
| "epoch": 0.36915649809167783, |
| "grad_norm": 0.08422353863716125, |
| "learning_rate": 0.0001796383583991681, |
| "loss": 8.8848, |
| "step": 23552, |
| "throughput": 12494.258574568401 |
| }, |
| { |
| "epoch": 0.36965806942060675, |
| "grad_norm": 0.07969654351472855, |
| "learning_rate": 0.00017936579164896333, |
| "loss": 8.886, |
| "step": 23584, |
| "throughput": 12494.279822975659 |
| }, |
| { |
| "epoch": 0.37015964074953567, |
| "grad_norm": 0.08507449179887772, |
| "learning_rate": 0.0001790931656541912, |
| "loss": 8.897, |
| "step": 23616, |
| "throughput": 12494.323654439937 |
| }, |
| { |
| "epoch": 0.3706612120784646, |
| "grad_norm": 0.08387662470340729, |
| "learning_rate": 0.00017882048153916214, |
| "loss": 8.9107, |
| "step": 23648, |
| "throughput": 12494.32082459582 |
| }, |
| { |
| "epoch": 0.37116278340739345, |
| "grad_norm": 0.08334920555353165, |
| "learning_rate": 0.00017854774042842626, |
| "loss": 8.9006, |
| "step": 23680, |
| "throughput": 12494.24012124739 |
| }, |
| { |
| "epoch": 0.37166435473632237, |
| "grad_norm": 0.08700676262378693, |
| "learning_rate": 0.00017827494344676873, |
| "loss": 8.8973, |
| "step": 23712, |
| "throughput": 12494.50655693926 |
| }, |
| { |
| "epoch": 0.3721659260652513, |
| "grad_norm": 0.07877275347709656, |
| "learning_rate": 0.000178002091719205, |
| "loss": 8.8749, |
| "step": 23744, |
| "throughput": 12494.635084936986 |
| }, |
| { |
| "epoch": 0.3726674973941802, |
| "grad_norm": 0.08263049274682999, |
| "learning_rate": 0.00017772918637097657, |
| "loss": 8.8834, |
| "step": 23776, |
| "throughput": 12494.894484086817 |
| }, |
| { |
| "epoch": 0.3731690687231091, |
| "grad_norm": 0.08499334007501602, |
| "learning_rate": 0.00017745622852754575, |
| "loss": 8.8862, |
| "step": 23808, |
| "throughput": 12495.071060301598 |
| }, |
| { |
| "epoch": 0.37367064005203804, |
| "grad_norm": 0.08328446745872498, |
| "learning_rate": 0.00017718321931459163, |
| "loss": 8.8783, |
| "step": 23840, |
| "throughput": 12495.137190776335 |
| }, |
| { |
| "epoch": 0.37417221138096696, |
| "grad_norm": 0.08706315606832504, |
| "learning_rate": 0.00017691015985800488, |
| "loss": 8.8969, |
| "step": 23872, |
| "throughput": 12495.214628327643 |
| }, |
| { |
| "epoch": 0.3746737827098958, |
| "grad_norm": 0.08637236803770065, |
| "learning_rate": 0.0001766370512838836, |
| "loss": 8.8764, |
| "step": 23904, |
| "throughput": 12495.361833829736 |
| }, |
| { |
| "epoch": 0.37517535403882474, |
| "grad_norm": 0.08245234936475754, |
| "learning_rate": 0.00017636389471852834, |
| "loss": 8.8771, |
| "step": 23936, |
| "throughput": 12495.349511779505 |
| }, |
| { |
| "epoch": 0.37567692536775366, |
| "grad_norm": 0.07800939679145813, |
| "learning_rate": 0.0001760906912884376, |
| "loss": 8.8775, |
| "step": 23968, |
| "throughput": 12495.340517367329 |
| }, |
| { |
| "epoch": 0.3761784966966826, |
| "grad_norm": 0.08624035865068436, |
| "learning_rate": 0.00017581744212030308, |
| "loss": 8.8739, |
| "step": 24000, |
| "throughput": 12495.316485319103 |
| }, |
| { |
| "epoch": 0.3766800680256115, |
| "grad_norm": 0.08235324919223785, |
| "learning_rate": 0.00017554414834100525, |
| "loss": 8.8649, |
| "step": 24032, |
| "throughput": 12495.474988710415 |
| }, |
| { |
| "epoch": 0.3771816393545404, |
| "grad_norm": 0.08272941410541534, |
| "learning_rate": 0.00017527081107760834, |
| "loss": 8.8668, |
| "step": 24064, |
| "throughput": 12495.685198313075 |
| }, |
| { |
| "epoch": 0.37768321068346933, |
| "grad_norm": 0.0816485658288002, |
| "learning_rate": 0.00017499743145735615, |
| "loss": 8.8801, |
| "step": 24096, |
| "throughput": 12495.873403249487 |
| }, |
| { |
| "epoch": 0.3781847820123982, |
| "grad_norm": 0.08586040884256363, |
| "learning_rate": 0.00017472401060766697, |
| "loss": 8.8779, |
| "step": 24128, |
| "throughput": 12496.129806396117 |
| }, |
| { |
| "epoch": 0.3786863533413271, |
| "grad_norm": 0.07926472276449203, |
| "learning_rate": 0.0001744505496561292, |
| "loss": 8.882, |
| "step": 24160, |
| "throughput": 12496.258584111014 |
| }, |
| { |
| "epoch": 0.379187924670256, |
| "grad_norm": 0.08123722672462463, |
| "learning_rate": 0.00017417704973049668, |
| "loss": 8.8913, |
| "step": 24192, |
| "throughput": 12496.246278915552 |
| }, |
| { |
| "epoch": 0.37968949599918494, |
| "grad_norm": 0.08329039812088013, |
| "learning_rate": 0.00017390351195868385, |
| "loss": 8.8809, |
| "step": 24224, |
| "throughput": 12496.423361994388 |
| }, |
| { |
| "epoch": 0.38019106732811386, |
| "grad_norm": 0.08363816887140274, |
| "learning_rate": 0.00017362993746876135, |
| "loss": 8.879, |
| "step": 24256, |
| "throughput": 12496.36571410399 |
| }, |
| { |
| "epoch": 0.3806926386570428, |
| "grad_norm": 0.07615621387958527, |
| "learning_rate": 0.00017335632738895113, |
| "loss": 8.8774, |
| "step": 24288, |
| "throughput": 12496.385541333166 |
| }, |
| { |
| "epoch": 0.3811942099859717, |
| "grad_norm": 0.08446671068668365, |
| "learning_rate": 0.000173082682847622, |
| "loss": 8.875, |
| "step": 24320, |
| "throughput": 12496.301883161885 |
| }, |
| { |
| "epoch": 0.38169578131490056, |
| "grad_norm": 0.07616794854402542, |
| "learning_rate": 0.0001728090049732848, |
| "loss": 8.8687, |
| "step": 24352, |
| "throughput": 12496.402973511722 |
| }, |
| { |
| "epoch": 0.3821973526438295, |
| "grad_norm": 0.08101163804531097, |
| "learning_rate": 0.00017253529489458802, |
| "loss": 8.8822, |
| "step": 24384, |
| "throughput": 12496.606250274554 |
| }, |
| { |
| "epoch": 0.3826989239727584, |
| "grad_norm": 0.08165282756090164, |
| "learning_rate": 0.00017226155374031271, |
| "loss": 8.8651, |
| "step": 24416, |
| "throughput": 12496.778618678321 |
| }, |
| { |
| "epoch": 0.3832004953016873, |
| "grad_norm": 0.08300849050283432, |
| "learning_rate": 0.0001719877826393683, |
| "loss": 8.8663, |
| "step": 24448, |
| "throughput": 12497.034343131501 |
| }, |
| { |
| "epoch": 0.38370206663061623, |
| "grad_norm": 0.08287367969751358, |
| "learning_rate": 0.00017171398272078752, |
| "loss": 8.8555, |
| "step": 24480, |
| "throughput": 12497.208343750086 |
| }, |
| { |
| "epoch": 0.38420363795954515, |
| "grad_norm": 0.07704202830791473, |
| "learning_rate": 0.00017144015511372208, |
| "loss": 8.8612, |
| "step": 24512, |
| "throughput": 12497.156610468535 |
| }, |
| { |
| "epoch": 0.38470520928847407, |
| "grad_norm": 0.08153821527957916, |
| "learning_rate": 0.00017116630094743792, |
| "loss": 8.8589, |
| "step": 24544, |
| "throughput": 12497.372581395 |
| }, |
| { |
| "epoch": 0.38520678061740293, |
| "grad_norm": 0.0911712720990181, |
| "learning_rate": 0.00017089242135131036, |
| "loss": 8.86, |
| "step": 24576, |
| "throughput": 12497.384800573247 |
| }, |
| { |
| "epoch": 0.38570835194633185, |
| "grad_norm": 0.08098107576370239, |
| "learning_rate": 0.0001706185174548197, |
| "loss": 8.8532, |
| "step": 24608, |
| "throughput": 12496.52204489201 |
| }, |
| { |
| "epoch": 0.38620992327526077, |
| "grad_norm": 0.08704471588134766, |
| "learning_rate": 0.0001703445903875464, |
| "loss": 8.8599, |
| "step": 24640, |
| "throughput": 12496.435891320403 |
| }, |
| { |
| "epoch": 0.3867114946041897, |
| "grad_norm": 0.07287541031837463, |
| "learning_rate": 0.00017007064127916644, |
| "loss": 8.8649, |
| "step": 24672, |
| "throughput": 12496.530306289622 |
| }, |
| { |
| "epoch": 0.3872130659331186, |
| "grad_norm": 0.08605916053056717, |
| "learning_rate": 0.0001697966712594469, |
| "loss": 8.8758, |
| "step": 24704, |
| "throughput": 12496.624926668494 |
| }, |
| { |
| "epoch": 0.3877146372620475, |
| "grad_norm": 0.08018253743648529, |
| "learning_rate": 0.00016952268145824082, |
| "loss": 8.8595, |
| "step": 24736, |
| "throughput": 12496.806147490599 |
| }, |
| { |
| "epoch": 0.38821620859097644, |
| "grad_norm": 0.08926723152399063, |
| "learning_rate": 0.00016924867300548304, |
| "loss": 8.8501, |
| "step": 24768, |
| "throughput": 12497.061754200398 |
| }, |
| { |
| "epoch": 0.3887177799199053, |
| "grad_norm": 0.08146440982818604, |
| "learning_rate": 0.00016897464703118515, |
| "loss": 8.8759, |
| "step": 24800, |
| "throughput": 12497.260149261956 |
| }, |
| { |
| "epoch": 0.3892193512488342, |
| "grad_norm": 0.08535002171993256, |
| "learning_rate": 0.00016870060466543112, |
| "loss": 8.8354, |
| "step": 24832, |
| "throughput": 12497.26649049281 |
| }, |
| { |
| "epoch": 0.38972092257776314, |
| "grad_norm": 0.08504383265972137, |
| "learning_rate": 0.0001684265470383725, |
| "loss": 8.8764, |
| "step": 24864, |
| "throughput": 12497.399025265984 |
| }, |
| { |
| "epoch": 0.39022249390669206, |
| "grad_norm": 0.07920678704977036, |
| "learning_rate": 0.0001681524752802237, |
| "loss": 8.8827, |
| "step": 24896, |
| "throughput": 12497.535226442347 |
| }, |
| { |
| "epoch": 0.390724065235621, |
| "grad_norm": 0.07792805880308151, |
| "learning_rate": 0.00016787839052125758, |
| "loss": 8.8628, |
| "step": 24928, |
| "throughput": 12497.435883965982 |
| }, |
| { |
| "epoch": 0.3912256365645499, |
| "grad_norm": 0.07983972132205963, |
| "learning_rate": 0.00016760429389180037, |
| "loss": 8.879, |
| "step": 24960, |
| "throughput": 12497.395890011567 |
| }, |
| { |
| "epoch": 0.3917272078934788, |
| "grad_norm": 0.07780390232801437, |
| "learning_rate": 0.00016733018652222744, |
| "loss": 8.8389, |
| "step": 24992, |
| "throughput": 12497.41402842221 |
| }, |
| { |
| "epoch": 0.3922287792224077, |
| "grad_norm": 0.07822234183549881, |
| "learning_rate": 0.0001670560695429584, |
| "loss": 8.8569, |
| "step": 25024, |
| "throughput": 12497.555066487259 |
| }, |
| { |
| "epoch": 0.3927303505513366, |
| "grad_norm": 0.08859014511108398, |
| "learning_rate": 0.00016678194408445245, |
| "loss": 8.8311, |
| "step": 25056, |
| "throughput": 12497.761692108925 |
| }, |
| { |
| "epoch": 0.3932319218802655, |
| "grad_norm": 0.08568881452083588, |
| "learning_rate": 0.00016650781127720382, |
| "loss": 8.8668, |
| "step": 25088, |
| "throughput": 12497.92260971887 |
| }, |
| { |
| "epoch": 0.39373349320919443, |
| "grad_norm": 0.08156470954418182, |
| "learning_rate": 0.00016623367225173703, |
| "loss": 8.8681, |
| "step": 25120, |
| "throughput": 12498.166384791184 |
| }, |
| { |
| "epoch": 0.39423506453812335, |
| "grad_norm": 0.0885007381439209, |
| "learning_rate": 0.00016595952813860216, |
| "loss": 8.8592, |
| "step": 25152, |
| "throughput": 12498.292358025039 |
| }, |
| { |
| "epoch": 0.39473663586705227, |
| "grad_norm": 0.08698877692222595, |
| "learning_rate": 0.00016568538006837046, |
| "loss": 8.8666, |
| "step": 25184, |
| "throughput": 12498.292639086796 |
| }, |
| { |
| "epoch": 0.3952382071959812, |
| "grad_norm": 0.0833880603313446, |
| "learning_rate": 0.00016541122917162934, |
| "loss": 8.8482, |
| "step": 25216, |
| "throughput": 12498.369864553024 |
| }, |
| { |
| "epoch": 0.39573977852491005, |
| "grad_norm": 0.07714637368917465, |
| "learning_rate": 0.00016513707657897785, |
| "loss": 8.877, |
| "step": 25248, |
| "throughput": 12498.344570077656 |
| }, |
| { |
| "epoch": 0.39624134985383896, |
| "grad_norm": 0.07872116565704346, |
| "learning_rate": 0.00016486292342102215, |
| "loss": 8.857, |
| "step": 25280, |
| "throughput": 12498.259394281939 |
| }, |
| { |
| "epoch": 0.3967429211827679, |
| "grad_norm": 0.0763251781463623, |
| "learning_rate": 0.0001645887708283707, |
| "loss": 8.8524, |
| "step": 25312, |
| "throughput": 12498.192551632414 |
| }, |
| { |
| "epoch": 0.3972444925116968, |
| "grad_norm": 0.0815349817276001, |
| "learning_rate": 0.00016431461993162954, |
| "loss": 8.8548, |
| "step": 25344, |
| "throughput": 12498.342362005955 |
| }, |
| { |
| "epoch": 0.3977460638406257, |
| "grad_norm": 0.08260805159807205, |
| "learning_rate": 0.00016404047186139784, |
| "loss": 8.8503, |
| "step": 25376, |
| "throughput": 12498.53850460718 |
| }, |
| { |
| "epoch": 0.39824763516955464, |
| "grad_norm": 0.08485168218612671, |
| "learning_rate": 0.00016376632774826297, |
| "loss": 8.8444, |
| "step": 25408, |
| "throughput": 12498.708249727906 |
| }, |
| { |
| "epoch": 0.39874920649848355, |
| "grad_norm": 0.07725406438112259, |
| "learning_rate": 0.0001634921887227962, |
| "loss": 8.8515, |
| "step": 25440, |
| "throughput": 12498.95239798028 |
| }, |
| { |
| "epoch": 0.3992507778274124, |
| "grad_norm": 0.08141039311885834, |
| "learning_rate": 0.00016321805591554755, |
| "loss": 8.845, |
| "step": 25472, |
| "throughput": 12499.127438050664 |
| }, |
| { |
| "epoch": 0.39975234915634134, |
| "grad_norm": 0.0798976719379425, |
| "learning_rate": 0.00016294393045704163, |
| "loss": 8.8547, |
| "step": 25504, |
| "throughput": 12499.066997003883 |
| }, |
| { |
| "epoch": 0.40025392048527025, |
| "grad_norm": 0.08230122923851013, |
| "learning_rate": 0.00016266981347777255, |
| "loss": 8.8483, |
| "step": 25536, |
| "throughput": 12499.249046093964 |
| }, |
| { |
| "epoch": 0.40075549181419917, |
| "grad_norm": 0.07611089199781418, |
| "learning_rate": 0.00016239570610819963, |
| "loss": 8.8369, |
| "step": 25568, |
| "throughput": 12499.307959676233 |
| }, |
| { |
| "epoch": 0.4012570631431281, |
| "grad_norm": 0.08280283212661743, |
| "learning_rate": 0.00016212160947874242, |
| "loss": 8.8398, |
| "step": 25600, |
| "throughput": 12499.148777309101 |
| }, |
| { |
| "epoch": 0.401758634472057, |
| "grad_norm": 0.09161941707134247, |
| "learning_rate": 0.00016184752471977627, |
| "loss": 8.839, |
| "step": 25632, |
| "throughput": 12499.14721527316 |
| }, |
| { |
| "epoch": 0.4022602058009859, |
| "grad_norm": 0.0951690599322319, |
| "learning_rate": 0.0001615734529616275, |
| "loss": 8.8305, |
| "step": 25664, |
| "throughput": 12499.27000679289 |
| }, |
| { |
| "epoch": 0.4027617771299148, |
| "grad_norm": 0.0823458731174469, |
| "learning_rate": 0.00016129939533456888, |
| "loss": 8.8506, |
| "step": 25696, |
| "throughput": 12499.367547398746 |
| }, |
| { |
| "epoch": 0.4032633484588437, |
| "grad_norm": 0.08224660158157349, |
| "learning_rate": 0.00016102535296881485, |
| "loss": 8.8392, |
| "step": 25728, |
| "throughput": 12499.606853458625 |
| }, |
| { |
| "epoch": 0.4037649197877726, |
| "grad_norm": 0.07632069289684296, |
| "learning_rate": 0.00016075132699451701, |
| "loss": 8.8528, |
| "step": 25760, |
| "throughput": 12499.774179612356 |
| }, |
| { |
| "epoch": 0.40426649111670154, |
| "grad_norm": 0.08515363931655884, |
| "learning_rate": 0.00016047731854175917, |
| "loss": 8.8424, |
| "step": 25792, |
| "throughput": 12499.938706886793 |
| }, |
| { |
| "epoch": 0.40476806244563046, |
| "grad_norm": 0.0868089571595192, |
| "learning_rate": 0.00016020332874055313, |
| "loss": 8.8523, |
| "step": 25824, |
| "throughput": 12499.876721209184 |
| }, |
| { |
| "epoch": 0.4052696337745594, |
| "grad_norm": 0.08290804177522659, |
| "learning_rate": 0.00015992935872083356, |
| "loss": 8.8574, |
| "step": 25856, |
| "throughput": 12500.070181450272 |
| }, |
| { |
| "epoch": 0.4057712051034883, |
| "grad_norm": 0.10937785357236862, |
| "learning_rate": 0.00015965540961245363, |
| "loss": 8.8286, |
| "step": 25888, |
| "throughput": 12500.06492258528 |
| }, |
| { |
| "epoch": 0.40627277643241716, |
| "grad_norm": 0.08178723603487015, |
| "learning_rate": 0.0001593814825451803, |
| "loss": 8.8433, |
| "step": 25920, |
| "throughput": 12500.081162426208 |
| }, |
| { |
| "epoch": 0.4067743477613461, |
| "grad_norm": 0.08957145363092422, |
| "learning_rate": 0.00015910757864868967, |
| "loss": 8.8489, |
| "step": 25952, |
| "throughput": 12499.956948132949 |
| }, |
| { |
| "epoch": 0.407275919090275, |
| "grad_norm": 0.07651602476835251, |
| "learning_rate": 0.0001588336990525621, |
| "loss": 8.8528, |
| "step": 25984, |
| "throughput": 12500.06247363665 |
| }, |
| { |
| "epoch": 0.4077774904192039, |
| "grad_norm": 0.08387041836977005, |
| "learning_rate": 0.00015855984488627792, |
| "loss": 8.8567, |
| "step": 26016, |
| "throughput": 12500.208528946365 |
| }, |
| { |
| "epoch": 0.40827906174813283, |
| "grad_norm": 0.07692224532365799, |
| "learning_rate": 0.00015828601727921248, |
| "loss": 8.8327, |
| "step": 26048, |
| "throughput": 12500.392543525437 |
| }, |
| { |
| "epoch": 0.40878063307706175, |
| "grad_norm": 0.08937270939350128, |
| "learning_rate": 0.0001580122173606317, |
| "loss": 8.8506, |
| "step": 26080, |
| "throughput": 12500.54759119154 |
| }, |
| { |
| "epoch": 0.40928220440599067, |
| "grad_norm": 0.0803786963224411, |
| "learning_rate": 0.00015773844625968726, |
| "loss": 8.8404, |
| "step": 26112, |
| "throughput": 12500.779817177776 |
| }, |
| { |
| "epoch": 0.40978377573491953, |
| "grad_norm": 0.07738781720399857, |
| "learning_rate": 0.00015746470510541197, |
| "loss": 8.815, |
| "step": 26144, |
| "throughput": 12500.808397361501 |
| }, |
| { |
| "epoch": 0.41028534706384845, |
| "grad_norm": 0.09304353594779968, |
| "learning_rate": 0.00015719099502671516, |
| "loss": 8.8271, |
| "step": 26176, |
| "throughput": 12500.886080461782 |
| }, |
| { |
| "epoch": 0.41078691839277737, |
| "grad_norm": 0.08904910832643509, |
| "learning_rate": 0.00015691731715237802, |
| "loss": 8.8203, |
| "step": 26208, |
| "throughput": 12500.956352095312 |
| }, |
| { |
| "epoch": 0.4112884897217063, |
| "grad_norm": 0.07861921936273575, |
| "learning_rate": 0.00015664367261104887, |
| "loss": 8.8447, |
| "step": 26240, |
| "throughput": 12500.905992308413 |
| }, |
| { |
| "epoch": 0.4117900610506352, |
| "grad_norm": 0.08831379562616348, |
| "learning_rate": 0.00015637006253123865, |
| "loss": 8.834, |
| "step": 26272, |
| "throughput": 12500.824906221855 |
| }, |
| { |
| "epoch": 0.4122916323795641, |
| "grad_norm": 0.08581741154193878, |
| "learning_rate": 0.00015609648804131612, |
| "loss": 8.8145, |
| "step": 26304, |
| "throughput": 12500.845637212557 |
| }, |
| { |
| "epoch": 0.41279320370849304, |
| "grad_norm": 0.08051755279302597, |
| "learning_rate": 0.00015582295026950332, |
| "loss": 8.8362, |
| "step": 26336, |
| "throughput": 12500.988015783307 |
| }, |
| { |
| "epoch": 0.4132947750374219, |
| "grad_norm": 0.08957970142364502, |
| "learning_rate": 0.00015554945034387075, |
| "loss": 8.8285, |
| "step": 26368, |
| "throughput": 12501.173553645169 |
| }, |
| { |
| "epoch": 0.4137963463663508, |
| "grad_norm": 0.07820248603820801, |
| "learning_rate": 0.00015527598939233303, |
| "loss": 8.8344, |
| "step": 26400, |
| "throughput": 12501.333332567217 |
| }, |
| { |
| "epoch": 0.41429791769527974, |
| "grad_norm": 0.07684613764286041, |
| "learning_rate": 0.00015500256854264385, |
| "loss": 8.8242, |
| "step": 26432, |
| "throughput": 12501.561419958342 |
| }, |
| { |
| "epoch": 0.41479948902420866, |
| "grad_norm": 0.08777330070734024, |
| "learning_rate": 0.00015472918892239166, |
| "loss": 8.8374, |
| "step": 26464, |
| "throughput": 12501.681715732624 |
| }, |
| { |
| "epoch": 0.4153010603531376, |
| "grad_norm": 0.08308760821819305, |
| "learning_rate": 0.00015445585165899475, |
| "loss": 8.8335, |
| "step": 26496, |
| "throughput": 12501.681390072552 |
| }, |
| { |
| "epoch": 0.4158026316820665, |
| "grad_norm": 0.0822068527340889, |
| "learning_rate": 0.00015418255787969692, |
| "loss": 8.8303, |
| "step": 26528, |
| "throughput": 12501.842470612857 |
| }, |
| { |
| "epoch": 0.4163042030109954, |
| "grad_norm": 0.07649709284305573, |
| "learning_rate": 0.0001539093087115624, |
| "loss": 8.8497, |
| "step": 26560, |
| "throughput": 12501.80819770379 |
| }, |
| { |
| "epoch": 0.4168057743399243, |
| "grad_norm": 0.08079321682453156, |
| "learning_rate": 0.00015363610528147163, |
| "loss": 8.8393, |
| "step": 26592, |
| "throughput": 12501.674214507679 |
| }, |
| { |
| "epoch": 0.4173073456688532, |
| "grad_norm": 0.08345367014408112, |
| "learning_rate": 0.00015336294871611637, |
| "loss": 8.8202, |
| "step": 26624, |
| "throughput": 12501.62953861138 |
| }, |
| { |
| "epoch": 0.4178089169977821, |
| "grad_norm": 0.07904289662837982, |
| "learning_rate": 0.00015308984014199511, |
| "loss": 8.8326, |
| "step": 26656, |
| "throughput": 12501.080528407167 |
| }, |
| { |
| "epoch": 0.418310488326711, |
| "grad_norm": 0.08403212577104568, |
| "learning_rate": 0.00015281678068540836, |
| "loss": 8.8156, |
| "step": 26688, |
| "throughput": 12501.172306996934 |
| }, |
| { |
| "epoch": 0.41881205965563995, |
| "grad_norm": 0.08107221871614456, |
| "learning_rate": 0.00015254377147245424, |
| "loss": 8.8123, |
| "step": 26720, |
| "throughput": 12501.392173340688 |
| }, |
| { |
| "epoch": 0.41931363098456886, |
| "grad_norm": 0.08362643420696259, |
| "learning_rate": 0.00015227081362902343, |
| "loss": 8.8411, |
| "step": 26752, |
| "throughput": 12501.543097902952 |
| }, |
| { |
| "epoch": 0.4198152023134978, |
| "grad_norm": 0.08034635335206985, |
| "learning_rate": 0.000151997908280795, |
| "loss": 8.8134, |
| "step": 26784, |
| "throughput": 12501.628132213427 |
| }, |
| { |
| "epoch": 0.42031677364242664, |
| "grad_norm": 0.0822720155119896, |
| "learning_rate": 0.0001517250565532313, |
| "loss": 8.8242, |
| "step": 26816, |
| "throughput": 12501.645734944539 |
| }, |
| { |
| "epoch": 0.42081834497135556, |
| "grad_norm": 0.08424656093120575, |
| "learning_rate": 0.00015145225957157373, |
| "loss": 8.8146, |
| "step": 26848, |
| "throughput": 12501.785904689688 |
| }, |
| { |
| "epoch": 0.4213199163002845, |
| "grad_norm": 0.07683435082435608, |
| "learning_rate": 0.00015117951846083786, |
| "loss": 8.8272, |
| "step": 26880, |
| "throughput": 12501.788421695752 |
| }, |
| { |
| "epoch": 0.4218214876292134, |
| "grad_norm": 0.08346127718687057, |
| "learning_rate": 0.0001509068343458088, |
| "loss": 8.8126, |
| "step": 26912, |
| "throughput": 12501.779378240033 |
| }, |
| { |
| "epoch": 0.4223230589581423, |
| "grad_norm": 0.07858198881149292, |
| "learning_rate": 0.00015063420835103667, |
| "loss": 8.8373, |
| "step": 26944, |
| "throughput": 12501.67446416126 |
| }, |
| { |
| "epoch": 0.42282463028707123, |
| "grad_norm": 0.07885950058698654, |
| "learning_rate": 0.0001503616416008319, |
| "loss": 8.8379, |
| "step": 26976, |
| "throughput": 12501.74871067608 |
| }, |
| { |
| "epoch": 0.42332620161600015, |
| "grad_norm": 0.08099333196878433, |
| "learning_rate": 0.00015008913521926052, |
| "loss": 8.8305, |
| "step": 27008, |
| "throughput": 12501.839545957073 |
| }, |
| { |
| "epoch": 0.423827772944929, |
| "grad_norm": 0.08029799163341522, |
| "learning_rate": 0.00014981669033013972, |
| "loss": 8.8205, |
| "step": 27040, |
| "throughput": 12502.066045863343 |
| }, |
| { |
| "epoch": 0.42432934427385793, |
| "grad_norm": 0.08457601815462112, |
| "learning_rate": 0.00014954430805703302, |
| "loss": 8.8005, |
| "step": 27072, |
| "throughput": 12502.214232006745 |
| }, |
| { |
| "epoch": 0.42483091560278685, |
| "grad_norm": 0.08561043441295624, |
| "learning_rate": 0.00014927198952324568, |
| "loss": 8.7984, |
| "step": 27104, |
| "throughput": 12502.378735296224 |
| }, |
| { |
| "epoch": 0.42533248693171577, |
| "grad_norm": 0.08229973167181015, |
| "learning_rate": 0.00014899973585182012, |
| "loss": 8.828, |
| "step": 27136, |
| "throughput": 12502.534067644634 |
| }, |
| { |
| "epoch": 0.4258340582606447, |
| "grad_norm": 0.08372906595468521, |
| "learning_rate": 0.00014872754816553141, |
| "loss": 8.805, |
| "step": 27168, |
| "throughput": 12502.498069887373 |
| }, |
| { |
| "epoch": 0.4263356295895736, |
| "grad_norm": 0.08462665230035782, |
| "learning_rate": 0.00014845542758688222, |
| "loss": 8.8426, |
| "step": 27200, |
| "throughput": 12502.522762260758 |
| }, |
| { |
| "epoch": 0.42683720091850247, |
| "grad_norm": 0.07806643843650818, |
| "learning_rate": 0.00014818337523809876, |
| "loss": 8.824, |
| "step": 27232, |
| "throughput": 12502.55702013303 |
| }, |
| { |
| "epoch": 0.4273387722474314, |
| "grad_norm": 0.08268404752016068, |
| "learning_rate": 0.0001479113922411256, |
| "loss": 8.8181, |
| "step": 27264, |
| "throughput": 12502.444745720913 |
| }, |
| { |
| "epoch": 0.4278403435763603, |
| "grad_norm": 0.07644987851381302, |
| "learning_rate": 0.00014763947971762153, |
| "loss": 8.8116, |
| "step": 27296, |
| "throughput": 12502.521799841566 |
| }, |
| { |
| "epoch": 0.4283419149052892, |
| "grad_norm": 0.07754526287317276, |
| "learning_rate": 0.00014736763878895457, |
| "loss": 8.8104, |
| "step": 27328, |
| "throughput": 12502.750098933497 |
| }, |
| { |
| "epoch": 0.42884348623421814, |
| "grad_norm": 0.08302488178014755, |
| "learning_rate": 0.00014709587057619748, |
| "loss": 8.8339, |
| "step": 27360, |
| "throughput": 12502.846138213567 |
| }, |
| { |
| "epoch": 0.42934505756314706, |
| "grad_norm": 0.0887962132692337, |
| "learning_rate": 0.0001468241762001232, |
| "loss": 8.8151, |
| "step": 27392, |
| "throughput": 12502.98997233486 |
| }, |
| { |
| "epoch": 0.429846628892076, |
| "grad_norm": 0.07846581190824509, |
| "learning_rate": 0.00014655255678120015, |
| "loss": 8.8135, |
| "step": 27424, |
| "throughput": 12503.21332257332 |
| }, |
| { |
| "epoch": 0.43034820022100484, |
| "grad_norm": 0.07606692612171173, |
| "learning_rate": 0.0001462810134395876, |
| "loss": 8.8011, |
| "step": 27456, |
| "throughput": 12503.302442524873 |
| }, |
| { |
| "epoch": 0.43084977154993376, |
| "grad_norm": 0.08284489810466766, |
| "learning_rate": 0.0001460095472951311, |
| "loss": 8.834, |
| "step": 27488, |
| "throughput": 12503.297877481697 |
| }, |
| { |
| "epoch": 0.4313513428788627, |
| "grad_norm": 0.08123686164617538, |
| "learning_rate": 0.0001457381594673579, |
| "loss": 8.8202, |
| "step": 27520, |
| "throughput": 12503.346816106572 |
| }, |
| { |
| "epoch": 0.4318529142077916, |
| "grad_norm": 0.08661215752363205, |
| "learning_rate": 0.00014546685107547205, |
| "loss": 8.813, |
| "step": 27552, |
| "throughput": 12503.391456162526 |
| }, |
| { |
| "epoch": 0.4323544855367205, |
| "grad_norm": 0.07993298023939133, |
| "learning_rate": 0.00014519562323835034, |
| "loss": 8.8252, |
| "step": 27584, |
| "throughput": 12503.144877970894 |
| }, |
| { |
| "epoch": 0.43285605686564943, |
| "grad_norm": 0.08379069715738297, |
| "learning_rate": 0.000144924477074537, |
| "loss": 8.8113, |
| "step": 27616, |
| "throughput": 12503.21132642366 |
| }, |
| { |
| "epoch": 0.43335762819457835, |
| "grad_norm": 0.07715869694948196, |
| "learning_rate": 0.00014465341370223977, |
| "loss": 8.8169, |
| "step": 27648, |
| "throughput": 12503.427370544727 |
| }, |
| { |
| "epoch": 0.4338591995235072, |
| "grad_norm": 0.07746557891368866, |
| "learning_rate": 0.00014438243423932476, |
| "loss": 8.7993, |
| "step": 27680, |
| "throughput": 12503.528196923007 |
| }, |
| { |
| "epoch": 0.43436077085243613, |
| "grad_norm": 0.08105004578828812, |
| "learning_rate": 0.00014411153980331198, |
| "loss": 8.8118, |
| "step": 27712, |
| "throughput": 12503.745351961994 |
| }, |
| { |
| "epoch": 0.43486234218136505, |
| "grad_norm": 0.07802123576402664, |
| "learning_rate": 0.00014384073151137104, |
| "loss": 8.7873, |
| "step": 27744, |
| "throughput": 12503.894860879096 |
| }, |
| { |
| "epoch": 0.43536391351029397, |
| "grad_norm": 0.0854947492480278, |
| "learning_rate": 0.00014357001048031603, |
| "loss": 8.8103, |
| "step": 27776, |
| "throughput": 12503.983286527413 |
| }, |
| { |
| "epoch": 0.4358654848392229, |
| "grad_norm": 0.08572660386562347, |
| "learning_rate": 0.00014329937782660136, |
| "loss": 8.8024, |
| "step": 27808, |
| "throughput": 12503.977311837294 |
| }, |
| { |
| "epoch": 0.4363670561681518, |
| "grad_norm": 0.08063625544309616, |
| "learning_rate": 0.00014302883466631676, |
| "loss": 8.8278, |
| "step": 27840, |
| "throughput": 12504.113899698337 |
| }, |
| { |
| "epoch": 0.4368686274970807, |
| "grad_norm": 0.08082283288240433, |
| "learning_rate": 0.0001427583821151832, |
| "loss": 8.8121, |
| "step": 27872, |
| "throughput": 12504.080774410691 |
| }, |
| { |
| "epoch": 0.4373701988260096, |
| "grad_norm": 0.07952408492565155, |
| "learning_rate": 0.0001424880212885477, |
| "loss": 8.8099, |
| "step": 27904, |
| "throughput": 12503.973240062027 |
| }, |
| { |
| "epoch": 0.4378717701549385, |
| "grad_norm": 0.08380618691444397, |
| "learning_rate": 0.0001422177533013791, |
| "loss": 8.8345, |
| "step": 27936, |
| "throughput": 12504.020545306383 |
| }, |
| { |
| "epoch": 0.4383733414838674, |
| "grad_norm": 0.0780053436756134, |
| "learning_rate": 0.00014194757926826342, |
| "loss": 8.8071, |
| "step": 27968, |
| "throughput": 12504.18453474137 |
| }, |
| { |
| "epoch": 0.43887491281279634, |
| "grad_norm": 0.0810895636677742, |
| "learning_rate": 0.00014167750030339915, |
| "loss": 8.8046, |
| "step": 28000, |
| "throughput": 12504.34025809574 |
| }, |
| { |
| "epoch": 0.43937648414172525, |
| "grad_norm": 0.07910797744989395, |
| "learning_rate": 0.00014140751752059278, |
| "loss": 8.7905, |
| "step": 28032, |
| "throughput": 12504.500546568357 |
| }, |
| { |
| "epoch": 0.4398780554706542, |
| "grad_norm": 0.07732567191123962, |
| "learning_rate": 0.0001411376320332541, |
| "loss": 8.8035, |
| "step": 28064, |
| "throughput": 12504.638004289523 |
| }, |
| { |
| "epoch": 0.4403796267995831, |
| "grad_norm": 0.0854324921965599, |
| "learning_rate": 0.0001408678449543916, |
| "loss": 8.8217, |
| "step": 28096, |
| "throughput": 12504.787997647045 |
| }, |
| { |
| "epoch": 0.44088119812851195, |
| "grad_norm": 0.08483397215604782, |
| "learning_rate": 0.00014059815739660806, |
| "loss": 8.7962, |
| "step": 28128, |
| "throughput": 12504.872527000663 |
| }, |
| { |
| "epoch": 0.44138276945744087, |
| "grad_norm": 0.08548751473426819, |
| "learning_rate": 0.00014032857047209573, |
| "loss": 8.8012, |
| "step": 28160, |
| "throughput": 12504.923092080882 |
| }, |
| { |
| "epoch": 0.4418843407863698, |
| "grad_norm": 0.08244451135396957, |
| "learning_rate": 0.0001400590852926319, |
| "loss": 8.799, |
| "step": 28192, |
| "throughput": 12504.924302578516 |
| }, |
| { |
| "epoch": 0.4423859121152987, |
| "grad_norm": 0.08533225208520889, |
| "learning_rate": 0.00013978970296957423, |
| "loss": 8.7928, |
| "step": 28224, |
| "throughput": 12504.876186012274 |
| }, |
| { |
| "epoch": 0.4428874834442276, |
| "grad_norm": 0.07828489691019058, |
| "learning_rate": 0.00013952042461385625, |
| "loss": 8.7974, |
| "step": 28256, |
| "throughput": 12504.820262170877 |
| }, |
| { |
| "epoch": 0.44338905477315654, |
| "grad_norm": 0.08020341396331787, |
| "learning_rate": 0.00013925125133598266, |
| "loss": 8.8, |
| "step": 28288, |
| "throughput": 12504.884078631061 |
| }, |
| { |
| "epoch": 0.44389062610208546, |
| "grad_norm": 0.08242405205965042, |
| "learning_rate": 0.0001389821842460249, |
| "loss": 8.8036, |
| "step": 28320, |
| "throughput": 12505.056763796627 |
| }, |
| { |
| "epoch": 0.4443921974310143, |
| "grad_norm": 0.08134979009628296, |
| "learning_rate": 0.00013871322445361642, |
| "loss": 8.8092, |
| "step": 28352, |
| "throughput": 12505.178490608681 |
| }, |
| { |
| "epoch": 0.44489376875994324, |
| "grad_norm": 0.0873064249753952, |
| "learning_rate": 0.00013844437306794822, |
| "loss": 8.8024, |
| "step": 28384, |
| "throughput": 12505.394171807027 |
| }, |
| { |
| "epoch": 0.44539534008887216, |
| "grad_norm": 0.07534735649824142, |
| "learning_rate": 0.00013817563119776415, |
| "loss": 8.781, |
| "step": 28416, |
| "throughput": 12505.479880378469 |
| }, |
| { |
| "epoch": 0.4458969114178011, |
| "grad_norm": 0.08811984956264496, |
| "learning_rate": 0.00013790699995135658, |
| "loss": 8.7893, |
| "step": 28448, |
| "throughput": 12505.621916456059 |
| }, |
| { |
| "epoch": 0.44639848274673, |
| "grad_norm": 0.08254732936620712, |
| "learning_rate": 0.00013763848043656148, |
| "loss": 8.8034, |
| "step": 28480, |
| "throughput": 12505.628225728338 |
| }, |
| { |
| "epoch": 0.4469000540756589, |
| "grad_norm": 0.08350877463817596, |
| "learning_rate": 0.00013737007376075414, |
| "loss": 8.7836, |
| "step": 28512, |
| "throughput": 12505.672086429726 |
| }, |
| { |
| "epoch": 0.44740162540458783, |
| "grad_norm": 0.08083383738994598, |
| "learning_rate": 0.0001371017810308445, |
| "loss": 8.7824, |
| "step": 28544, |
| "throughput": 12505.701112095701 |
| }, |
| { |
| "epoch": 0.4479031967335167, |
| "grad_norm": 0.08054768294095993, |
| "learning_rate": 0.00013683360335327264, |
| "loss": 8.8157, |
| "step": 28576, |
| "throughput": 12505.449680561696 |
| }, |
| { |
| "epoch": 0.4484047680624456, |
| "grad_norm": 0.08552040904760361, |
| "learning_rate": 0.000136565541834004, |
| "loss": 8.792, |
| "step": 28608, |
| "throughput": 12505.508398606178 |
| }, |
| { |
| "epoch": 0.44890633939137453, |
| "grad_norm": 0.08794131875038147, |
| "learning_rate": 0.00013629759757852512, |
| "loss": 8.7987, |
| "step": 28640, |
| "throughput": 12505.669945796693 |
| }, |
| { |
| "epoch": 0.44940791072030345, |
| "grad_norm": 0.08450362831354141, |
| "learning_rate": 0.00013602977169183884, |
| "loss": 8.7623, |
| "step": 28672, |
| "throughput": 12505.776475800698 |
| }, |
| { |
| "epoch": 0.44990948204923237, |
| "grad_norm": 0.08908046782016754, |
| "learning_rate": 0.00013576206527846004, |
| "loss": 8.7837, |
| "step": 28704, |
| "throughput": 12505.181770324087 |
| }, |
| { |
| "epoch": 0.4504110533781613, |
| "grad_norm": 0.08419577032327652, |
| "learning_rate": 0.00013549447944241066, |
| "loss": 8.7983, |
| "step": 28736, |
| "throughput": 12505.265880752857 |
| }, |
| { |
| "epoch": 0.4509126247070902, |
| "grad_norm": 0.07569599896669388, |
| "learning_rate": 0.00013522701528721553, |
| "loss": 8.7892, |
| "step": 28768, |
| "throughput": 12505.406476236294 |
| }, |
| { |
| "epoch": 0.45141419603601907, |
| "grad_norm": 0.07981006801128387, |
| "learning_rate": 0.00013495967391589757, |
| "loss": 8.7834, |
| "step": 28800, |
| "throughput": 12505.43962817237 |
| }, |
| { |
| "epoch": 0.451915767364948, |
| "grad_norm": 0.08539936691522598, |
| "learning_rate": 0.00013469245643097345, |
| "loss": 8.785, |
| "step": 28832, |
| "throughput": 12505.481130315899 |
| }, |
| { |
| "epoch": 0.4524173386938769, |
| "grad_norm": 0.09472446888685226, |
| "learning_rate": 0.0001344253639344488, |
| "loss": 8.7916, |
| "step": 28864, |
| "throughput": 12505.544241528234 |
| }, |
| { |
| "epoch": 0.4529189100228058, |
| "grad_norm": 0.08921679109334946, |
| "learning_rate": 0.00013415839752781392, |
| "loss": 8.7943, |
| "step": 28896, |
| "throughput": 12505.45157406719 |
| }, |
| { |
| "epoch": 0.45342048135173474, |
| "grad_norm": 0.07996971905231476, |
| "learning_rate": 0.00013389155831203904, |
| "loss": 8.7959, |
| "step": 28928, |
| "throughput": 12505.422222633157 |
| }, |
| { |
| "epoch": 0.45392205268066366, |
| "grad_norm": 0.08380214124917984, |
| "learning_rate": 0.0001336248473875699, |
| "loss": 8.7908, |
| "step": 28960, |
| "throughput": 12505.626056080559 |
| }, |
| { |
| "epoch": 0.4544236240095926, |
| "grad_norm": 0.08199769258499146, |
| "learning_rate": 0.00013335826585432313, |
| "loss": 8.781, |
| "step": 28992, |
| "throughput": 12505.795266845515 |
| }, |
| { |
| "epoch": 0.45492519533852144, |
| "grad_norm": 0.0836104303598404, |
| "learning_rate": 0.00013309181481168173, |
| "loss": 8.7806, |
| "step": 29024, |
| "throughput": 12505.904108385801 |
| }, |
| { |
| "epoch": 0.45542676666745036, |
| "grad_norm": 0.08294175565242767, |
| "learning_rate": 0.00013282549535849065, |
| "loss": 8.7792, |
| "step": 29056, |
| "throughput": 12506.039652361887 |
| }, |
| { |
| "epoch": 0.4559283379963793, |
| "grad_norm": 0.08239220827817917, |
| "learning_rate": 0.00013255930859305205, |
| "loss": 8.7722, |
| "step": 29088, |
| "throughput": 12506.181981745243 |
| }, |
| { |
| "epoch": 0.4564299093253082, |
| "grad_norm": 0.08480555564165115, |
| "learning_rate": 0.000132293255613121, |
| "loss": 8.7934, |
| "step": 29120, |
| "throughput": 12506.28065800319 |
| }, |
| { |
| "epoch": 0.4569314806542371, |
| "grad_norm": 0.07948991656303406, |
| "learning_rate": 0.00013202733751590067, |
| "loss": 8.7755, |
| "step": 29152, |
| "throughput": 12506.191430123663 |
| }, |
| { |
| "epoch": 0.45743305198316603, |
| "grad_norm": 0.07923830300569534, |
| "learning_rate": 0.00013176155539803818, |
| "loss": 8.7846, |
| "step": 29184, |
| "throughput": 12506.31910845052 |
| }, |
| { |
| "epoch": 0.45793462331209495, |
| "grad_norm": 0.08175349235534668, |
| "learning_rate": 0.00013149591035561977, |
| "loss": 8.7768, |
| "step": 29216, |
| "throughput": 12506.16222401864 |
| }, |
| { |
| "epoch": 0.4584361946410238, |
| "grad_norm": 0.07527513056993484, |
| "learning_rate": 0.00013123040348416633, |
| "loss": 8.7737, |
| "step": 29248, |
| "throughput": 12506.083389719804 |
| }, |
| { |
| "epoch": 0.4589377659699527, |
| "grad_norm": 0.08060269057750702, |
| "learning_rate": 0.00013096503587862906, |
| "loss": 8.7995, |
| "step": 29280, |
| "throughput": 12506.285669988638 |
| }, |
| { |
| "epoch": 0.45943933729888164, |
| "grad_norm": 0.08061324805021286, |
| "learning_rate": 0.00013069980863338466, |
| "loss": 8.7762, |
| "step": 29312, |
| "throughput": 12506.460119233994 |
| }, |
| { |
| "epoch": 0.45994090862781056, |
| "grad_norm": 0.07880888134241104, |
| "learning_rate": 0.00013043472284223113, |
| "loss": 8.788, |
| "step": 29344, |
| "throughput": 12506.569670524006 |
| }, |
| { |
| "epoch": 0.4604424799567395, |
| "grad_norm": 0.08075280487537384, |
| "learning_rate": 0.00013016977959838305, |
| "loss": 8.7876, |
| "step": 29376, |
| "throughput": 12506.773916148955 |
| }, |
| { |
| "epoch": 0.4609440512856684, |
| "grad_norm": 0.08283714205026627, |
| "learning_rate": 0.00012990497999446714, |
| "loss": 8.7854, |
| "step": 29408, |
| "throughput": 12506.853457567413 |
| }, |
| { |
| "epoch": 0.4614456226145973, |
| "grad_norm": 0.07776404172182083, |
| "learning_rate": 0.00012964032512251773, |
| "loss": 8.7846, |
| "step": 29440, |
| "throughput": 12506.998261842957 |
| }, |
| { |
| "epoch": 0.4619471939435262, |
| "grad_norm": 0.0844292938709259, |
| "learning_rate": 0.00012937581607397236, |
| "loss": 8.7803, |
| "step": 29472, |
| "throughput": 12506.968712999418 |
| }, |
| { |
| "epoch": 0.4624487652724551, |
| "grad_norm": 0.07433100044727325, |
| "learning_rate": 0.00012911145393966703, |
| "loss": 8.7934, |
| "step": 29504, |
| "throughput": 12507.010283981752 |
| }, |
| { |
| "epoch": 0.462950336601384, |
| "grad_norm": 0.08090733736753464, |
| "learning_rate": 0.00012884723980983206, |
| "loss": 8.7857, |
| "step": 29536, |
| "throughput": 12506.893742771588 |
| }, |
| { |
| "epoch": 0.46345190793031293, |
| "grad_norm": 0.07989344745874405, |
| "learning_rate": 0.00012858317477408728, |
| "loss": 8.79, |
| "step": 29568, |
| "throughput": 12506.84533351148 |
| }, |
| { |
| "epoch": 0.46395347925924185, |
| "grad_norm": 0.07905034720897675, |
| "learning_rate": 0.00012831925992143765, |
| "loss": 8.7932, |
| "step": 29600, |
| "throughput": 12506.91298107222 |
| }, |
| { |
| "epoch": 0.46445505058817077, |
| "grad_norm": 0.07559997588396072, |
| "learning_rate": 0.00012805549634026882, |
| "loss": 8.7792, |
| "step": 29632, |
| "throughput": 12507.071866990049 |
| }, |
| { |
| "epoch": 0.4649566219170997, |
| "grad_norm": 0.08120205253362656, |
| "learning_rate": 0.00012779188511834256, |
| "loss": 8.7904, |
| "step": 29664, |
| "throughput": 12507.233106886928 |
| }, |
| { |
| "epoch": 0.46545819324602855, |
| "grad_norm": 0.08174072206020355, |
| "learning_rate": 0.00012752842734279238, |
| "loss": 8.7693, |
| "step": 29696, |
| "throughput": 12507.373090242085 |
| }, |
| { |
| "epoch": 0.46595976457495747, |
| "grad_norm": 0.08241820335388184, |
| "learning_rate": 0.0001272651241001189, |
| "loss": 8.7826, |
| "step": 29728, |
| "throughput": 12507.438613262173 |
| }, |
| { |
| "epoch": 0.4664613359038864, |
| "grad_norm": 0.0764114111661911, |
| "learning_rate": 0.00012700197647618549, |
| "loss": 8.7917, |
| "step": 29760, |
| "throughput": 12507.574595308435 |
| }, |
| { |
| "epoch": 0.4669629072328153, |
| "grad_norm": 0.0877339169383049, |
| "learning_rate": 0.00012673898555621373, |
| "loss": 8.7797, |
| "step": 29792, |
| "throughput": 12507.562236454418 |
| }, |
| { |
| "epoch": 0.4674644785617442, |
| "grad_norm": 0.08090822398662567, |
| "learning_rate": 0.00012647615242477887, |
| "loss": 8.745, |
| "step": 29824, |
| "throughput": 12507.588567247858 |
| }, |
| { |
| "epoch": 0.46796604989067314, |
| "grad_norm": 0.07626835256814957, |
| "learning_rate": 0.0001262134781658056, |
| "loss": 8.7662, |
| "step": 29856, |
| "throughput": 12507.644819327088 |
| }, |
| { |
| "epoch": 0.46846762121960206, |
| "grad_norm": 0.0867987796664238, |
| "learning_rate": 0.00012595096386256336, |
| "loss": 8.7677, |
| "step": 29888, |
| "throughput": 12507.488479913989 |
| }, |
| { |
| "epoch": 0.4689691925485309, |
| "grad_norm": 0.08022241294384003, |
| "learning_rate": 0.0001256886105976619, |
| "loss": 8.7928, |
| "step": 29920, |
| "throughput": 12507.475755007914 |
| }, |
| { |
| "epoch": 0.46947076387745984, |
| "grad_norm": 0.0800698921084404, |
| "learning_rate": 0.0001254264194530468, |
| "loss": 8.7815, |
| "step": 29952, |
| "throughput": 12507.627904220677 |
| }, |
| { |
| "epoch": 0.46997233520638876, |
| "grad_norm": 0.08612282574176788, |
| "learning_rate": 0.00012516439150999525, |
| "loss": 8.779, |
| "step": 29984, |
| "throughput": 12507.827197284922 |
| }, |
| { |
| "epoch": 0.4704739065353177, |
| "grad_norm": 0.07774586230516434, |
| "learning_rate": 0.00012490252784911113, |
| "loss": 8.7475, |
| "step": 30016, |
| "throughput": 12507.941832818991 |
| }, |
| { |
| "epoch": 0.4709754778642466, |
| "grad_norm": 0.07533223181962967, |
| "learning_rate": 0.000124640829550321, |
| "loss": 8.7656, |
| "step": 30048, |
| "throughput": 12508.084077309297 |
| }, |
| { |
| "epoch": 0.4714770491931755, |
| "grad_norm": 0.08107419312000275, |
| "learning_rate": 0.00012437929769286942, |
| "loss": 8.7687, |
| "step": 30080, |
| "throughput": 12508.211097889753 |
| }, |
| { |
| "epoch": 0.47197862052210443, |
| "grad_norm": 0.07990825921297073, |
| "learning_rate": 0.0001241179333553146, |
| "loss": 8.7837, |
| "step": 30112, |
| "throughput": 12508.302930498237 |
| }, |
| { |
| "epoch": 0.4724801918510333, |
| "grad_norm": 0.08068916946649551, |
| "learning_rate": 0.00012385673761552374, |
| "loss": 8.7643, |
| "step": 30144, |
| "throughput": 12508.208036804326 |
| }, |
| { |
| "epoch": 0.4729817631799622, |
| "grad_norm": 0.08091454207897186, |
| "learning_rate": 0.00012359571155066894, |
| "loss": 8.7732, |
| "step": 30176, |
| "throughput": 12508.296185243455 |
| }, |
| { |
| "epoch": 0.47348333450889113, |
| "grad_norm": 0.17354527115821838, |
| "learning_rate": 0.00012333485623722238, |
| "loss": 8.7897, |
| "step": 30208, |
| "throughput": 12508.14258921723 |
| }, |
| { |
| "epoch": 0.47398490583782005, |
| "grad_norm": 0.08511584997177124, |
| "learning_rate": 0.00012307417275095222, |
| "loss": 8.7677, |
| "step": 30240, |
| "throughput": 12508.115507616394 |
| }, |
| { |
| "epoch": 0.47448647716674897, |
| "grad_norm": 0.07827948778867722, |
| "learning_rate": 0.00012281366216691786, |
| "loss": 8.7539, |
| "step": 30272, |
| "throughput": 12508.305558338443 |
| }, |
| { |
| "epoch": 0.4749880484956779, |
| "grad_norm": 0.07761247456073761, |
| "learning_rate": 0.00012255332555946582, |
| "loss": 8.7526, |
| "step": 30304, |
| "throughput": 12508.455505946777 |
| }, |
| { |
| "epoch": 0.4754896198246068, |
| "grad_norm": 0.08157996088266373, |
| "learning_rate": 0.00012229316400222493, |
| "loss": 8.7795, |
| "step": 30336, |
| "throughput": 12508.555800396765 |
| }, |
| { |
| "epoch": 0.47599119115353566, |
| "grad_norm": 0.07903724908828735, |
| "learning_rate": 0.00012203317856810232, |
| "loss": 8.7757, |
| "step": 30368, |
| "throughput": 12508.690458329196 |
| }, |
| { |
| "epoch": 0.4764927624824646, |
| "grad_norm": 0.08179880678653717, |
| "learning_rate": 0.0001217733703292786, |
| "loss": 8.7522, |
| "step": 30400, |
| "throughput": 12508.814542608767 |
| }, |
| { |
| "epoch": 0.4769943338113935, |
| "grad_norm": 0.07757299393415451, |
| "learning_rate": 0.0001215137403572038, |
| "loss": 8.7659, |
| "step": 30432, |
| "throughput": 12508.907694365285 |
| }, |
| { |
| "epoch": 0.4774959051403224, |
| "grad_norm": 0.0800146833062172, |
| "learning_rate": 0.00012125428972259264, |
| "loss": 8.7598, |
| "step": 30464, |
| "throughput": 12508.851198702956 |
| }, |
| { |
| "epoch": 0.47799747646925134, |
| "grad_norm": 0.07752782106399536, |
| "learning_rate": 0.0001209950194954203, |
| "loss": 8.7707, |
| "step": 30496, |
| "throughput": 12508.925468249761 |
| }, |
| { |
| "epoch": 0.47849904779818025, |
| "grad_norm": 0.08032878488302231, |
| "learning_rate": 0.00012073593074491802, |
| "loss": 8.7868, |
| "step": 30528, |
| "throughput": 12508.849551927575 |
| }, |
| { |
| "epoch": 0.4790006191271092, |
| "grad_norm": 0.0815482884645462, |
| "learning_rate": 0.0001204770245395685, |
| "loss": 8.7762, |
| "step": 30560, |
| "throughput": 12508.749956563777 |
| }, |
| { |
| "epoch": 0.47950219045603804, |
| "grad_norm": 0.07393264025449753, |
| "learning_rate": 0.00012021830194710178, |
| "loss": 8.7618, |
| "step": 30592, |
| "throughput": 12508.921022993149 |
| }, |
| { |
| "epoch": 0.48000376178496695, |
| "grad_norm": 0.09324093163013458, |
| "learning_rate": 0.00011995976403449054, |
| "loss": 8.7621, |
| "step": 30624, |
| "throughput": 12509.087942919288 |
| }, |
| { |
| "epoch": 0.48050533311389587, |
| "grad_norm": 0.08318941295146942, |
| "learning_rate": 0.00011970141186794592, |
| "loss": 8.7732, |
| "step": 30656, |
| "throughput": 12509.245774551542 |
| }, |
| { |
| "epoch": 0.4810069044428248, |
| "grad_norm": 0.08678317815065384, |
| "learning_rate": 0.00011944324651291299, |
| "loss": 8.7464, |
| "step": 30688, |
| "throughput": 12509.332430369472 |
| }, |
| { |
| "epoch": 0.4815084757717537, |
| "grad_norm": 0.07876382023096085, |
| "learning_rate": 0.00011918526903406647, |
| "loss": 8.7479, |
| "step": 30720, |
| "throughput": 12509.453968965496 |
| }, |
| { |
| "epoch": 0.4820100471006826, |
| "grad_norm": 0.08849357813596725, |
| "learning_rate": 0.0001189274804953063, |
| "loss": 8.7674, |
| "step": 30752, |
| "throughput": 12508.958433858608 |
| }, |
| { |
| "epoch": 0.48251161842961154, |
| "grad_norm": 0.07928457856178284, |
| "learning_rate": 0.00011866988195975307, |
| "loss": 8.7614, |
| "step": 30784, |
| "throughput": 12508.846489232832 |
| }, |
| { |
| "epoch": 0.4830131897585404, |
| "grad_norm": 0.12978075444698334, |
| "learning_rate": 0.00011841247448974398, |
| "loss": 8.7638, |
| "step": 30816, |
| "throughput": 12508.88239726129 |
| }, |
| { |
| "epoch": 0.4835147610874693, |
| "grad_norm": 0.07514094561338425, |
| "learning_rate": 0.00011815525914682817, |
| "loss": 8.7428, |
| "step": 30848, |
| "throughput": 12508.814152962192 |
| }, |
| { |
| "epoch": 0.48401633241639824, |
| "grad_norm": 0.08580697327852249, |
| "learning_rate": 0.00011789823699176249, |
| "loss": 8.7646, |
| "step": 30880, |
| "throughput": 12508.78627396142 |
| }, |
| { |
| "epoch": 0.48451790374532716, |
| "grad_norm": 0.08245430886745453, |
| "learning_rate": 0.00011764140908450703, |
| "loss": 8.753, |
| "step": 30912, |
| "throughput": 12508.808191222613 |
| }, |
| { |
| "epoch": 0.4850194750742561, |
| "grad_norm": 0.07886088639497757, |
| "learning_rate": 0.0001173847764842209, |
| "loss": 8.7662, |
| "step": 30944, |
| "throughput": 12508.962621729643 |
| }, |
| { |
| "epoch": 0.485521046403185, |
| "grad_norm": 0.07890153676271439, |
| "learning_rate": 0.00011712834024925766, |
| "loss": 8.7624, |
| "step": 30976, |
| "throughput": 12509.141366691623 |
| }, |
| { |
| "epoch": 0.4860226177321139, |
| "grad_norm": 0.08036927133798599, |
| "learning_rate": 0.00011687210143716116, |
| "loss": 8.7479, |
| "step": 31008, |
| "throughput": 12509.238001841131 |
| }, |
| { |
| "epoch": 0.4865241890610428, |
| "grad_norm": 0.09068219363689423, |
| "learning_rate": 0.00011661606110466095, |
| "loss": 8.7535, |
| "step": 31040, |
| "throughput": 12509.368151742632 |
| }, |
| { |
| "epoch": 0.4870257603899717, |
| "grad_norm": 0.0785689428448677, |
| "learning_rate": 0.00011636022030766818, |
| "loss": 8.7651, |
| "step": 31072, |
| "throughput": 12509.487875607168 |
| }, |
| { |
| "epoch": 0.4875273317189006, |
| "grad_norm": 0.08110499382019043, |
| "learning_rate": 0.00011610458010127093, |
| "loss": 8.7536, |
| "step": 31104, |
| "throughput": 12509.537437094328 |
| }, |
| { |
| "epoch": 0.48802890304782953, |
| "grad_norm": 0.07937299460172653, |
| "learning_rate": 0.00011584914153973036, |
| "loss": 8.7736, |
| "step": 31136, |
| "throughput": 12509.438853906258 |
| }, |
| { |
| "epoch": 0.48853047437675845, |
| "grad_norm": 0.08056695759296417, |
| "learning_rate": 0.00011559390567647571, |
| "loss": 8.7492, |
| "step": 31168, |
| "throughput": 12509.480462998552 |
| }, |
| { |
| "epoch": 0.48903204570568737, |
| "grad_norm": 0.08015215396881104, |
| "learning_rate": 0.00011533887356410052, |
| "loss": 8.7586, |
| "step": 31200, |
| "throughput": 12509.36621670406 |
| }, |
| { |
| "epoch": 0.4895336170346163, |
| "grad_norm": 0.08530990779399872, |
| "learning_rate": 0.00011508404625435791, |
| "loss": 8.7608, |
| "step": 31232, |
| "throughput": 12509.389685983637 |
| }, |
| { |
| "epoch": 0.49003518836354515, |
| "grad_norm": 0.08087247610092163, |
| "learning_rate": 0.00011482942479815651, |
| "loss": 8.7399, |
| "step": 31264, |
| "throughput": 12509.533223065642 |
| }, |
| { |
| "epoch": 0.49053675969247407, |
| "grad_norm": 0.08212879300117493, |
| "learning_rate": 0.00011457501024555593, |
| "loss": 8.7591, |
| "step": 31296, |
| "throughput": 12509.71621584718 |
| }, |
| { |
| "epoch": 0.491038331021403, |
| "grad_norm": 0.07677069306373596, |
| "learning_rate": 0.00011432080364576256, |
| "loss": 8.7386, |
| "step": 31328, |
| "throughput": 12509.86786846776 |
| }, |
| { |
| "epoch": 0.4915399023503319, |
| "grad_norm": 0.08998126536607742, |
| "learning_rate": 0.00011406680604712517, |
| "loss": 8.7581, |
| "step": 31360, |
| "throughput": 12509.947022465163 |
| }, |
| { |
| "epoch": 0.4920414736792608, |
| "grad_norm": 0.07648464292287827, |
| "learning_rate": 0.00011381301849713059, |
| "loss": 8.7573, |
| "step": 31392, |
| "throughput": 12510.063359933165 |
| }, |
| { |
| "epoch": 0.49254304500818974, |
| "grad_norm": 0.080910325050354, |
| "learning_rate": 0.00011355944204239944, |
| "loss": 8.7566, |
| "step": 31424, |
| "throughput": 12510.18732631582 |
| }, |
| { |
| "epoch": 0.4930446163371186, |
| "grad_norm": 0.0793800875544548, |
| "learning_rate": 0.0001133060777286818, |
| "loss": 8.7457, |
| "step": 31456, |
| "throughput": 12510.05788704578 |
| }, |
| { |
| "epoch": 0.4935461876660475, |
| "grad_norm": 0.08065943419933319, |
| "learning_rate": 0.00011305292660085278, |
| "loss": 8.739, |
| "step": 31488, |
| "throughput": 12510.145206585308 |
| }, |
| { |
| "epoch": 0.49404775899497644, |
| "grad_norm": 0.08295351266860962, |
| "learning_rate": 0.00011279998970290844, |
| "loss": 8.7673, |
| "step": 31520, |
| "throughput": 12510.0537774648 |
| }, |
| { |
| "epoch": 0.49454933032390536, |
| "grad_norm": 0.08329456299543381, |
| "learning_rate": 0.0001125472680779613, |
| "loss": 8.7515, |
| "step": 31552, |
| "throughput": 12510.008718802392 |
| }, |
| { |
| "epoch": 0.4950509016528343, |
| "grad_norm": 0.08850586414337158, |
| "learning_rate": 0.00011229476276823608, |
| "loss": 8.7395, |
| "step": 31584, |
| "throughput": 12510.16297776667 |
| }, |
| { |
| "epoch": 0.4955524729817632, |
| "grad_norm": 0.08080103248357773, |
| "learning_rate": 0.00011204247481506535, |
| "loss": 8.7392, |
| "step": 31616, |
| "throughput": 12510.3125035182 |
| }, |
| { |
| "epoch": 0.4960540443106921, |
| "grad_norm": 0.08016198873519897, |
| "learning_rate": 0.00011179040525888552, |
| "loss": 8.7455, |
| "step": 31648, |
| "throughput": 12510.463262300123 |
| }, |
| { |
| "epoch": 0.496555615639621, |
| "grad_norm": 0.0868767723441124, |
| "learning_rate": 0.00011153855513923207, |
| "loss": 8.7339, |
| "step": 31680, |
| "throughput": 12510.564056947635 |
| }, |
| { |
| "epoch": 0.4970571869685499, |
| "grad_norm": 0.08294384926557541, |
| "learning_rate": 0.00011128692549473568, |
| "loss": 8.7557, |
| "step": 31712, |
| "throughput": 12510.745317389034 |
| }, |
| { |
| "epoch": 0.4975587582974788, |
| "grad_norm": 0.10098681598901749, |
| "learning_rate": 0.00011103551736311777, |
| "loss": 8.7379, |
| "step": 31744, |
| "throughput": 12510.86022010489 |
| }, |
| { |
| "epoch": 0.4980603296264077, |
| "grad_norm": 0.08104648441076279, |
| "learning_rate": 0.0001107843317811862, |
| "loss": 8.7329, |
| "step": 31776, |
| "throughput": 12510.681606577396 |
| }, |
| { |
| "epoch": 0.49856190095533665, |
| "grad_norm": 0.08391673862934113, |
| "learning_rate": 0.00011053336978483102, |
| "loss": 8.7606, |
| "step": 31808, |
| "throughput": 12510.734798121777 |
| }, |
| { |
| "epoch": 0.49906347228426556, |
| "grad_norm": 0.08090377599000931, |
| "learning_rate": 0.00011028263240902033, |
| "loss": 8.7285, |
| "step": 31840, |
| "throughput": 12510.676006699603 |
| }, |
| { |
| "epoch": 0.4995650436131945, |
| "grad_norm": 0.08488581329584122, |
| "learning_rate": 0.0001100321206877957, |
| "loss": 8.7327, |
| "step": 31872, |
| "throughput": 12510.641308355624 |
| }, |
| { |
| "epoch": 0.5000666149421233, |
| "grad_norm": 0.07887663692235947, |
| "learning_rate": 0.00010978183565426832, |
| "loss": 8.7461, |
| "step": 31904, |
| "throughput": 12510.75024742261 |
| }, |
| { |
| "epoch": 0.5005681862710523, |
| "grad_norm": 0.0869184285402298, |
| "learning_rate": 0.00010953177834061435, |
| "loss": 8.7597, |
| "step": 31936, |
| "throughput": 12510.895384025353 |
| }, |
| { |
| "epoch": 0.5010697575999812, |
| "grad_norm": 0.08527307957410812, |
| "learning_rate": 0.00010928194977807091, |
| "loss": 8.7394, |
| "step": 31968, |
| "throughput": 12511.070523100629 |
| }, |
| { |
| "epoch": 0.5015713289289101, |
| "grad_norm": 0.08460355550050735, |
| "learning_rate": 0.00010903235099693174, |
| "loss": 8.7285, |
| "step": 32000, |
| "throughput": 12511.11540023717 |
| }, |
| { |
| "epoch": 0.502072900257839, |
| "grad_norm": 0.09160695970058441, |
| "learning_rate": 0.00010878298302654294, |
| "loss": 8.7587, |
| "step": 32032, |
| "throughput": 12511.295692044801 |
| }, |
| { |
| "epoch": 0.5025744715867679, |
| "grad_norm": 0.081563301384449, |
| "learning_rate": 0.00010853384689529873, |
| "loss": 8.7453, |
| "step": 32064, |
| "throughput": 12511.40980242315 |
| }, |
| { |
| "epoch": 0.5030760429156969, |
| "grad_norm": 0.08575332164764404, |
| "learning_rate": 0.00010828494363063732, |
| "loss": 8.7427, |
| "step": 32096, |
| "throughput": 12511.381388249676 |
| }, |
| { |
| "epoch": 0.5035776142446258, |
| "grad_norm": 0.08162245899438858, |
| "learning_rate": 0.0001080362742590364, |
| "loss": 8.7537, |
| "step": 32128, |
| "throughput": 12511.317804481178 |
| }, |
| { |
| "epoch": 0.5040791855735547, |
| "grad_norm": 0.0902937799692154, |
| "learning_rate": 0.00010778783980600939, |
| "loss": 8.7594, |
| "step": 32160, |
| "throughput": 12511.351734559135 |
| }, |
| { |
| "epoch": 0.5045807569024836, |
| "grad_norm": 0.08423754572868347, |
| "learning_rate": 0.00010753964129610052, |
| "loss": 8.743, |
| "step": 32192, |
| "throughput": 12511.245678204701 |
| }, |
| { |
| "epoch": 0.5050823282314125, |
| "grad_norm": 0.08069079369306564, |
| "learning_rate": 0.00010729167975288122, |
| "loss": 8.7456, |
| "step": 32224, |
| "throughput": 12511.309004588747 |
| }, |
| { |
| "epoch": 0.5055838995603413, |
| "grad_norm": 0.07970630377531052, |
| "learning_rate": 0.0001070439561989457, |
| "loss": 8.7515, |
| "step": 32256, |
| "throughput": 12511.464278292919 |
| }, |
| { |
| "epoch": 0.5060854708892703, |
| "grad_norm": 0.08481299132108688, |
| "learning_rate": 0.00010679647165590659, |
| "loss": 8.7306, |
| "step": 32288, |
| "throughput": 12511.645110625795 |
| }, |
| { |
| "epoch": 0.5065870422181992, |
| "grad_norm": 0.08320998400449753, |
| "learning_rate": 0.00010654922714439083, |
| "loss": 8.7316, |
| "step": 32320, |
| "throughput": 12511.732213907615 |
| }, |
| { |
| "epoch": 0.5070886135471281, |
| "grad_norm": 0.08566644042730331, |
| "learning_rate": 0.00010630222368403561, |
| "loss": 8.7223, |
| "step": 32352, |
| "throughput": 12511.857384326633 |
| }, |
| { |
| "epoch": 0.507590184876057, |
| "grad_norm": 0.0807805135846138, |
| "learning_rate": 0.00010605546229348396, |
| "loss": 8.7497, |
| "step": 32384, |
| "throughput": 12511.968185963713 |
| }, |
| { |
| "epoch": 0.5080917562049859, |
| "grad_norm": 0.08053547143936157, |
| "learning_rate": 0.00010580894399038044, |
| "loss": 8.7466, |
| "step": 32416, |
| "throughput": 12512.034265205712 |
| }, |
| { |
| "epoch": 0.5085933275339148, |
| "grad_norm": 0.12142524868249893, |
| "learning_rate": 0.00010556266979136734, |
| "loss": 8.73, |
| "step": 32448, |
| "throughput": 12511.94393793066 |
| }, |
| { |
| "epoch": 0.5090948988628438, |
| "grad_norm": 0.0837826207280159, |
| "learning_rate": 0.00010531664071208019, |
| "loss": 8.733, |
| "step": 32480, |
| "throughput": 12512.009493397778 |
| }, |
| { |
| "epoch": 0.5095964701917727, |
| "grad_norm": 0.08382360637187958, |
| "learning_rate": 0.00010507085776714369, |
| "loss": 8.7224, |
| "step": 32512, |
| "throughput": 12511.999355929676 |
| }, |
| { |
| "epoch": 0.5100980415207016, |
| "grad_norm": 0.0817563384771347, |
| "learning_rate": 0.00010482532197016732, |
| "loss": 8.7403, |
| "step": 32544, |
| "throughput": 12511.856327004498 |
| }, |
| { |
| "epoch": 0.5105996128496305, |
| "grad_norm": 0.08128924667835236, |
| "learning_rate": 0.00010458003433374152, |
| "loss": 8.7322, |
| "step": 32576, |
| "throughput": 12511.991218821682 |
| }, |
| { |
| "epoch": 0.5111011841785594, |
| "grad_norm": 0.08386674523353577, |
| "learning_rate": 0.00010433499586943319, |
| "loss": 8.7419, |
| "step": 32608, |
| "throughput": 12512.170004084111 |
| }, |
| { |
| "epoch": 0.5116027555074883, |
| "grad_norm": 0.07870669662952423, |
| "learning_rate": 0.00010409020758778178, |
| "loss": 8.7404, |
| "step": 32640, |
| "throughput": 12512.34686175348 |
| }, |
| { |
| "epoch": 0.5121043268364173, |
| "grad_norm": 0.0825226828455925, |
| "learning_rate": 0.00010384567049829474, |
| "loss": 8.7298, |
| "step": 32672, |
| "throughput": 12512.38983596489 |
| }, |
| { |
| "epoch": 0.5126058981653461, |
| "grad_norm": 0.08137047290802002, |
| "learning_rate": 0.00010360138560944379, |
| "loss": 8.7255, |
| "step": 32704, |
| "throughput": 12512.564533810659 |
| }, |
| { |
| "epoch": 0.513107469494275, |
| "grad_norm": 0.08141603320837021, |
| "learning_rate": 0.00010335735392866061, |
| "loss": 8.7222, |
| "step": 32736, |
| "throughput": 12512.62880948394 |
| }, |
| { |
| "epoch": 0.5136090408232039, |
| "grad_norm": 0.08862655609846115, |
| "learning_rate": 0.00010311357646233255, |
| "loss": 8.7387, |
| "step": 32768, |
| "throughput": 12512.50488243399 |
| }, |
| { |
| "epoch": 0.5141106121521328, |
| "grad_norm": 0.08207987248897552, |
| "learning_rate": 0.00010287005421579854, |
| "loss": 8.7515, |
| "step": 32800, |
| "throughput": 12511.983187127891 |
| }, |
| { |
| "epoch": 0.5146121834810617, |
| "grad_norm": 0.07818058878183365, |
| "learning_rate": 0.00010262678819334511, |
| "loss": 8.7303, |
| "step": 32832, |
| "throughput": 12511.901100086807 |
| }, |
| { |
| "epoch": 0.5151137548099907, |
| "grad_norm": 0.08296819031238556, |
| "learning_rate": 0.00010238377939820202, |
| "loss": 8.7355, |
| "step": 32864, |
| "throughput": 12511.843185395246 |
| }, |
| { |
| "epoch": 0.5156153261389196, |
| "grad_norm": 0.09073803573846817, |
| "learning_rate": 0.00010214102883253832, |
| "loss": 8.7332, |
| "step": 32896, |
| "throughput": 12511.953352864462 |
| }, |
| { |
| "epoch": 0.5161168974678485, |
| "grad_norm": 0.32719534635543823, |
| "learning_rate": 0.00010189853749745799, |
| "loss": 8.7188, |
| "step": 32928, |
| "throughput": 12512.104071246153 |
| }, |
| { |
| "epoch": 0.5166184687967774, |
| "grad_norm": 0.08849098533391953, |
| "learning_rate": 0.00010165630639299606, |
| "loss": 8.7225, |
| "step": 32960, |
| "throughput": 12512.283954151511 |
| }, |
| { |
| "epoch": 0.5171200401257063, |
| "grad_norm": 0.08463660627603531, |
| "learning_rate": 0.00010141433651811429, |
| "loss": 8.7263, |
| "step": 32992, |
| "throughput": 12512.360463779769 |
| }, |
| { |
| "epoch": 0.5176216114546353, |
| "grad_norm": 0.09417407959699631, |
| "learning_rate": 0.00010117262887069724, |
| "loss": 8.7347, |
| "step": 33024, |
| "throughput": 12512.487421895641 |
| }, |
| { |
| "epoch": 0.5181231827835642, |
| "grad_norm": 0.07990865409374237, |
| "learning_rate": 0.00010093118444754784, |
| "loss": 8.7374, |
| "step": 33056, |
| "throughput": 12512.604844924646 |
| }, |
| { |
| "epoch": 0.5186247541124931, |
| "grad_norm": 0.1536962240934372, |
| "learning_rate": 0.0001006900042443837, |
| "loss": 8.7055, |
| "step": 33088, |
| "throughput": 12512.54815890842 |
| }, |
| { |
| "epoch": 0.519126325441422, |
| "grad_norm": 0.09293182939291, |
| "learning_rate": 0.00010044908925583264, |
| "loss": 8.7433, |
| "step": 33120, |
| "throughput": 12512.486599010308 |
| }, |
| { |
| "epoch": 0.5196278967703508, |
| "grad_norm": 0.08393841981887817, |
| "learning_rate": 0.00010020844047542886, |
| "loss": 8.7175, |
| "step": 33152, |
| "throughput": 12512.50869110717 |
| }, |
| { |
| "epoch": 0.5201294680992797, |
| "grad_norm": 0.0840592086315155, |
| "learning_rate": 9.996805889560857e-05, |
| "loss": 8.7241, |
| "step": 33184, |
| "throughput": 12512.387015564022 |
| }, |
| { |
| "epoch": 0.5206310394282087, |
| "grad_norm": 0.08570297062397003, |
| "learning_rate": 9.972794550770612e-05, |
| "loss": 8.7176, |
| "step": 33216, |
| "throughput": 12512.459075590425 |
| }, |
| { |
| "epoch": 0.5211326107571376, |
| "grad_norm": 0.08551673591136932, |
| "learning_rate": 9.948810130194984e-05, |
| "loss": 8.7408, |
| "step": 33248, |
| "throughput": 12512.593138858181 |
| }, |
| { |
| "epoch": 0.5216341820860665, |
| "grad_norm": 0.08799657970666885, |
| "learning_rate": 9.924852726745807e-05, |
| "loss": 8.7136, |
| "step": 33280, |
| "throughput": 12512.76581620304 |
| }, |
| { |
| "epoch": 0.5221357534149954, |
| "grad_norm": 0.08384101092815399, |
| "learning_rate": 9.900922439223464e-05, |
| "loss": 8.7567, |
| "step": 33312, |
| "throughput": 12512.863226335372 |
| }, |
| { |
| "epoch": 0.5226373247439243, |
| "grad_norm": 0.08380109071731567, |
| "learning_rate": 9.877019366316541e-05, |
| "loss": 8.7051, |
| "step": 33344, |
| "throughput": 12512.982174536366 |
| }, |
| { |
| "epoch": 0.5231388960728532, |
| "grad_norm": 0.08169150352478027, |
| "learning_rate": 9.85314360660138e-05, |
| "loss": 8.7099, |
| "step": 33376, |
| "throughput": 12513.085254147247 |
| }, |
| { |
| "epoch": 0.5236404674017822, |
| "grad_norm": 0.08903324604034424, |
| "learning_rate": 9.829295258541692e-05, |
| "loss": 8.7207, |
| "step": 33408, |
| "throughput": 12513.046439956352 |
| }, |
| { |
| "epoch": 0.5241420387307111, |
| "grad_norm": 0.08354644477367401, |
| "learning_rate": 9.805474420488123e-05, |
| "loss": 8.742, |
| "step": 33440, |
| "throughput": 12512.953627898272 |
| }, |
| { |
| "epoch": 0.52464361005964, |
| "grad_norm": 0.07761716097593307, |
| "learning_rate": 9.78168119067789e-05, |
| "loss": 8.7388, |
| "step": 33472, |
| "throughput": 12512.971486634364 |
| }, |
| { |
| "epoch": 0.5251451813885689, |
| "grad_norm": 0.08411026000976562, |
| "learning_rate": 9.757915667234339e-05, |
| "loss": 8.7373, |
| "step": 33504, |
| "throughput": 12512.91443184655 |
| }, |
| { |
| "epoch": 0.5256467527174978, |
| "grad_norm": 0.08170992136001587, |
| "learning_rate": 9.734177948166558e-05, |
| "loss": 8.7299, |
| "step": 33536, |
| "throughput": 12512.913718463229 |
| }, |
| { |
| "epoch": 0.5261483240464266, |
| "grad_norm": 0.11816666275262833, |
| "learning_rate": 9.710468131368968e-05, |
| "loss": 8.6938, |
| "step": 33568, |
| "throughput": 12513.025537345375 |
| }, |
| { |
| "epoch": 0.5266498953753556, |
| "grad_norm": 0.08636017143726349, |
| "learning_rate": 9.68678631462093e-05, |
| "loss": 8.7373, |
| "step": 33600, |
| "throughput": 12513.19843227255 |
| }, |
| { |
| "epoch": 0.5271514667042845, |
| "grad_norm": 0.07713182270526886, |
| "learning_rate": 9.66313259558633e-05, |
| "loss": 8.7255, |
| "step": 33632, |
| "throughput": 12513.317219699162 |
| }, |
| { |
| "epoch": 0.5276530380332134, |
| "grad_norm": 0.09151088446378708, |
| "learning_rate": 9.639507071813166e-05, |
| "loss": 8.6943, |
| "step": 33664, |
| "throughput": 12513.411913962553 |
| }, |
| { |
| "epoch": 0.5281546093621423, |
| "grad_norm": 0.08120939135551453, |
| "learning_rate": 9.615909840733167e-05, |
| "loss": 8.7223, |
| "step": 33696, |
| "throughput": 12513.582126916053 |
| }, |
| { |
| "epoch": 0.5286561806910712, |
| "grad_norm": 0.08518539369106293, |
| "learning_rate": 9.592340999661393e-05, |
| "loss": 8.7208, |
| "step": 33728, |
| "throughput": 12513.6103713382 |
| }, |
| { |
| "epoch": 0.5291577520200001, |
| "grad_norm": 0.07572302967309952, |
| "learning_rate": 9.568800645795812e-05, |
| "loss": 8.7463, |
| "step": 33760, |
| "throughput": 12513.480475179858 |
| }, |
| { |
| "epoch": 0.5296593233489291, |
| "grad_norm": 0.08459645509719849, |
| "learning_rate": 9.545288876216901e-05, |
| "loss": 8.714, |
| "step": 33792, |
| "throughput": 12513.512840943906 |
| }, |
| { |
| "epoch": 0.530160894677858, |
| "grad_norm": 0.08286039531230927, |
| "learning_rate": 9.521805787887285e-05, |
| "loss": 8.7092, |
| "step": 33824, |
| "throughput": 12513.469277726148 |
| }, |
| { |
| "epoch": 0.5306624660067869, |
| "grad_norm": 0.08557367324829102, |
| "learning_rate": 9.498351477651286e-05, |
| "loss": 8.7235, |
| "step": 33856, |
| "throughput": 12513.451216054991 |
| }, |
| { |
| "epoch": 0.5311640373357158, |
| "grad_norm": 0.0787692740559578, |
| "learning_rate": 9.47492604223454e-05, |
| "loss": 8.7063, |
| "step": 33888, |
| "throughput": 12513.513260723728 |
| }, |
| { |
| "epoch": 0.5316656086646447, |
| "grad_norm": 0.08846087753772736, |
| "learning_rate": 9.451529578243618e-05, |
| "loss": 8.7176, |
| "step": 33920, |
| "throughput": 12513.670563059231 |
| }, |
| { |
| "epoch": 0.5321671799935737, |
| "grad_norm": 0.08972983062267303, |
| "learning_rate": 9.428162182165607e-05, |
| "loss": 8.7047, |
| "step": 33952, |
| "throughput": 12513.786490143648 |
| }, |
| { |
| "epoch": 0.5326687513225026, |
| "grad_norm": 0.07745558023452759, |
| "learning_rate": 9.40482395036772e-05, |
| "loss": 8.7144, |
| "step": 33984, |
| "throughput": 12513.919700520197 |
| }, |
| { |
| "epoch": 0.5331703226514314, |
| "grad_norm": 0.07799308747053146, |
| "learning_rate": 9.381514979096888e-05, |
| "loss": 8.6938, |
| "step": 34016, |
| "throughput": 12514.037489764116 |
| }, |
| { |
| "epoch": 0.5336718939803603, |
| "grad_norm": 0.08440552651882172, |
| "learning_rate": 9.35823536447938e-05, |
| "loss": 8.7255, |
| "step": 34048, |
| "throughput": 12514.114761239924 |
| }, |
| { |
| "epoch": 0.5341734653092892, |
| "grad_norm": 0.07518921047449112, |
| "learning_rate": 9.334985202520395e-05, |
| "loss": 8.6927, |
| "step": 34080, |
| "throughput": 12514.078383131604 |
| }, |
| { |
| "epoch": 0.5346750366382181, |
| "grad_norm": 0.0790555477142334, |
| "learning_rate": 9.311764589103679e-05, |
| "loss": 8.7241, |
| "step": 34112, |
| "throughput": 12513.982049437827 |
| }, |
| { |
| "epoch": 0.5351766079671471, |
| "grad_norm": 0.08127565681934357, |
| "learning_rate": 9.288573619991096e-05, |
| "loss": 8.7226, |
| "step": 34144, |
| "throughput": 12514.01556900709 |
| }, |
| { |
| "epoch": 0.535678179296076, |
| "grad_norm": 0.07951905578374863, |
| "learning_rate": 9.265412390822278e-05, |
| "loss": 8.7273, |
| "step": 34176, |
| "throughput": 12513.949064899492 |
| }, |
| { |
| "epoch": 0.5361797506250049, |
| "grad_norm": 0.08190485835075378, |
| "learning_rate": 9.242280997114204e-05, |
| "loss": 8.6994, |
| "step": 34208, |
| "throughput": 12514.017668664614 |
| }, |
| { |
| "epoch": 0.5366813219539338, |
| "grad_norm": 0.0807257890701294, |
| "learning_rate": 9.219179534260811e-05, |
| "loss": 8.705, |
| "step": 34240, |
| "throughput": 12514.14673158072 |
| }, |
| { |
| "epoch": 0.5371828932828627, |
| "grad_norm": 0.08508472889661789, |
| "learning_rate": 9.196108097532597e-05, |
| "loss": 8.7023, |
| "step": 34272, |
| "throughput": 12514.308283370869 |
| }, |
| { |
| "epoch": 0.5376844646117916, |
| "grad_norm": 0.08585110306739807, |
| "learning_rate": 9.173066782076236e-05, |
| "loss": 8.7118, |
| "step": 34304, |
| "throughput": 12514.387570121344 |
| }, |
| { |
| "epoch": 0.5381860359407206, |
| "grad_norm": 0.0836232453584671, |
| "learning_rate": 9.15005568291418e-05, |
| "loss": 8.6902, |
| "step": 34336, |
| "throughput": 12514.502622322905 |
| }, |
| { |
| "epoch": 0.5386876072696495, |
| "grad_norm": 0.08131017535924911, |
| "learning_rate": 9.12707489494428e-05, |
| "loss": 8.682, |
| "step": 34368, |
| "throughput": 12514.60707528309 |
| }, |
| { |
| "epoch": 0.5391891785985784, |
| "grad_norm": 0.08063245564699173, |
| "learning_rate": 9.104124512939357e-05, |
| "loss": 8.7298, |
| "step": 34400, |
| "throughput": 12514.569249700598 |
| }, |
| { |
| "epoch": 0.5396907499275073, |
| "grad_norm": 0.08101888746023178, |
| "learning_rate": 9.081204631546867e-05, |
| "loss": 8.7014, |
| "step": 34432, |
| "throughput": 12514.477715518917 |
| }, |
| { |
| "epoch": 0.5401923212564361, |
| "grad_norm": 0.08040083944797516, |
| "learning_rate": 9.058315345288465e-05, |
| "loss": 8.687, |
| "step": 34464, |
| "throughput": 12514.475338292768 |
| }, |
| { |
| "epoch": 0.540693892585365, |
| "grad_norm": 0.08651674538850784, |
| "learning_rate": 9.035456748559639e-05, |
| "loss": 8.729, |
| "step": 34496, |
| "throughput": 12514.422458791822 |
| }, |
| { |
| "epoch": 0.541195463914294, |
| "grad_norm": 0.08819051086902618, |
| "learning_rate": 9.012628935629299e-05, |
| "loss": 8.6979, |
| "step": 34528, |
| "throughput": 12514.461501678546 |
| }, |
| { |
| "epoch": 0.5416970352432229, |
| "grad_norm": 0.0806623175740242, |
| "learning_rate": 8.989832000639424e-05, |
| "loss": 8.6905, |
| "step": 34560, |
| "throughput": 12514.552034181705 |
| }, |
| { |
| "epoch": 0.5421986065721518, |
| "grad_norm": 0.07820701599121094, |
| "learning_rate": 8.967066037604637e-05, |
| "loss": 8.7115, |
| "step": 34592, |
| "throughput": 12514.716440195212 |
| }, |
| { |
| "epoch": 0.5427001779010807, |
| "grad_norm": 0.08373693376779556, |
| "learning_rate": 8.944331140411841e-05, |
| "loss": 8.7161, |
| "step": 34624, |
| "throughput": 12514.836142421645 |
| }, |
| { |
| "epoch": 0.5432017492300096, |
| "grad_norm": 0.08561219274997711, |
| "learning_rate": 8.921627402819813e-05, |
| "loss": 8.7024, |
| "step": 34656, |
| "throughput": 12514.960254163809 |
| }, |
| { |
| "epoch": 0.5437033205589386, |
| "grad_norm": 0.08857572823762894, |
| "learning_rate": 8.898954918458835e-05, |
| "loss": 8.7119, |
| "step": 34688, |
| "throughput": 12515.078179186417 |
| }, |
| { |
| "epoch": 0.5442048918878675, |
| "grad_norm": 0.08995655179023743, |
| "learning_rate": 8.876313780830305e-05, |
| "loss": 8.723, |
| "step": 34720, |
| "throughput": 12515.100646360597 |
| }, |
| { |
| "epoch": 0.5447064632167964, |
| "grad_norm": 0.08579035848379135, |
| "learning_rate": 8.853704083306341e-05, |
| "loss": 8.7166, |
| "step": 34752, |
| "throughput": 12514.98232968835 |
| }, |
| { |
| "epoch": 0.5452080345457253, |
| "grad_norm": 0.08334320038557053, |
| "learning_rate": 8.831125919129397e-05, |
| "loss": 8.71, |
| "step": 34784, |
| "throughput": 12515.002708526768 |
| }, |
| { |
| "epoch": 0.5457096058746542, |
| "grad_norm": 0.08186853677034378, |
| "learning_rate": 8.808579381411892e-05, |
| "loss": 8.7085, |
| "step": 34816, |
| "throughput": 12515.016993182575 |
| }, |
| { |
| "epoch": 0.5462111772035831, |
| "grad_norm": 0.08742789179086685, |
| "learning_rate": 8.786064563135815e-05, |
| "loss": 8.7129, |
| "step": 34848, |
| "throughput": 12514.411341164006 |
| }, |
| { |
| "epoch": 0.5467127485325121, |
| "grad_norm": 0.09223673492670059, |
| "learning_rate": 8.763581557152348e-05, |
| "loss": 8.7098, |
| "step": 34880, |
| "throughput": 12514.476674435435 |
| }, |
| { |
| "epoch": 0.5472143198614409, |
| "grad_norm": 0.08679956942796707, |
| "learning_rate": 8.741130456181463e-05, |
| "loss": 8.7003, |
| "step": 34912, |
| "throughput": 12514.636751601753 |
| }, |
| { |
| "epoch": 0.5477158911903698, |
| "grad_norm": 0.0794166699051857, |
| "learning_rate": 8.718711352811573e-05, |
| "loss": 8.7058, |
| "step": 34944, |
| "throughput": 12514.743625683801 |
| }, |
| { |
| "epoch": 0.5482174625192987, |
| "grad_norm": 0.07620302587747574, |
| "learning_rate": 8.696324339499135e-05, |
| "loss": 8.7075, |
| "step": 34976, |
| "throughput": 12514.867235682797 |
| }, |
| { |
| "epoch": 0.5487190338482276, |
| "grad_norm": 0.09295224398374557, |
| "learning_rate": 8.673969508568242e-05, |
| "loss": 8.6821, |
| "step": 35008, |
| "throughput": 12514.977725954972 |
| }, |
| { |
| "epoch": 0.5492206051771565, |
| "grad_norm": 0.07569506764411926, |
| "learning_rate": 8.651646952210293e-05, |
| "loss": 8.7031, |
| "step": 35040, |
| "throughput": 12514.95672239073 |
| }, |
| { |
| "epoch": 0.5497221765060855, |
| "grad_norm": 0.08172563463449478, |
| "learning_rate": 8.629356762483573e-05, |
| "loss": 8.7053, |
| "step": 35072, |
| "throughput": 12514.972298338476 |
| }, |
| { |
| "epoch": 0.5502237478350144, |
| "grad_norm": 0.08027073740959167, |
| "learning_rate": 8.607099031312901e-05, |
| "loss": 8.6982, |
| "step": 35104, |
| "throughput": 12514.91865195136 |
| }, |
| { |
| "epoch": 0.5507253191639433, |
| "grad_norm": 0.08717502653598785, |
| "learning_rate": 8.58487385048921e-05, |
| "loss": 8.7226, |
| "step": 35136, |
| "throughput": 12514.937656725599 |
| }, |
| { |
| "epoch": 0.5512268904928722, |
| "grad_norm": 0.10650716722011566, |
| "learning_rate": 8.562681311669218e-05, |
| "loss": 8.7122, |
| "step": 35168, |
| "throughput": 12514.89458864502 |
| }, |
| { |
| "epoch": 0.5517284618218011, |
| "grad_norm": 0.07669194042682648, |
| "learning_rate": 8.540521506375026e-05, |
| "loss": 8.7039, |
| "step": 35200, |
| "throughput": 12514.952425684478 |
| }, |
| { |
| "epoch": 0.55223003315073, |
| "grad_norm": 0.08644834160804749, |
| "learning_rate": 8.518394525993734e-05, |
| "loss": 8.6973, |
| "step": 35232, |
| "throughput": 12515.091657244611 |
| }, |
| { |
| "epoch": 0.552731604479659, |
| "grad_norm": 0.08324025571346283, |
| "learning_rate": 8.496300461777068e-05, |
| "loss": 8.7074, |
| "step": 35264, |
| "throughput": 12515.20257350424 |
| }, |
| { |
| "epoch": 0.5532331758085879, |
| "grad_norm": 0.08295135200023651, |
| "learning_rate": 8.474239404841023e-05, |
| "loss": 8.6957, |
| "step": 35296, |
| "throughput": 12515.360529727743 |
| }, |
| { |
| "epoch": 0.5537347471375168, |
| "grad_norm": 0.0789424255490303, |
| "learning_rate": 8.452211446165458e-05, |
| "loss": 8.6975, |
| "step": 35328, |
| "throughput": 12515.486852153686 |
| }, |
| { |
| "epoch": 0.5542363184664456, |
| "grad_norm": 0.08173195272684097, |
| "learning_rate": 8.430216676593744e-05, |
| "loss": 8.7228, |
| "step": 35360, |
| "throughput": 12515.514720827478 |
| }, |
| { |
| "epoch": 0.5547378897953745, |
| "grad_norm": 0.08786769211292267, |
| "learning_rate": 8.408255186832372e-05, |
| "loss": 8.7126, |
| "step": 35392, |
| "throughput": 12515.545096190892 |
| }, |
| { |
| "epoch": 0.5552394611243034, |
| "grad_norm": 0.09330630302429199, |
| "learning_rate": 8.386327067450593e-05, |
| "loss": 8.6834, |
| "step": 35424, |
| "throughput": 12515.437722737353 |
| }, |
| { |
| "epoch": 0.5557410324532324, |
| "grad_norm": 0.08301142603158951, |
| "learning_rate": 8.36443240888004e-05, |
| "loss": 8.6979, |
| "step": 35456, |
| "throughput": 12515.457683968867 |
| }, |
| { |
| "epoch": 0.5562426037821613, |
| "grad_norm": 0.08266784995794296, |
| "learning_rate": 8.342571301414342e-05, |
| "loss": 8.7118, |
| "step": 35488, |
| "throughput": 12515.426200839343 |
| }, |
| { |
| "epoch": 0.5567441751110902, |
| "grad_norm": 0.09009415656328201, |
| "learning_rate": 8.320743835208775e-05, |
| "loss": 8.715, |
| "step": 35520, |
| "throughput": 12515.464315842382 |
| }, |
| { |
| "epoch": 0.5572457464400191, |
| "grad_norm": 0.08386842161417007, |
| "learning_rate": 8.298950100279872e-05, |
| "loss": 8.7033, |
| "step": 35552, |
| "throughput": 12515.580516730502 |
| }, |
| { |
| "epoch": 0.557747317768948, |
| "grad_norm": 0.08837930858135223, |
| "learning_rate": 8.27719018650507e-05, |
| "loss": 8.7159, |
| "step": 35584, |
| "throughput": 12515.690077420006 |
| }, |
| { |
| "epoch": 0.558248889097877, |
| "grad_norm": 0.09241513162851334, |
| "learning_rate": 8.255464183622304e-05, |
| "loss": 8.7163, |
| "step": 35616, |
| "throughput": 12515.849451780045 |
| }, |
| { |
| "epoch": 0.5587504604268059, |
| "grad_norm": 0.08907388150691986, |
| "learning_rate": 8.23377218122968e-05, |
| "loss": 8.6996, |
| "step": 35648, |
| "throughput": 12515.982321961046 |
| }, |
| { |
| "epoch": 0.5592520317557348, |
| "grad_norm": 0.08048530668020248, |
| "learning_rate": 8.212114268785083e-05, |
| "loss": 8.6889, |
| "step": 35680, |
| "throughput": 12516.09922219064 |
| }, |
| { |
| "epoch": 0.5597536030846637, |
| "grad_norm": 0.08962694555521011, |
| "learning_rate": 8.190490535605809e-05, |
| "loss": 8.6696, |
| "step": 35712, |
| "throughput": 12516.10359288709 |
| }, |
| { |
| "epoch": 0.5602551744135926, |
| "grad_norm": 0.07987916469573975, |
| "learning_rate": 8.16890107086819e-05, |
| "loss": 8.6858, |
| "step": 35744, |
| "throughput": 12515.92148666699 |
| }, |
| { |
| "epoch": 0.5607567457425215, |
| "grad_norm": 0.08420739322900772, |
| "learning_rate": 8.14734596360725e-05, |
| "loss": 8.6962, |
| "step": 35776, |
| "throughput": 12515.934187064237 |
| }, |
| { |
| "epoch": 0.5612583170714504, |
| "grad_norm": 0.08826903998851776, |
| "learning_rate": 8.12582530271631e-05, |
| "loss": 8.7113, |
| "step": 35808, |
| "throughput": 12515.952704779598 |
| }, |
| { |
| "epoch": 0.5617598884003793, |
| "grad_norm": 0.10297546535730362, |
| "learning_rate": 8.104339176946648e-05, |
| "loss": 8.6696, |
| "step": 35840, |
| "throughput": 12515.947157806651 |
| }, |
| { |
| "epoch": 0.5622614597293082, |
| "grad_norm": 0.07826440036296844, |
| "learning_rate": 8.082887674907099e-05, |
| "loss": 8.6888, |
| "step": 35872, |
| "throughput": 12516.01639726759 |
| }, |
| { |
| "epoch": 0.5627630310582371, |
| "grad_norm": 0.08734133839607239, |
| "learning_rate": 8.061470885063726e-05, |
| "loss": 8.7059, |
| "step": 35904, |
| "throughput": 12516.124572831535 |
| }, |
| { |
| "epoch": 0.563264602387166, |
| "grad_norm": 0.09758967161178589, |
| "learning_rate": 8.040088895739433e-05, |
| "loss": 8.7093, |
| "step": 35936, |
| "throughput": 12516.278365852624 |
| }, |
| { |
| "epoch": 0.5637661737160949, |
| "grad_norm": 0.0903221070766449, |
| "learning_rate": 8.018741795113614e-05, |
| "loss": 8.6963, |
| "step": 35968, |
| "throughput": 12516.40036791016 |
| }, |
| { |
| "epoch": 0.5642677450450239, |
| "grad_norm": 0.0803726464509964, |
| "learning_rate": 7.997429671221764e-05, |
| "loss": 8.6938, |
| "step": 36000, |
| "throughput": 12516.506289442148 |
| }, |
| { |
| "epoch": 0.5647693163739528, |
| "grad_norm": 0.08807411044836044, |
| "learning_rate": 7.97615261195515e-05, |
| "loss": 8.7012, |
| "step": 36032, |
| "throughput": 12516.495471268532 |
| }, |
| { |
| "epoch": 0.5652708877028817, |
| "grad_norm": 0.08333015441894531, |
| "learning_rate": 7.95491070506043e-05, |
| "loss": 8.715, |
| "step": 36064, |
| "throughput": 12516.464464403765 |
| }, |
| { |
| "epoch": 0.5657724590318106, |
| "grad_norm": 0.0851825550198555, |
| "learning_rate": 7.933704038139292e-05, |
| "loss": 8.6781, |
| "step": 36096, |
| "throughput": 12516.371951049214 |
| }, |
| { |
| "epoch": 0.5662740303607395, |
| "grad_norm": 0.07540696859359741, |
| "learning_rate": 7.912532698648089e-05, |
| "loss": 8.684, |
| "step": 36128, |
| "throughput": 12516.47121087276 |
| }, |
| { |
| "epoch": 0.5667756016896685, |
| "grad_norm": 0.0787714496254921, |
| "learning_rate": 7.891396773897487e-05, |
| "loss": 8.6759, |
| "step": 36160, |
| "throughput": 12516.426290858957 |
| }, |
| { |
| "epoch": 0.5672771730185974, |
| "grad_norm": 0.08228734880685806, |
| "learning_rate": 7.870296351052104e-05, |
| "loss": 8.6708, |
| "step": 36192, |
| "throughput": 12516.486618277379 |
| }, |
| { |
| "epoch": 0.5677787443475263, |
| "grad_norm": 0.08103416115045547, |
| "learning_rate": 7.849231517130151e-05, |
| "loss": 8.6864, |
| "step": 36224, |
| "throughput": 12516.617793702857 |
| }, |
| { |
| "epoch": 0.5682803156764551, |
| "grad_norm": 0.08393923193216324, |
| "learning_rate": 7.828202359003058e-05, |
| "loss": 8.6839, |
| "step": 36256, |
| "throughput": 12516.722469381535 |
| }, |
| { |
| "epoch": 0.568781887005384, |
| "grad_norm": 0.07748636603355408, |
| "learning_rate": 7.807208963395139e-05, |
| "loss": 8.6778, |
| "step": 36288, |
| "throughput": 12516.875094866176 |
| }, |
| { |
| "epoch": 0.5692834583343129, |
| "grad_norm": 0.08194294571876526, |
| "learning_rate": 7.786251416883218e-05, |
| "loss": 8.6849, |
| "step": 36320, |
| "throughput": 12516.998174008146 |
| }, |
| { |
| "epoch": 0.5697850296632418, |
| "grad_norm": 0.08278539031744003, |
| "learning_rate": 7.765329805896287e-05, |
| "loss": 8.6976, |
| "step": 36352, |
| "throughput": 12517.033518057026 |
| }, |
| { |
| "epoch": 0.5702866009921708, |
| "grad_norm": 0.07897041738033295, |
| "learning_rate": 7.744444216715117e-05, |
| "loss": 8.6953, |
| "step": 36384, |
| "throughput": 12516.974273839218 |
| }, |
| { |
| "epoch": 0.5707881723210997, |
| "grad_norm": 0.08542395383119583, |
| "learning_rate": 7.723594735471952e-05, |
| "loss": 8.6978, |
| "step": 36416, |
| "throughput": 12516.858572450183 |
| }, |
| { |
| "epoch": 0.5712897436500286, |
| "grad_norm": 0.07970910519361496, |
| "learning_rate": 7.702781448150109e-05, |
| "loss": 8.6899, |
| "step": 36448, |
| "throughput": 12516.874160403917 |
| }, |
| { |
| "epoch": 0.5717913149789575, |
| "grad_norm": 0.0827026516199112, |
| "learning_rate": 7.682004440583654e-05, |
| "loss": 8.6825, |
| "step": 36480, |
| "throughput": 12516.862928531891 |
| }, |
| { |
| "epoch": 0.5722928863078864, |
| "grad_norm": 0.07867981493473053, |
| "learning_rate": 7.661263798457014e-05, |
| "loss": 8.6836, |
| "step": 36512, |
| "throughput": 12516.91203717116 |
| }, |
| { |
| "epoch": 0.5727944576368154, |
| "grad_norm": 0.08377469331026077, |
| "learning_rate": 7.64055960730467e-05, |
| "loss": 8.6646, |
| "step": 36544, |
| "throughput": 12517.001455253561 |
| }, |
| { |
| "epoch": 0.5732960289657443, |
| "grad_norm": 0.09216863662004471, |
| "learning_rate": 7.619891952510763e-05, |
| "loss": 8.6938, |
| "step": 36576, |
| "throughput": 12517.107661462249 |
| }, |
| { |
| "epoch": 0.5737976002946732, |
| "grad_norm": 0.08290153741836548, |
| "learning_rate": 7.599260919308764e-05, |
| "loss": 8.6821, |
| "step": 36608, |
| "throughput": 12517.260289134058 |
| }, |
| { |
| "epoch": 0.5742991716236021, |
| "grad_norm": 0.09356728941202164, |
| "learning_rate": 7.578666592781114e-05, |
| "loss": 8.6922, |
| "step": 36640, |
| "throughput": 12517.389574354975 |
| }, |
| { |
| "epoch": 0.574800742952531, |
| "grad_norm": 0.08148008584976196, |
| "learning_rate": 7.558109057858874e-05, |
| "loss": 8.6712, |
| "step": 36672, |
| "throughput": 12517.430869809043 |
| }, |
| { |
| "epoch": 0.5753023142814598, |
| "grad_norm": 0.08250202238559723, |
| "learning_rate": 7.53758839932139e-05, |
| "loss": 8.6867, |
| "step": 36704, |
| "throughput": 12517.466052972384 |
| }, |
| { |
| "epoch": 0.5758038856103888, |
| "grad_norm": 0.08435425907373428, |
| "learning_rate": 7.517104701795905e-05, |
| "loss": 8.6657, |
| "step": 36736, |
| "throughput": 12517.283483662877 |
| }, |
| { |
| "epoch": 0.5763054569393177, |
| "grad_norm": 0.09276581555604935, |
| "learning_rate": 7.496658049757255e-05, |
| "loss": 8.6782, |
| "step": 36768, |
| "throughput": 12517.328290633728 |
| }, |
| { |
| "epoch": 0.5768070282682466, |
| "grad_norm": 0.08053594082593918, |
| "learning_rate": 7.476248527527492e-05, |
| "loss": 8.6766, |
| "step": 36800, |
| "throughput": 12517.335458268808 |
| }, |
| { |
| "epoch": 0.5773085995971755, |
| "grad_norm": 0.08317738771438599, |
| "learning_rate": 7.455876219275552e-05, |
| "loss": 8.6696, |
| "step": 36832, |
| "throughput": 12517.356605369205 |
| }, |
| { |
| "epoch": 0.5778101709261044, |
| "grad_norm": 0.08413007855415344, |
| "learning_rate": 7.435541209016885e-05, |
| "loss": 8.694, |
| "step": 36864, |
| "throughput": 12517.419176649293 |
| }, |
| { |
| "epoch": 0.5783117422550333, |
| "grad_norm": 0.08062509447336197, |
| "learning_rate": 7.415243580613134e-05, |
| "loss": 8.6712, |
| "step": 36896, |
| "throughput": 12516.973503237807 |
| }, |
| { |
| "epoch": 0.5788133135839623, |
| "grad_norm": 0.08116944134235382, |
| "learning_rate": 7.394983417771791e-05, |
| "loss": 8.6912, |
| "step": 36928, |
| "throughput": 12517.128302324305 |
| }, |
| { |
| "epoch": 0.5793148849128912, |
| "grad_norm": 0.08938824385404587, |
| "learning_rate": 7.374760804045815e-05, |
| "loss": 8.6986, |
| "step": 36960, |
| "throughput": 12517.282038382906 |
| }, |
| { |
| "epoch": 0.5798164562418201, |
| "grad_norm": 0.07905585318803787, |
| "learning_rate": 7.354575822833331e-05, |
| "loss": 8.6741, |
| "step": 36992, |
| "throughput": 12517.382771112063 |
| }, |
| { |
| "epoch": 0.580318027570749, |
| "grad_norm": 0.08489411324262619, |
| "learning_rate": 7.334428557377258e-05, |
| "loss": 8.6935, |
| "step": 37024, |
| "throughput": 12517.346292375569 |
| }, |
| { |
| "epoch": 0.5808195988996779, |
| "grad_norm": 0.09119182080030441, |
| "learning_rate": 7.314319090764985e-05, |
| "loss": 8.6883, |
| "step": 37056, |
| "throughput": 12517.23137805927 |
| }, |
| { |
| "epoch": 0.5813211702286069, |
| "grad_norm": 0.08179455995559692, |
| "learning_rate": 7.294247505928003e-05, |
| "loss": 8.6844, |
| "step": 37088, |
| "throughput": 12517.178960965015 |
| }, |
| { |
| "epoch": 0.5818227415575358, |
| "grad_norm": 0.08026742935180664, |
| "learning_rate": 7.274213885641592e-05, |
| "loss": 8.6751, |
| "step": 37120, |
| "throughput": 12517.181465984202 |
| }, |
| { |
| "epoch": 0.5823243128864646, |
| "grad_norm": 0.08509814739227295, |
| "learning_rate": 7.254218312524461e-05, |
| "loss": 8.6828, |
| "step": 37152, |
| "throughput": 12517.27502240571 |
| }, |
| { |
| "epoch": 0.5828258842153935, |
| "grad_norm": 0.08018773049116135, |
| "learning_rate": 7.234260869038417e-05, |
| "loss": 8.695, |
| "step": 37184, |
| "throughput": 12517.289733393207 |
| }, |
| { |
| "epoch": 0.5833274555443224, |
| "grad_norm": 0.08433583378791809, |
| "learning_rate": 7.214341637488007e-05, |
| "loss": 8.7007, |
| "step": 37216, |
| "throughput": 12517.361447458245 |
| }, |
| { |
| "epoch": 0.5838290268732513, |
| "grad_norm": 0.08063721656799316, |
| "learning_rate": 7.194460700020206e-05, |
| "loss": 8.6719, |
| "step": 37248, |
| "throughput": 12517.512031241993 |
| }, |
| { |
| "epoch": 0.5843305982021803, |
| "grad_norm": 0.08141735196113586, |
| "learning_rate": 7.174618138624058e-05, |
| "loss": 8.6913, |
| "step": 37280, |
| "throughput": 12517.664695356068 |
| }, |
| { |
| "epoch": 0.5848321695311092, |
| "grad_norm": 0.08511857688426971, |
| "learning_rate": 7.154814035130351e-05, |
| "loss": 8.6866, |
| "step": 37312, |
| "throughput": 12517.791148806165 |
| }, |
| { |
| "epoch": 0.5853337408600381, |
| "grad_norm": 0.08427421748638153, |
| "learning_rate": 7.135048471211257e-05, |
| "loss": 8.6776, |
| "step": 37344, |
| "throughput": 12517.793106161102 |
| }, |
| { |
| "epoch": 0.585835312188967, |
| "grad_norm": 0.08205582201480865, |
| "learning_rate": 7.115321528380024e-05, |
| "loss": 8.7002, |
| "step": 37376, |
| "throughput": 12517.730083261857 |
| }, |
| { |
| "epoch": 0.5863368835178959, |
| "grad_norm": 0.08410238474607468, |
| "learning_rate": 7.095633287990622e-05, |
| "loss": 8.6803, |
| "step": 37408, |
| "throughput": 12517.57726651346 |
| }, |
| { |
| "epoch": 0.5868384548468248, |
| "grad_norm": 0.08508728444576263, |
| "learning_rate": 7.075983831237421e-05, |
| "loss": 8.6598, |
| "step": 37440, |
| "throughput": 12517.667385193266 |
| }, |
| { |
| "epoch": 0.5873400261757538, |
| "grad_norm": 0.2568950355052948, |
| "learning_rate": 7.056373239154826e-05, |
| "loss": 8.6748, |
| "step": 37472, |
| "throughput": 12517.671686146488 |
| }, |
| { |
| "epoch": 0.5878415975046827, |
| "grad_norm": 0.08162539452314377, |
| "learning_rate": 7.036801592616982e-05, |
| "loss": 8.6504, |
| "step": 37504, |
| "throughput": 12517.690999905139 |
| }, |
| { |
| "epoch": 0.5883431688336116, |
| "grad_norm": 0.07849689573049545, |
| "learning_rate": 7.017268972337419e-05, |
| "loss": 8.6504, |
| "step": 37536, |
| "throughput": 12517.768847195408 |
| }, |
| { |
| "epoch": 0.5888447401625405, |
| "grad_norm": 0.08418171852827072, |
| "learning_rate": 6.997775458868724e-05, |
| "loss": 8.6812, |
| "step": 37568, |
| "throughput": 12517.915109902939 |
| }, |
| { |
| "epoch": 0.5893463114914693, |
| "grad_norm": 0.08415034413337708, |
| "learning_rate": 6.978321132602197e-05, |
| "loss": 8.6772, |
| "step": 37600, |
| "throughput": 12518.061122467996 |
| }, |
| { |
| "epoch": 0.5898478828203982, |
| "grad_norm": 0.08779383450746536, |
| "learning_rate": 6.95890607376754e-05, |
| "loss": 8.6624, |
| "step": 37632, |
| "throughput": 12518.181187880014 |
| }, |
| { |
| "epoch": 0.5903494541493272, |
| "grad_norm": 0.07764028757810593, |
| "learning_rate": 6.939530362432513e-05, |
| "loss": 8.6793, |
| "step": 37664, |
| "throughput": 12518.19822260176 |
| }, |
| { |
| "epoch": 0.5908510254782561, |
| "grad_norm": 0.0823327898979187, |
| "learning_rate": 6.920194078502611e-05, |
| "loss": 8.6744, |
| "step": 37696, |
| "throughput": 12518.172950977636 |
| }, |
| { |
| "epoch": 0.591352596807185, |
| "grad_norm": 0.07632856070995331, |
| "learning_rate": 6.900897301720721e-05, |
| "loss": 8.6787, |
| "step": 37728, |
| "throughput": 12518.087089234079 |
| }, |
| { |
| "epoch": 0.5918541681361139, |
| "grad_norm": 0.07819085568189621, |
| "learning_rate": 6.881640111666807e-05, |
| "loss": 8.6944, |
| "step": 37760, |
| "throughput": 12518.08412581868 |
| }, |
| { |
| "epoch": 0.5923557394650428, |
| "grad_norm": 0.08589126914739609, |
| "learning_rate": 6.862422587757581e-05, |
| "loss": 8.6819, |
| "step": 37792, |
| "throughput": 12518.12154185379 |
| }, |
| { |
| "epoch": 0.5928573107939717, |
| "grad_norm": 0.08168213069438934, |
| "learning_rate": 6.843244809246173e-05, |
| "loss": 8.6967, |
| "step": 37824, |
| "throughput": 12518.118068216625 |
| }, |
| { |
| "epoch": 0.5933588821229007, |
| "grad_norm": 0.07826963812112808, |
| "learning_rate": 6.824106855221788e-05, |
| "loss": 8.6777, |
| "step": 37856, |
| "throughput": 12518.203417005496 |
| }, |
| { |
| "epoch": 0.5938604534518296, |
| "grad_norm": 0.08327604085206985, |
| "learning_rate": 6.805008804609411e-05, |
| "loss": 8.6729, |
| "step": 37888, |
| "throughput": 12518.299888113675 |
| }, |
| { |
| "epoch": 0.5943620247807585, |
| "grad_norm": 0.08167058974504471, |
| "learning_rate": 6.78595073616946e-05, |
| "loss": 8.6702, |
| "step": 37920, |
| "throughput": 12518.440363110465 |
| }, |
| { |
| "epoch": 0.5948635961096874, |
| "grad_norm": 0.08159983903169632, |
| "learning_rate": 6.766932728497468e-05, |
| "loss": 8.6946, |
| "step": 37952, |
| "throughput": 12518.577594035549 |
| }, |
| { |
| "epoch": 0.5953651674386163, |
| "grad_norm": 0.08012655377388, |
| "learning_rate": 6.747954860023746e-05, |
| "loss": 8.6939, |
| "step": 37984, |
| "throughput": 12518.626317917498 |
| }, |
| { |
| "epoch": 0.5958667387675451, |
| "grad_norm": 0.0828748419880867, |
| "learning_rate": 6.729017209013086e-05, |
| "loss": 8.6817, |
| "step": 38016, |
| "throughput": 12518.667987562065 |
| }, |
| { |
| "epoch": 0.5963683100964741, |
| "grad_norm": 0.08672600984573364, |
| "learning_rate": 6.710119853564422e-05, |
| "loss": 8.689, |
| "step": 38048, |
| "throughput": 12518.465836159847 |
| }, |
| { |
| "epoch": 0.596869881425403, |
| "grad_norm": 0.09564584493637085, |
| "learning_rate": 6.69126287161049e-05, |
| "loss": 8.6977, |
| "step": 38080, |
| "throughput": 12518.43985834833 |
| }, |
| { |
| "epoch": 0.5973714527543319, |
| "grad_norm": 0.08064235001802444, |
| "learning_rate": 6.672446340917553e-05, |
| "loss": 8.6815, |
| "step": 38112, |
| "throughput": 12518.4491105091 |
| }, |
| { |
| "epoch": 0.5978730240832608, |
| "grad_norm": 0.08223454654216766, |
| "learning_rate": 6.653670339085031e-05, |
| "loss": 8.6738, |
| "step": 38144, |
| "throughput": 12518.495327793264 |
| }, |
| { |
| "epoch": 0.5983745954121897, |
| "grad_norm": 0.08172369003295898, |
| "learning_rate": 6.634934943545217e-05, |
| "loss": 8.6804, |
| "step": 38176, |
| "throughput": 12518.589927415283 |
| }, |
| { |
| "epoch": 0.5988761667411187, |
| "grad_norm": 0.08844827860593796, |
| "learning_rate": 6.616240231562933e-05, |
| "loss": 8.6558, |
| "step": 38208, |
| "throughput": 12518.669975625291 |
| }, |
| { |
| "epoch": 0.5993777380700476, |
| "grad_norm": 0.08515715599060059, |
| "learning_rate": 6.597586280235227e-05, |
| "loss": 8.6843, |
| "step": 38240, |
| "throughput": 12518.811041374041 |
| }, |
| { |
| "epoch": 0.5998793093989765, |
| "grad_norm": 0.0898033082485199, |
| "learning_rate": 6.578973166491053e-05, |
| "loss": 8.6682, |
| "step": 38272, |
| "throughput": 12518.948975647792 |
| }, |
| { |
| "epoch": 0.6003808807279054, |
| "grad_norm": 0.0798042044043541, |
| "learning_rate": 6.560400967090948e-05, |
| "loss": 8.6688, |
| "step": 38304, |
| "throughput": 12519.03731635014 |
| }, |
| { |
| "epoch": 0.6008824520568343, |
| "grad_norm": 0.07899042218923569, |
| "learning_rate": 6.54186975862671e-05, |
| "loss": 8.6622, |
| "step": 38336, |
| "throughput": 12519.056239877606 |
| }, |
| { |
| "epoch": 0.6013840233857632, |
| "grad_norm": 0.0829099491238594, |
| "learning_rate": 6.523379617521104e-05, |
| "loss": 8.6642, |
| "step": 38368, |
| "throughput": 12518.961806386887 |
| }, |
| { |
| "epoch": 0.6018855947146922, |
| "grad_norm": 0.07706566154956818, |
| "learning_rate": 6.504930620027524e-05, |
| "loss": 8.6645, |
| "step": 38400, |
| "throughput": 12518.839803727704 |
| }, |
| { |
| "epoch": 0.6023871660436211, |
| "grad_norm": 0.08346829563379288, |
| "learning_rate": 6.486522842229692e-05, |
| "loss": 8.6709, |
| "step": 38432, |
| "throughput": 12518.82358632836 |
| }, |
| { |
| "epoch": 0.6028887373725499, |
| "grad_norm": 0.08457069098949432, |
| "learning_rate": 6.468156360041337e-05, |
| "loss": 8.6736, |
| "step": 38464, |
| "throughput": 12518.91697685828 |
| }, |
| { |
| "epoch": 0.6033903087014788, |
| "grad_norm": 0.08379726111888885, |
| "learning_rate": 6.449831249205887e-05, |
| "loss": 8.6604, |
| "step": 38496, |
| "throughput": 12518.920394830555 |
| }, |
| { |
| "epoch": 0.6038918800304077, |
| "grad_norm": 0.08050908893346786, |
| "learning_rate": 6.431547585296156e-05, |
| "loss": 8.6586, |
| "step": 38528, |
| "throughput": 12519.000297488355 |
| }, |
| { |
| "epoch": 0.6043934513593366, |
| "grad_norm": 0.08401334285736084, |
| "learning_rate": 6.413305443714022e-05, |
| "loss": 8.6613, |
| "step": 38560, |
| "throughput": 12519.14071583752 |
| }, |
| { |
| "epoch": 0.6048950226882656, |
| "grad_norm": 0.08327256143093109, |
| "learning_rate": 6.395104899690134e-05, |
| "loss": 8.6622, |
| "step": 38592, |
| "throughput": 12519.282051726044 |
| }, |
| { |
| "epoch": 0.6053965940171945, |
| "grad_norm": 0.08408129215240479, |
| "learning_rate": 6.37694602828359e-05, |
| "loss": 8.6708, |
| "step": 38624, |
| "throughput": 12519.367297466042 |
| }, |
| { |
| "epoch": 0.6058981653461234, |
| "grad_norm": 0.07974757254123688, |
| "learning_rate": 6.358828904381632e-05, |
| "loss": 8.6589, |
| "step": 38656, |
| "throughput": 12519.429108838523 |
| }, |
| { |
| "epoch": 0.6063997366750523, |
| "grad_norm": 0.0898723155260086, |
| "learning_rate": 6.340753602699327e-05, |
| "loss": 8.6824, |
| "step": 38688, |
| "throughput": 12519.31796076407 |
| }, |
| { |
| "epoch": 0.6069013080039812, |
| "grad_norm": 0.09549321979284286, |
| "learning_rate": 6.322720197779275e-05, |
| "loss": 8.6868, |
| "step": 38720, |
| "throughput": 12519.202567652002 |
| }, |
| { |
| "epoch": 0.6074028793329102, |
| "grad_norm": 0.08469787240028381, |
| "learning_rate": 6.304728763991291e-05, |
| "loss": 8.6661, |
| "step": 38752, |
| "throughput": 12519.26707210921 |
| }, |
| { |
| "epoch": 0.6079044506618391, |
| "grad_norm": 0.0787658542394638, |
| "learning_rate": 6.286779375532107e-05, |
| "loss": 8.6826, |
| "step": 38784, |
| "throughput": 12519.306401204129 |
| }, |
| { |
| "epoch": 0.608406021990768, |
| "grad_norm": 0.07940182834863663, |
| "learning_rate": 6.268872106425044e-05, |
| "loss": 8.643, |
| "step": 38816, |
| "throughput": 12519.314657029476 |
| }, |
| { |
| "epoch": 0.6089075933196969, |
| "grad_norm": 0.07980058342218399, |
| "learning_rate": 6.25100703051974e-05, |
| "loss": 8.6536, |
| "step": 38848, |
| "throughput": 12519.35759876071 |
| }, |
| { |
| "epoch": 0.6094091646486258, |
| "grad_norm": 0.08572038263082504, |
| "learning_rate": 6.233184221491818e-05, |
| "loss": 8.6725, |
| "step": 38880, |
| "throughput": 12519.494620818527 |
| }, |
| { |
| "epoch": 0.6099107359775546, |
| "grad_norm": 0.09471891075372696, |
| "learning_rate": 6.2154037528426e-05, |
| "loss": 8.6527, |
| "step": 38912, |
| "throughput": 12519.632116222083 |
| }, |
| { |
| "epoch": 0.6104123073064835, |
| "grad_norm": 0.0854351744055748, |
| "learning_rate": 6.197665697898784e-05, |
| "loss": 8.6649, |
| "step": 38944, |
| "throughput": 12519.236822148852 |
| }, |
| { |
| "epoch": 0.6109138786354125, |
| "grad_norm": 0.10343901813030243, |
| "learning_rate": 6.179970129812166e-05, |
| "loss": 8.6608, |
| "step": 38976, |
| "throughput": 12519.257951071279 |
| }, |
| { |
| "epoch": 0.6114154499643414, |
| "grad_norm": 0.0799938291311264, |
| "learning_rate": 6.16231712155932e-05, |
| "loss": 8.676, |
| "step": 39008, |
| "throughput": 12519.278111050882 |
| }, |
| { |
| "epoch": 0.6119170212932703, |
| "grad_norm": 0.08410289883613586, |
| "learning_rate": 6.144706745941308e-05, |
| "loss": 8.6546, |
| "step": 39040, |
| "throughput": 12519.151087954797 |
| }, |
| { |
| "epoch": 0.6124185926221992, |
| "grad_norm": 0.08022049069404602, |
| "learning_rate": 6.127139075583363e-05, |
| "loss": 8.661, |
| "step": 39072, |
| "throughput": 12519.139449349837 |
| }, |
| { |
| "epoch": 0.6129201639511281, |
| "grad_norm": 0.08267541974782944, |
| "learning_rate": 6.109614182934616e-05, |
| "loss": 8.6619, |
| "step": 39104, |
| "throughput": 12519.196024051356 |
| }, |
| { |
| "epoch": 0.6134217352800571, |
| "grad_norm": 0.08225016295909882, |
| "learning_rate": 6.092132140267775e-05, |
| "loss": 8.649, |
| "step": 39136, |
| "throughput": 12519.243129954537 |
| }, |
| { |
| "epoch": 0.613923306608986, |
| "grad_norm": 0.11314070969820023, |
| "learning_rate": 6.074693019678839e-05, |
| "loss": 8.6704, |
| "step": 39168, |
| "throughput": 12519.263339232122 |
| }, |
| { |
| "epoch": 0.6144248779379149, |
| "grad_norm": 0.08187414705753326, |
| "learning_rate": 6.0572968930867827e-05, |
| "loss": 8.6522, |
| "step": 39200, |
| "throughput": 12519.38006793021 |
| }, |
| { |
| "epoch": 0.6149264492668438, |
| "grad_norm": 0.08883952349424362, |
| "learning_rate": 6.039943832233293e-05, |
| "loss": 8.6594, |
| "step": 39232, |
| "throughput": 12519.518153664507 |
| }, |
| { |
| "epoch": 0.6154280205957727, |
| "grad_norm": 0.08027220517396927, |
| "learning_rate": 6.022633908682442e-05, |
| "loss": 8.6596, |
| "step": 39264, |
| "throughput": 12519.657498292741 |
| }, |
| { |
| "epoch": 0.6159295919247016, |
| "grad_norm": 0.07722441852092743, |
| "learning_rate": 6.005367193820408e-05, |
| "loss": 8.6551, |
| "step": 39296, |
| "throughput": 12519.71106762286 |
| }, |
| { |
| "epoch": 0.6164311632536306, |
| "grad_norm": 0.08117758482694626, |
| "learning_rate": 5.9881437588551675e-05, |
| "loss": 8.66, |
| "step": 39328, |
| "throughput": 12519.773714208372 |
| }, |
| { |
| "epoch": 0.6169327345825594, |
| "grad_norm": 0.08332060277462006, |
| "learning_rate": 5.970963674816224e-05, |
| "loss": 8.6514, |
| "step": 39360, |
| "throughput": 12519.722993629892 |
| }, |
| { |
| "epoch": 0.6174343059114883, |
| "grad_norm": 0.08834696561098099, |
| "learning_rate": 5.953827012554291e-05, |
| "loss": 8.6643, |
| "step": 39392, |
| "throughput": 12519.569812218686 |
| }, |
| { |
| "epoch": 0.6179358772404172, |
| "grad_norm": 0.08121950924396515, |
| "learning_rate": 5.9367338427410197e-05, |
| "loss": 8.6577, |
| "step": 39424, |
| "throughput": 12519.582037618115 |
| }, |
| { |
| "epoch": 0.6184374485693461, |
| "grad_norm": 0.08764735609292984, |
| "learning_rate": 5.9196842358686866e-05, |
| "loss": 8.6642, |
| "step": 39456, |
| "throughput": 12519.679513734258 |
| }, |
| { |
| "epoch": 0.618939019898275, |
| "grad_norm": 0.08493324369192123, |
| "learning_rate": 5.902678262249923e-05, |
| "loss": 8.6652, |
| "step": 39488, |
| "throughput": 12519.700233712612 |
| }, |
| { |
| "epoch": 0.619440591227204, |
| "grad_norm": 0.08276000618934631, |
| "learning_rate": 5.885715992017419e-05, |
| "loss": 8.6494, |
| "step": 39520, |
| "throughput": 12519.772592101563 |
| }, |
| { |
| "epoch": 0.6199421625561329, |
| "grad_norm": 0.0843217521905899, |
| "learning_rate": 5.86879749512362e-05, |
| "loss": 8.6553, |
| "step": 39552, |
| "throughput": 12519.908684142914 |
| }, |
| { |
| "epoch": 0.6204437338850618, |
| "grad_norm": 0.0830702856183052, |
| "learning_rate": 5.851922841340461e-05, |
| "loss": 8.659, |
| "step": 39584, |
| "throughput": 12520.048560578924 |
| }, |
| { |
| "epoch": 0.6209453052139907, |
| "grad_norm": 0.09729384630918503, |
| "learning_rate": 5.835092100259063e-05, |
| "loss": 8.6474, |
| "step": 39616, |
| "throughput": 12520.16554718025 |
| }, |
| { |
| "epoch": 0.6214468765429196, |
| "grad_norm": 0.07610440254211426, |
| "learning_rate": 5.818305341289458e-05, |
| "loss": 8.6655, |
| "step": 39648, |
| "throughput": 12520.177816100835 |
| }, |
| { |
| "epoch": 0.6219484478718486, |
| "grad_norm": 0.07896667718887329, |
| "learning_rate": 5.8015626336602814e-05, |
| "loss": 8.6495, |
| "step": 39680, |
| "throughput": 12520.13873298695 |
| }, |
| { |
| "epoch": 0.6224500192007775, |
| "grad_norm": 0.08647879958152771, |
| "learning_rate": 5.7848640464185124e-05, |
| "loss": 8.6696, |
| "step": 39712, |
| "throughput": 12519.979631811184 |
| }, |
| { |
| "epoch": 0.6229515905297064, |
| "grad_norm": 0.07803455740213394, |
| "learning_rate": 5.768209648429174e-05, |
| "loss": 8.6655, |
| "step": 39744, |
| "throughput": 12519.932013667134 |
| }, |
| { |
| "epoch": 0.6234531618586353, |
| "grad_norm": 0.08229484409093857, |
| "learning_rate": 5.751599508375059e-05, |
| "loss": 8.664, |
| "step": 39776, |
| "throughput": 12520.069857426954 |
| }, |
| { |
| "epoch": 0.6239547331875641, |
| "grad_norm": 0.07795999199151993, |
| "learning_rate": 5.735033694756423e-05, |
| "loss": 8.649, |
| "step": 39808, |
| "throughput": 12520.027184068515 |
| }, |
| { |
| "epoch": 0.624456304516493, |
| "grad_norm": 0.07954669743776321, |
| "learning_rate": 5.718512275890737e-05, |
| "loss": 8.6491, |
| "step": 39840, |
| "throughput": 12520.12953119863 |
| }, |
| { |
| "epoch": 0.624957875845422, |
| "grad_norm": 0.08642455190420151, |
| "learning_rate": 5.70203531991238e-05, |
| "loss": 8.6537, |
| "step": 39872, |
| "throughput": 12520.23675201523 |
| }, |
| { |
| "epoch": 0.6254594471743509, |
| "grad_norm": 0.08800368756055832, |
| "learning_rate": 5.6856028947723734e-05, |
| "loss": 8.6564, |
| "step": 39904, |
| "throughput": 12520.373320639146 |
| }, |
| { |
| "epoch": 0.6259610185032798, |
| "grad_norm": 0.08899801969528198, |
| "learning_rate": 5.669215068238075e-05, |
| "loss": 8.6403, |
| "step": 39936, |
| "throughput": 12520.487837263145 |
| }, |
| { |
| "epoch": 0.6264625898322087, |
| "grad_norm": 0.07982466369867325, |
| "learning_rate": 5.652871907892934e-05, |
| "loss": 8.6608, |
| "step": 39968, |
| "throughput": 12520.534128921343 |
| }, |
| { |
| "epoch": 0.6269641611611376, |
| "grad_norm": 0.08430910110473633, |
| "learning_rate": 5.6365734811362026e-05, |
| "loss": 8.6578, |
| "step": 40000, |
| "throughput": 12520.507540367866 |
| }, |
| { |
| "epoch": 0.6274657324900665, |
| "grad_norm": 0.08782041817903519, |
| "learning_rate": 5.620319855182629e-05, |
| "loss": 8.6465, |
| "step": 40032, |
| "throughput": 12520.373597182608 |
| }, |
| { |
| "epoch": 0.6279673038189955, |
| "grad_norm": 0.08348139375448227, |
| "learning_rate": 5.60411109706222e-05, |
| "loss": 8.6222, |
| "step": 40064, |
| "throughput": 12520.378249662394 |
| }, |
| { |
| "epoch": 0.6284688751479244, |
| "grad_norm": 0.07767164707183838, |
| "learning_rate": 5.587947273619938e-05, |
| "loss": 8.6362, |
| "step": 40096, |
| "throughput": 12520.447447399005 |
| }, |
| { |
| "epoch": 0.6289704464768533, |
| "grad_norm": 0.08124253898859024, |
| "learning_rate": 5.5718284515154476e-05, |
| "loss": 8.6423, |
| "step": 40128, |
| "throughput": 12520.503797659545 |
| }, |
| { |
| "epoch": 0.6294720178057822, |
| "grad_norm": 0.08289259672164917, |
| "learning_rate": 5.5557546972228114e-05, |
| "loss": 8.6708, |
| "step": 40160, |
| "throughput": 12520.511064661736 |
| }, |
| { |
| "epoch": 0.6299735891347111, |
| "grad_norm": 0.11718080937862396, |
| "learning_rate": 5.539726077030239e-05, |
| "loss": 8.6634, |
| "step": 40192, |
| "throughput": 12520.621801043319 |
| }, |
| { |
| "epoch": 0.63047516046364, |
| "grad_norm": 0.08058638125658035, |
| "learning_rate": 5.523742657039809e-05, |
| "loss": 8.6504, |
| "step": 40224, |
| "throughput": 12520.757605927323 |
| }, |
| { |
| "epoch": 0.6309767317925689, |
| "grad_norm": 0.08185072988271713, |
| "learning_rate": 5.5078045031672005e-05, |
| "loss": 8.6793, |
| "step": 40256, |
| "throughput": 12520.866675222109 |
| }, |
| { |
| "epoch": 0.6314783031214978, |
| "grad_norm": 0.07993034273386002, |
| "learning_rate": 5.491911681141394e-05, |
| "loss": 8.6437, |
| "step": 40288, |
| "throughput": 12520.907599693744 |
| }, |
| { |
| "epoch": 0.6319798744504267, |
| "grad_norm": 0.08810719847679138, |
| "learning_rate": 5.476064256504443e-05, |
| "loss": 8.6815, |
| "step": 40320, |
| "throughput": 12520.931947301131 |
| }, |
| { |
| "epoch": 0.6324814457793556, |
| "grad_norm": 0.08399613946676254, |
| "learning_rate": 5.460262294611172e-05, |
| "loss": 8.6655, |
| "step": 40352, |
| "throughput": 12520.840774088949 |
| }, |
| { |
| "epoch": 0.6329830171082845, |
| "grad_norm": 0.081191286444664, |
| "learning_rate": 5.444505860628923e-05, |
| "loss": 8.629, |
| "step": 40384, |
| "throughput": 12520.802830375966 |
| }, |
| { |
| "epoch": 0.6334845884372134, |
| "grad_norm": 0.084128238260746, |
| "learning_rate": 5.428795019537268e-05, |
| "loss": 8.6391, |
| "step": 40416, |
| "throughput": 12520.839767156504 |
| }, |
| { |
| "epoch": 0.6339861597661424, |
| "grad_norm": 0.08739369362592697, |
| "learning_rate": 5.413129836127766e-05, |
| "loss": 8.6524, |
| "step": 40448, |
| "throughput": 12520.90426857039 |
| }, |
| { |
| "epoch": 0.6344877310950713, |
| "grad_norm": 0.07963095605373383, |
| "learning_rate": 5.3975103750036805e-05, |
| "loss": 8.6379, |
| "step": 40480, |
| "throughput": 12520.897375271004 |
| }, |
| { |
| "epoch": 0.6349893024240002, |
| "grad_norm": 0.08396175503730774, |
| "learning_rate": 5.3819367005797186e-05, |
| "loss": 8.6349, |
| "step": 40512, |
| "throughput": 12521.006784652312 |
| }, |
| { |
| "epoch": 0.6354908737529291, |
| "grad_norm": 0.08460961282253265, |
| "learning_rate": 5.366408877081752e-05, |
| "loss": 8.6645, |
| "step": 40544, |
| "throughput": 12521.141832276591 |
| }, |
| { |
| "epoch": 0.635992445081858, |
| "grad_norm": 0.08145654946565628, |
| "learning_rate": 5.3509269685465764e-05, |
| "loss": 8.6382, |
| "step": 40576, |
| "throughput": 12521.276430339789 |
| }, |
| { |
| "epoch": 0.636494016410787, |
| "grad_norm": 0.11674635857343674, |
| "learning_rate": 5.3354910388216274e-05, |
| "loss": 8.6558, |
| "step": 40608, |
| "throughput": 12521.312809689112 |
| }, |
| { |
| "epoch": 0.6369955877397159, |
| "grad_norm": 0.09571953862905502, |
| "learning_rate": 5.3201011515647276e-05, |
| "loss": 8.6613, |
| "step": 40640, |
| "throughput": 12521.388907581995 |
| }, |
| { |
| "epoch": 0.6374971590686448, |
| "grad_norm": 0.08281499892473221, |
| "learning_rate": 5.304757370243811e-05, |
| "loss": 8.6383, |
| "step": 40672, |
| "throughput": 12521.366481892124 |
| }, |
| { |
| "epoch": 0.6379987303975736, |
| "grad_norm": 0.08709795773029327, |
| "learning_rate": 5.2894597581366835e-05, |
| "loss": 8.663, |
| "step": 40704, |
| "throughput": 12521.240769983162 |
| }, |
| { |
| "epoch": 0.6385003017265025, |
| "grad_norm": 0.08456841111183167, |
| "learning_rate": 5.274208378330737e-05, |
| "loss": 8.6629, |
| "step": 40736, |
| "throughput": 12521.203232916065 |
| }, |
| { |
| "epoch": 0.6390018730554314, |
| "grad_norm": 0.09472116082906723, |
| "learning_rate": 5.2590032937227154e-05, |
| "loss": 8.6541, |
| "step": 40768, |
| "throughput": 12521.336472196534 |
| }, |
| { |
| "epoch": 0.6395034443843604, |
| "grad_norm": 0.08780498802661896, |
| "learning_rate": 5.2438445670184244e-05, |
| "loss": 8.654, |
| "step": 40800, |
| "throughput": 12521.3264815158 |
| }, |
| { |
| "epoch": 0.6400050157132893, |
| "grad_norm": 0.08326949179172516, |
| "learning_rate": 5.2287322607325e-05, |
| "loss": 8.6358, |
| "step": 40832, |
| "throughput": 12521.392052429343 |
| }, |
| { |
| "epoch": 0.6405065870422182, |
| "grad_norm": 0.08162552863359451, |
| "learning_rate": 5.213666437188141e-05, |
| "loss": 8.6674, |
| "step": 40864, |
| "throughput": 12521.494973148838 |
| }, |
| { |
| "epoch": 0.6410081583711471, |
| "grad_norm": 0.08285222202539444, |
| "learning_rate": 5.1986471585168485e-05, |
| "loss": 8.656, |
| "step": 40896, |
| "throughput": 12521.629309490367 |
| }, |
| { |
| "epoch": 0.641509729700076, |
| "grad_norm": 0.09120822697877884, |
| "learning_rate": 5.183674486658167e-05, |
| "loss": 8.6716, |
| "step": 40928, |
| "throughput": 12521.699504095955 |
| }, |
| { |
| "epoch": 0.6420113010290049, |
| "grad_norm": 0.08420810103416443, |
| "learning_rate": 5.168748483359445e-05, |
| "loss": 8.6607, |
| "step": 40960, |
| "throughput": 12521.75876137181 |
| }, |
| { |
| "epoch": 0.6425128723579339, |
| "grad_norm": 0.07815767079591751, |
| "learning_rate": 5.153869210175563e-05, |
| "loss": 8.6309, |
| "step": 40992, |
| "throughput": 12521.244443250478 |
| }, |
| { |
| "epoch": 0.6430144436868628, |
| "grad_norm": 0.08167236298322678, |
| "learning_rate": 5.139036728468686e-05, |
| "loss": 8.6386, |
| "step": 41024, |
| "throughput": 12521.098366250244 |
| }, |
| { |
| "epoch": 0.6435160150157917, |
| "grad_norm": 0.07946034520864487, |
| "learning_rate": 5.124251099408012e-05, |
| "loss": 8.6276, |
| "step": 41056, |
| "throughput": 12521.02820184281 |
| }, |
| { |
| "epoch": 0.6440175863447206, |
| "grad_norm": 0.08526898920536041, |
| "learning_rate": 5.1095123839695224e-05, |
| "loss": 8.6517, |
| "step": 41088, |
| "throughput": 12521.155406940761 |
| }, |
| { |
| "epoch": 0.6445191576736495, |
| "grad_norm": 0.08295472711324692, |
| "learning_rate": 5.0948206429357224e-05, |
| "loss": 8.6621, |
| "step": 41120, |
| "throughput": 12521.166390281849 |
| }, |
| { |
| "epoch": 0.6450207290025783, |
| "grad_norm": 0.09882532060146332, |
| "learning_rate": 5.080175936895392e-05, |
| "loss": 8.6701, |
| "step": 41152, |
| "throughput": 12521.21875397715 |
| }, |
| { |
| "epoch": 0.6455223003315073, |
| "grad_norm": 0.07845364511013031, |
| "learning_rate": 5.065578326243348e-05, |
| "loss": 8.6176, |
| "step": 41184, |
| "throughput": 12521.31994753445 |
| }, |
| { |
| "epoch": 0.6460238716604362, |
| "grad_norm": 0.08402087539434433, |
| "learning_rate": 5.0510278711801735e-05, |
| "loss": 8.6548, |
| "step": 41216, |
| "throughput": 12521.449296348317 |
| }, |
| { |
| "epoch": 0.6465254429893651, |
| "grad_norm": 0.08985952287912369, |
| "learning_rate": 5.036524631711996e-05, |
| "loss": 8.6473, |
| "step": 41248, |
| "throughput": 12521.563405601884 |
| }, |
| { |
| "epoch": 0.647027014318294, |
| "grad_norm": 0.07821807265281677, |
| "learning_rate": 5.02206866765021e-05, |
| "loss": 8.6453, |
| "step": 41280, |
| "throughput": 12521.576374610004 |
| }, |
| { |
| "epoch": 0.6475285856472229, |
| "grad_norm": 0.08190401643514633, |
| "learning_rate": 5.007660038611259e-05, |
| "loss": 8.6582, |
| "step": 41312, |
| "throughput": 12521.573527242897 |
| }, |
| { |
| "epoch": 0.6480301569761518, |
| "grad_norm": 0.0907343253493309, |
| "learning_rate": 4.9932988040163726e-05, |
| "loss": 8.6428, |
| "step": 41344, |
| "throughput": 12521.475130040359 |
| }, |
| { |
| "epoch": 0.6485317283050808, |
| "grad_norm": 0.08307422697544098, |
| "learning_rate": 4.978985023091324e-05, |
| "loss": 8.6496, |
| "step": 41376, |
| "throughput": 12521.449891877653 |
| }, |
| { |
| "epoch": 0.6490332996340097, |
| "grad_norm": 0.0786016508936882, |
| "learning_rate": 4.964718754866186e-05, |
| "loss": 8.6396, |
| "step": 41408, |
| "throughput": 12521.499320841976 |
| }, |
| { |
| "epoch": 0.6495348709629386, |
| "grad_norm": 0.08367376029491425, |
| "learning_rate": 4.95050005817509e-05, |
| "loss": 8.6546, |
| "step": 41440, |
| "throughput": 12521.56282824873 |
| }, |
| { |
| "epoch": 0.6500364422918675, |
| "grad_norm": 0.08345983922481537, |
| "learning_rate": 4.936328991655988e-05, |
| "loss": 8.6153, |
| "step": 41472, |
| "throughput": 12521.561639231697 |
| }, |
| { |
| "epoch": 0.6505380136207964, |
| "grad_norm": 0.08260690420866013, |
| "learning_rate": 4.9222056137504e-05, |
| "loss": 8.6362, |
| "step": 41504, |
| "throughput": 12521.667206337985 |
| }, |
| { |
| "epoch": 0.6510395849497254, |
| "grad_norm": 0.08155972510576248, |
| "learning_rate": 4.908129982703169e-05, |
| "loss": 8.6391, |
| "step": 41536, |
| "throughput": 12521.796105573369 |
| }, |
| { |
| "epoch": 0.6515411562786543, |
| "grad_norm": 0.08556462824344635, |
| "learning_rate": 4.8941021565622516e-05, |
| "loss": 8.6523, |
| "step": 41568, |
| "throughput": 12521.898259358102 |
| }, |
| { |
| "epoch": 0.6520427276075831, |
| "grad_norm": 0.0759713277220726, |
| "learning_rate": 4.880122193178441e-05, |
| "loss": 8.651, |
| "step": 41600, |
| "throughput": 12521.960176028337 |
| }, |
| { |
| "epoch": 0.652544298936512, |
| "grad_norm": 0.08541751652956009, |
| "learning_rate": 4.866190150205143e-05, |
| "loss": 8.6432, |
| "step": 41632, |
| "throughput": 12521.945643081093 |
| }, |
| { |
| "epoch": 0.6530458702654409, |
| "grad_norm": 0.08364102244377136, |
| "learning_rate": 4.8523060850981476e-05, |
| "loss": 8.6236, |
| "step": 41664, |
| "throughput": 12521.91633035345 |
| }, |
| { |
| "epoch": 0.6535474415943698, |
| "grad_norm": 0.09959270060062408, |
| "learning_rate": 4.838470055115379e-05, |
| "loss": 8.6442, |
| "step": 41696, |
| "throughput": 12521.843651647958 |
| }, |
| { |
| "epoch": 0.6540490129232988, |
| "grad_norm": 0.08332763612270355, |
| "learning_rate": 4.82468211731667e-05, |
| "loss": 8.6504, |
| "step": 41728, |
| "throughput": 12521.829790599762 |
| }, |
| { |
| "epoch": 0.6545505842522277, |
| "grad_norm": 0.08586803823709488, |
| "learning_rate": 4.8109423285635116e-05, |
| "loss": 8.6482, |
| "step": 41760, |
| "throughput": 12521.95742416806 |
| }, |
| { |
| "epoch": 0.6550521555811566, |
| "grad_norm": 0.08022906631231308, |
| "learning_rate": 4.797250745518833e-05, |
| "loss": 8.6108, |
| "step": 41792, |
| "throughput": 12521.939657731617 |
| }, |
| { |
| "epoch": 0.6555537269100855, |
| "grad_norm": 0.08036471158266068, |
| "learning_rate": 4.7836074246467685e-05, |
| "loss": 8.6355, |
| "step": 41824, |
| "throughput": 12522.020873759328 |
| }, |
| { |
| "epoch": 0.6560552982390144, |
| "grad_norm": 0.08128458261489868, |
| "learning_rate": 4.770012422212412e-05, |
| "loss": 8.6377, |
| "step": 41856, |
| "throughput": 12522.124525838915 |
| }, |
| { |
| "epoch": 0.6565568695679433, |
| "grad_norm": 0.08254926651716232, |
| "learning_rate": 4.756465794281592e-05, |
| "loss": 8.6364, |
| "step": 41888, |
| "throughput": 12522.22535205011 |
| }, |
| { |
| "epoch": 0.6570584408968723, |
| "grad_norm": 0.08438771218061447, |
| "learning_rate": 4.742967596720641e-05, |
| "loss": 8.6498, |
| "step": 41920, |
| "throughput": 12522.306833724853 |
| }, |
| { |
| "epoch": 0.6575600122258012, |
| "grad_norm": 0.08102980256080627, |
| "learning_rate": 4.729517885196169e-05, |
| "loss": 8.6601, |
| "step": 41952, |
| "throughput": 12522.312067211904 |
| }, |
| { |
| "epoch": 0.6580615835547301, |
| "grad_norm": 0.0832732692360878, |
| "learning_rate": 4.716116715174827e-05, |
| "loss": 8.6327, |
| "step": 41984, |
| "throughput": 12522.290298437994 |
| }, |
| { |
| "epoch": 0.6585631548836589, |
| "grad_norm": 0.08147306740283966, |
| "learning_rate": 4.702764141923075e-05, |
| "loss": 8.6518, |
| "step": 42016, |
| "throughput": 12522.187377477721 |
| }, |
| { |
| "epoch": 0.6590647262125878, |
| "grad_norm": 0.08547472208738327, |
| "learning_rate": 4.6894602205069674e-05, |
| "loss": 8.6239, |
| "step": 42048, |
| "throughput": 12522.12719798037 |
| }, |
| { |
| "epoch": 0.6595662975415167, |
| "grad_norm": 0.08419306576251984, |
| "learning_rate": 4.6762050057919165e-05, |
| "loss": 8.6273, |
| "step": 42080, |
| "throughput": 12522.252802743767 |
| }, |
| { |
| "epoch": 0.6600678688704457, |
| "grad_norm": 0.08612877875566483, |
| "learning_rate": 4.6629985524424686e-05, |
| "loss": 8.6288, |
| "step": 42112, |
| "throughput": 12522.27762845165 |
| }, |
| { |
| "epoch": 0.6605694401993746, |
| "grad_norm": 0.08975645899772644, |
| "learning_rate": 4.649840914922071e-05, |
| "loss": 8.6381, |
| "step": 42144, |
| "throughput": 12522.307565885858 |
| }, |
| { |
| "epoch": 0.6610710115283035, |
| "grad_norm": 0.08304554969072342, |
| "learning_rate": 4.636732147492863e-05, |
| "loss": 8.6272, |
| "step": 42176, |
| "throughput": 12522.40601976463 |
| }, |
| { |
| "epoch": 0.6615725828572324, |
| "grad_norm": 0.08024667203426361, |
| "learning_rate": 4.6236723042154424e-05, |
| "loss": 8.6528, |
| "step": 42208, |
| "throughput": 12522.5278263286 |
| }, |
| { |
| "epoch": 0.6620741541861613, |
| "grad_norm": 0.08220332860946655, |
| "learning_rate": 4.61066143894864e-05, |
| "loss": 8.6424, |
| "step": 42240, |
| "throughput": 12522.587369184974 |
| }, |
| { |
| "epoch": 0.6625757255150903, |
| "grad_norm": 0.08129006624221802, |
| "learning_rate": 4.5976996053492996e-05, |
| "loss": 8.6298, |
| "step": 42272, |
| "throughput": 12522.670632374928 |
| }, |
| { |
| "epoch": 0.6630772968440192, |
| "grad_norm": 0.08768677711486816, |
| "learning_rate": 4.5847868568720646e-05, |
| "loss": 8.6229, |
| "step": 42304, |
| "throughput": 12522.68761818525 |
| }, |
| { |
| "epoch": 0.6635788681729481, |
| "grad_norm": 0.07980692386627197, |
| "learning_rate": 4.571923246769147e-05, |
| "loss": 8.6451, |
| "step": 42336, |
| "throughput": 12522.582664506732 |
| }, |
| { |
| "epoch": 0.664080439501877, |
| "grad_norm": 0.08362894505262375, |
| "learning_rate": 4.559108828090115e-05, |
| "loss": 8.6364, |
| "step": 42368, |
| "throughput": 12522.497043430209 |
| }, |
| { |
| "epoch": 0.6645820108308059, |
| "grad_norm": 0.08338017016649246, |
| "learning_rate": 4.546343653681667e-05, |
| "loss": 8.6472, |
| "step": 42400, |
| "throughput": 12522.586423737033 |
| }, |
| { |
| "epoch": 0.6650835821597348, |
| "grad_norm": 0.09748286753892899, |
| "learning_rate": 4.53362777618742e-05, |
| "loss": 8.6326, |
| "step": 42432, |
| "throughput": 12522.64904954824 |
| }, |
| { |
| "epoch": 0.6655851534886637, |
| "grad_norm": 0.08786331117153168, |
| "learning_rate": 4.52096124804769e-05, |
| "loss": 8.6305, |
| "step": 42464, |
| "throughput": 12522.634563136266 |
| }, |
| { |
| "epoch": 0.6660867248175926, |
| "grad_norm": 0.08766663819551468, |
| "learning_rate": 4.508344121499281e-05, |
| "loss": 8.6313, |
| "step": 42496, |
| "throughput": 12522.761445075928 |
| }, |
| { |
| "epoch": 0.6665882961465215, |
| "grad_norm": 0.08180690556764603, |
| "learning_rate": 4.495776448575255e-05, |
| "loss": 8.6289, |
| "step": 42528, |
| "throughput": 12522.863547315234 |
| }, |
| { |
| "epoch": 0.6670898674754504, |
| "grad_norm": 0.07657379657030106, |
| "learning_rate": 4.483258281104734e-05, |
| "loss": 8.6117, |
| "step": 42560, |
| "throughput": 12522.965550374338 |
| }, |
| { |
| "epoch": 0.6675914388043793, |
| "grad_norm": 0.0844731479883194, |
| "learning_rate": 4.470789670712681e-05, |
| "loss": 8.6299, |
| "step": 42592, |
| "throughput": 12522.990678540467 |
| }, |
| { |
| "epoch": 0.6680930101333082, |
| "grad_norm": 0.08531223982572556, |
| "learning_rate": 4.458370668819676e-05, |
| "loss": 8.6462, |
| "step": 42624, |
| "throughput": 12523.031978335513 |
| }, |
| { |
| "epoch": 0.6685945814622372, |
| "grad_norm": 0.08948558568954468, |
| "learning_rate": 4.4460013266417226e-05, |
| "loss": 8.6194, |
| "step": 42656, |
| "throughput": 12522.988029400258 |
| }, |
| { |
| "epoch": 0.6690961527911661, |
| "grad_norm": 0.08160790055990219, |
| "learning_rate": 4.433681695190027e-05, |
| "loss": 8.6268, |
| "step": 42688, |
| "throughput": 12522.897319792524 |
| }, |
| { |
| "epoch": 0.669597724120095, |
| "grad_norm": 0.08070877939462662, |
| "learning_rate": 4.421411825270785e-05, |
| "loss": 8.6173, |
| "step": 42720, |
| "throughput": 12522.890851115651 |
| }, |
| { |
| "epoch": 0.6700992954490239, |
| "grad_norm": 0.0807216539978981, |
| "learning_rate": 4.4091917674849727e-05, |
| "loss": 8.6364, |
| "step": 42752, |
| "throughput": 12522.989144430921 |
| }, |
| { |
| "epoch": 0.6706008667779528, |
| "grad_norm": 0.07999275624752045, |
| "learning_rate": 4.397021572228147e-05, |
| "loss": 8.6432, |
| "step": 42784, |
| "throughput": 12522.988455472507 |
| }, |
| { |
| "epoch": 0.6711024381068817, |
| "grad_norm": 0.08301256597042084, |
| "learning_rate": 4.38490128969023e-05, |
| "loss": 8.6446, |
| "step": 42816, |
| "throughput": 12523.010419309921 |
| }, |
| { |
| "epoch": 0.6716040094358107, |
| "grad_norm": 0.093504898250103, |
| "learning_rate": 4.3728309698553056e-05, |
| "loss": 8.6468, |
| "step": 42848, |
| "throughput": 12522.775666640384 |
| }, |
| { |
| "epoch": 0.6721055807647396, |
| "grad_norm": 0.07958458364009857, |
| "learning_rate": 4.3608106625014014e-05, |
| "loss": 8.6301, |
| "step": 42880, |
| "throughput": 12522.867752852817 |
| }, |
| { |
| "epoch": 0.6726071520936684, |
| "grad_norm": 0.0886707678437233, |
| "learning_rate": 4.348840417200306e-05, |
| "loss": 8.6371, |
| "step": 42912, |
| "throughput": 12522.919741376949 |
| }, |
| { |
| "epoch": 0.6731087234225973, |
| "grad_norm": 0.08578263223171234, |
| "learning_rate": 4.336920283317343e-05, |
| "loss": 8.6488, |
| "step": 42944, |
| "throughput": 12522.922350456145 |
| }, |
| { |
| "epoch": 0.6736102947515262, |
| "grad_norm": 0.08376625925302505, |
| "learning_rate": 4.325050310011183e-05, |
| "loss": 8.6529, |
| "step": 42976, |
| "throughput": 12522.931695930905 |
| }, |
| { |
| "epoch": 0.6741118660804551, |
| "grad_norm": 0.11295043677091599, |
| "learning_rate": 4.3132305462336306e-05, |
| "loss": 8.6394, |
| "step": 43008, |
| "throughput": 12522.788272499705 |
| }, |
| { |
| "epoch": 0.6746134374093841, |
| "grad_norm": 0.08392587304115295, |
| "learning_rate": 4.301461040729424e-05, |
| "loss": 8.6591, |
| "step": 43040, |
| "throughput": 12522.27830297955 |
| }, |
| { |
| "epoch": 0.675115008738313, |
| "grad_norm": 0.08914206176996231, |
| "learning_rate": 4.289741842036042e-05, |
| "loss": 8.6304, |
| "step": 43072, |
| "throughput": 12522.394654787475 |
| }, |
| { |
| "epoch": 0.6756165800672419, |
| "grad_norm": 0.08153049647808075, |
| "learning_rate": 4.2780729984834916e-05, |
| "loss": 8.6209, |
| "step": 43104, |
| "throughput": 12522.398481053842 |
| }, |
| { |
| "epoch": 0.6761181513961708, |
| "grad_norm": 0.08689522743225098, |
| "learning_rate": 4.266454558194122e-05, |
| "loss": 8.6312, |
| "step": 43136, |
| "throughput": 12522.42026822721 |
| }, |
| { |
| "epoch": 0.6766197227250997, |
| "grad_norm": 0.08201098442077637, |
| "learning_rate": 4.254886569082413e-05, |
| "loss": 8.6142, |
| "step": 43168, |
| "throughput": 12522.51619543128 |
| }, |
| { |
| "epoch": 0.6771212940540287, |
| "grad_norm": 0.09345916658639908, |
| "learning_rate": 4.243369078854788e-05, |
| "loss": 8.6268, |
| "step": 43200, |
| "throughput": 12522.60465925701 |
| }, |
| { |
| "epoch": 0.6776228653829576, |
| "grad_norm": 0.07916395366191864, |
| "learning_rate": 4.231902135009407e-05, |
| "loss": 8.648, |
| "step": 43232, |
| "throughput": 12522.652433449026 |
| }, |
| { |
| "epoch": 0.6781244367118865, |
| "grad_norm": 0.08550101518630981, |
| "learning_rate": 4.220485784835984e-05, |
| "loss": 8.638, |
| "step": 43264, |
| "throughput": 12522.654022435197 |
| }, |
| { |
| "epoch": 0.6786260080408154, |
| "grad_norm": 0.08053749054670334, |
| "learning_rate": 4.209120075415577e-05, |
| "loss": 8.6118, |
| "step": 43296, |
| "throughput": 12522.67075424937 |
| }, |
| { |
| "epoch": 0.6791275793697443, |
| "grad_norm": 0.09553582966327667, |
| "learning_rate": 4.197805053620411e-05, |
| "loss": 8.6166, |
| "step": 43328, |
| "throughput": 12522.60568477544 |
| }, |
| { |
| "epoch": 0.6796291506986731, |
| "grad_norm": 0.08054535835981369, |
| "learning_rate": 4.186540766113665e-05, |
| "loss": 8.6222, |
| "step": 43360, |
| "throughput": 12522.514819398422 |
| }, |
| { |
| "epoch": 0.680130722027602, |
| "grad_norm": 0.08594219386577606, |
| "learning_rate": 4.1753272593492956e-05, |
| "loss": 8.6323, |
| "step": 43392, |
| "throughput": 12522.628488500615 |
| }, |
| { |
| "epoch": 0.680632293356531, |
| "grad_norm": 0.09543893486261368, |
| "learning_rate": 4.1641645795718364e-05, |
| "loss": 8.6415, |
| "step": 43424, |
| "throughput": 12522.637533434876 |
| }, |
| { |
| "epoch": 0.6811338646854599, |
| "grad_norm": 0.08152885735034943, |
| "learning_rate": 4.153052772816217e-05, |
| "loss": 8.6092, |
| "step": 43456, |
| "throughput": 12522.662222482037 |
| }, |
| { |
| "epoch": 0.6816354360143888, |
| "grad_norm": 0.0825868621468544, |
| "learning_rate": 4.141991884907555e-05, |
| "loss": 8.6168, |
| "step": 43488, |
| "throughput": 12522.777858646657 |
| }, |
| { |
| "epoch": 0.6821370073433177, |
| "grad_norm": 0.08323723077774048, |
| "learning_rate": 4.1309819614609865e-05, |
| "loss": 8.618, |
| "step": 43520, |
| "throughput": 12522.84369218315 |
| }, |
| { |
| "epoch": 0.6826385786722466, |
| "grad_norm": 0.09601942449808121, |
| "learning_rate": 4.1200230478814695e-05, |
| "loss": 8.6368, |
| "step": 43552, |
| "throughput": 12522.914836625052 |
| }, |
| { |
| "epoch": 0.6831401500011756, |
| "grad_norm": 0.08802493661642075, |
| "learning_rate": 4.109115189363601e-05, |
| "loss": 8.6315, |
| "step": 43584, |
| "throughput": 12522.984965508142 |
| }, |
| { |
| "epoch": 0.6836417213301045, |
| "grad_norm": 0.08340758085250854, |
| "learning_rate": 4.0982584308914114e-05, |
| "loss": 8.6037, |
| "step": 43616, |
| "throughput": 12522.988422248942 |
| }, |
| { |
| "epoch": 0.6841432926590334, |
| "grad_norm": 0.08395954966545105, |
| "learning_rate": 4.0874528172382114e-05, |
| "loss": 8.632, |
| "step": 43648, |
| "throughput": 12522.980385093575 |
| }, |
| { |
| "epoch": 0.6846448639879623, |
| "grad_norm": 0.08390320092439651, |
| "learning_rate": 4.0766983929663835e-05, |
| "loss": 8.6055, |
| "step": 43680, |
| "throughput": 12522.821441200436 |
| }, |
| { |
| "epoch": 0.6851464353168912, |
| "grad_norm": 0.08433941751718521, |
| "learning_rate": 4.065995202427206e-05, |
| "loss": 8.6176, |
| "step": 43712, |
| "throughput": 12522.842022596295 |
| }, |
| { |
| "epoch": 0.6856480066458202, |
| "grad_norm": 0.07991538941860199, |
| "learning_rate": 4.055343289760664e-05, |
| "loss": 8.6254, |
| "step": 43744, |
| "throughput": 12522.886808340942 |
| }, |
| { |
| "epoch": 0.6861495779747491, |
| "grad_norm": 0.08180645108222961, |
| "learning_rate": 4.0447426988952816e-05, |
| "loss": 8.5996, |
| "step": 43776, |
| "throughput": 12522.927032292622 |
| }, |
| { |
| "epoch": 0.6866511493036779, |
| "grad_norm": 0.08377785980701447, |
| "learning_rate": 4.0341934735479224e-05, |
| "loss": 8.6212, |
| "step": 43808, |
| "throughput": 12522.991394982113 |
| }, |
| { |
| "epoch": 0.6871527206326068, |
| "grad_norm": 0.09080639481544495, |
| "learning_rate": 4.02369565722363e-05, |
| "loss": 8.6394, |
| "step": 43840, |
| "throughput": 12523.084577149331 |
| }, |
| { |
| "epoch": 0.6876542919615357, |
| "grad_norm": 0.08551277220249176, |
| "learning_rate": 4.013249293215422e-05, |
| "loss": 8.597, |
| "step": 43872, |
| "throughput": 12523.135716651132 |
| }, |
| { |
| "epoch": 0.6881558632904646, |
| "grad_norm": 0.07742547988891602, |
| "learning_rate": 4.0028544246041406e-05, |
| "loss": 8.6112, |
| "step": 43904, |
| "throughput": 12523.209614895739 |
| }, |
| { |
| "epoch": 0.6886574346193935, |
| "grad_norm": 0.0808200091123581, |
| "learning_rate": 3.99251109425825e-05, |
| "loss": 8.6392, |
| "step": 43936, |
| "throughput": 12523.23536883359 |
| }, |
| { |
| "epoch": 0.6891590059483225, |
| "grad_norm": 0.08219944685697556, |
| "learning_rate": 3.982219344833681e-05, |
| "loss": 8.6295, |
| "step": 43968, |
| "throughput": 12523.249453557088 |
| }, |
| { |
| "epoch": 0.6896605772772514, |
| "grad_norm": 0.07823171466588974, |
| "learning_rate": 3.971979218773634e-05, |
| "loss": 8.604, |
| "step": 44000, |
| "throughput": 12523.173674158134 |
| }, |
| { |
| "epoch": 0.6901621486061803, |
| "grad_norm": 0.083199642598629, |
| "learning_rate": 3.961790758308418e-05, |
| "loss": 8.619, |
| "step": 44032, |
| "throughput": 12523.113882214477 |
| }, |
| { |
| "epoch": 0.6906637199351092, |
| "grad_norm": 0.08003423362970352, |
| "learning_rate": 3.951654005455281e-05, |
| "loss": 8.6139, |
| "step": 44064, |
| "throughput": 12523.2006829187 |
| }, |
| { |
| "epoch": 0.6911652912640381, |
| "grad_norm": 0.08449212461709976, |
| "learning_rate": 3.9415690020182154e-05, |
| "loss": 8.6242, |
| "step": 44096, |
| "throughput": 12523.240405619921 |
| }, |
| { |
| "epoch": 0.6916668625929671, |
| "grad_norm": 0.08138830214738846, |
| "learning_rate": 3.9315357895878066e-05, |
| "loss": 8.6153, |
| "step": 44128, |
| "throughput": 12523.267847555973 |
| }, |
| { |
| "epoch": 0.692168433921896, |
| "grad_norm": 0.08120720088481903, |
| "learning_rate": 3.921554409541053e-05, |
| "loss": 8.6142, |
| "step": 44160, |
| "throughput": 12523.359212932843 |
| }, |
| { |
| "epoch": 0.6926700052508249, |
| "grad_norm": 0.08653061091899872, |
| "learning_rate": 3.911624903041198e-05, |
| "loss": 8.6334, |
| "step": 44192, |
| "throughput": 12523.44384977313 |
| }, |
| { |
| "epoch": 0.6931715765797538, |
| "grad_norm": 0.08123726397752762, |
| "learning_rate": 3.9017473110375525e-05, |
| "loss": 8.6243, |
| "step": 44224, |
| "throughput": 12523.48548232217 |
| }, |
| { |
| "epoch": 0.6936731479086826, |
| "grad_norm": 0.0898829996585846, |
| "learning_rate": 3.891921674265336e-05, |
| "loss": 8.6052, |
| "step": 44256, |
| "throughput": 12523.506511880501 |
| }, |
| { |
| "epoch": 0.6941747192376115, |
| "grad_norm": 0.08948096632957458, |
| "learning_rate": 3.8821480332455024e-05, |
| "loss": 8.6146, |
| "step": 44288, |
| "throughput": 12523.524708510986 |
| }, |
| { |
| "epoch": 0.6946762905665405, |
| "grad_norm": 0.0792609453201294, |
| "learning_rate": 3.87242642828458e-05, |
| "loss": 8.6346, |
| "step": 44320, |
| "throughput": 12523.469758766978 |
| }, |
| { |
| "epoch": 0.6951778618954694, |
| "grad_norm": 0.08997055888175964, |
| "learning_rate": 3.862756899474493e-05, |
| "loss": 8.6231, |
| "step": 44352, |
| "throughput": 12523.368936179342 |
| }, |
| { |
| "epoch": 0.6956794332243983, |
| "grad_norm": 0.08132538944482803, |
| "learning_rate": 3.853139486692408e-05, |
| "loss": 8.5962, |
| "step": 44384, |
| "throughput": 12523.454513690915 |
| }, |
| { |
| "epoch": 0.6961810045533272, |
| "grad_norm": 0.08895617723464966, |
| "learning_rate": 3.843574229600565e-05, |
| "loss": 8.6124, |
| "step": 44416, |
| "throughput": 12523.481205386239 |
| }, |
| { |
| "epoch": 0.6966825758822561, |
| "grad_norm": 0.07943489402532578, |
| "learning_rate": 3.834061167646112e-05, |
| "loss": 8.6234, |
| "step": 44448, |
| "throughput": 12523.50126168083 |
| }, |
| { |
| "epoch": 0.697184147211185, |
| "grad_norm": 0.0908041000366211, |
| "learning_rate": 3.8246003400609424e-05, |
| "loss": 8.6159, |
| "step": 44480, |
| "throughput": 12523.608974555584 |
| }, |
| { |
| "epoch": 0.697685718540114, |
| "grad_norm": 0.08159472048282623, |
| "learning_rate": 3.81519178586154e-05, |
| "loss": 8.6132, |
| "step": 44512, |
| "throughput": 12523.664315294256 |
| }, |
| { |
| "epoch": 0.6981872898690429, |
| "grad_norm": 0.08212780207395554, |
| "learning_rate": 3.805835543848809e-05, |
| "loss": 8.6418, |
| "step": 44544, |
| "throughput": 12523.696854249029 |
| }, |
| { |
| "epoch": 0.6986888611979718, |
| "grad_norm": 0.08898486942052841, |
| "learning_rate": 3.796531652607919e-05, |
| "loss": 8.6337, |
| "step": 44576, |
| "throughput": 12523.767501252678 |
| }, |
| { |
| "epoch": 0.6991904325269007, |
| "grad_norm": 0.07757981866598129, |
| "learning_rate": 3.7872801505081434e-05, |
| "loss": 8.64, |
| "step": 44608, |
| "throughput": 12523.763177558329 |
| }, |
| { |
| "epoch": 0.6996920038558296, |
| "grad_norm": 0.09902704507112503, |
| "learning_rate": 3.778081075702709e-05, |
| "loss": 8.6096, |
| "step": 44640, |
| "throughput": 12523.724903622915 |
| }, |
| { |
| "epoch": 0.7001935751847586, |
| "grad_norm": 0.09403155744075775, |
| "learning_rate": 3.7689344661286264e-05, |
| "loss": 8.6372, |
| "step": 44672, |
| "throughput": 12523.62412386298 |
| }, |
| { |
| "epoch": 0.7006951465136874, |
| "grad_norm": 0.07636962831020355, |
| "learning_rate": 3.759840359506536e-05, |
| "loss": 8.6089, |
| "step": 44704, |
| "throughput": 12523.68024082709 |
| }, |
| { |
| "epoch": 0.7011967178426163, |
| "grad_norm": 0.07870710641145706, |
| "learning_rate": 3.750798793340565e-05, |
| "loss": 8.6243, |
| "step": 44736, |
| "throughput": 12523.730161145108 |
| }, |
| { |
| "epoch": 0.7016982891715452, |
| "grad_norm": 0.09148543328046799, |
| "learning_rate": 3.7418098049181573e-05, |
| "loss": 8.635, |
| "step": 44768, |
| "throughput": 12523.798281632704 |
| }, |
| { |
| "epoch": 0.7021998605004741, |
| "grad_norm": 0.10121971368789673, |
| "learning_rate": 3.732873431309929e-05, |
| "loss": 8.6121, |
| "step": 44800, |
| "throughput": 12523.82858597023 |
| }, |
| { |
| "epoch": 0.702701431829403, |
| "grad_norm": 0.09453009814023972, |
| "learning_rate": 3.7239897093695106e-05, |
| "loss": 8.623, |
| "step": 44832, |
| "throughput": 12523.891706175975 |
| }, |
| { |
| "epoch": 0.703203003158332, |
| "grad_norm": 0.08002530783414841, |
| "learning_rate": 3.715158675733396e-05, |
| "loss": 8.6228, |
| "step": 44864, |
| "throughput": 12523.936516927986 |
| }, |
| { |
| "epoch": 0.7037045744872609, |
| "grad_norm": 0.08112312108278275, |
| "learning_rate": 3.706380366820796e-05, |
| "loss": 8.6189, |
| "step": 44896, |
| "throughput": 12523.96820859386 |
| }, |
| { |
| "epoch": 0.7042061458161898, |
| "grad_norm": 0.08807221800088882, |
| "learning_rate": 3.6976548188334834e-05, |
| "loss": 8.6011, |
| "step": 44928, |
| "throughput": 12524.00821230705 |
| }, |
| { |
| "epoch": 0.7047077171451187, |
| "grad_norm": 0.07938039302825928, |
| "learning_rate": 3.688982067755642e-05, |
| "loss": 8.5999, |
| "step": 44960, |
| "throughput": 12524.02571071888 |
| }, |
| { |
| "epoch": 0.7052092884740476, |
| "grad_norm": 0.08693437278270721, |
| "learning_rate": 3.680362149353724e-05, |
| "loss": 8.6299, |
| "step": 44992, |
| "throughput": 12523.882489605088 |
| }, |
| { |
| "epoch": 0.7057108598029765, |
| "grad_norm": 0.08155670017004013, |
| "learning_rate": 3.671795099176297e-05, |
| "loss": 8.6078, |
| "step": 45024, |
| "throughput": 12523.846569313075 |
| }, |
| { |
| "epoch": 0.7062124311319055, |
| "grad_norm": 0.08850400149822235, |
| "learning_rate": 3.6632809525539055e-05, |
| "loss": 8.6215, |
| "step": 45056, |
| "throughput": 12523.903709696035 |
| }, |
| { |
| "epoch": 0.7067140024608344, |
| "grad_norm": 0.08342622220516205, |
| "learning_rate": 3.6548197445989086e-05, |
| "loss": 8.6262, |
| "step": 45088, |
| "throughput": 12523.479201287208 |
| }, |
| { |
| "epoch": 0.7072155737897633, |
| "grad_norm": 0.07904572039842606, |
| "learning_rate": 3.6464115102053596e-05, |
| "loss": 8.6112, |
| "step": 45120, |
| "throughput": 12523.491504430669 |
| }, |
| { |
| "epoch": 0.7077171451186921, |
| "grad_norm": 0.08593115955591202, |
| "learning_rate": 3.6380562840488376e-05, |
| "loss": 8.6333, |
| "step": 45152, |
| "throughput": 12523.580804834894 |
| }, |
| { |
| "epoch": 0.708218716447621, |
| "grad_norm": 0.07867827266454697, |
| "learning_rate": 3.629754100586323e-05, |
| "loss": 8.6095, |
| "step": 45184, |
| "throughput": 12523.619096618833 |
| }, |
| { |
| "epoch": 0.7087202877765499, |
| "grad_norm": 0.0866331160068512, |
| "learning_rate": 3.6215049940560433e-05, |
| "loss": 8.6319, |
| "step": 45216, |
| "throughput": 12523.682169342635 |
| }, |
| { |
| "epoch": 0.7092218591054789, |
| "grad_norm": 0.089105524122715, |
| "learning_rate": 3.613308998477339e-05, |
| "loss": 8.5836, |
| "step": 45248, |
| "throughput": 12523.703755312761 |
| }, |
| { |
| "epoch": 0.7097234304344078, |
| "grad_norm": 0.07944278419017792, |
| "learning_rate": 3.605166147650517e-05, |
| "loss": 8.611, |
| "step": 45280, |
| "throughput": 12523.690922392816 |
| }, |
| { |
| "epoch": 0.7102250017633367, |
| "grad_norm": 0.07689522206783295, |
| "learning_rate": 3.597076475156726e-05, |
| "loss": 8.633, |
| "step": 45312, |
| "throughput": 12523.659284638932 |
| }, |
| { |
| "epoch": 0.7107265730922656, |
| "grad_norm": 0.08848965167999268, |
| "learning_rate": 3.589040014357791e-05, |
| "loss": 8.6344, |
| "step": 45344, |
| "throughput": 12523.5316571319 |
| }, |
| { |
| "epoch": 0.7112281444211945, |
| "grad_norm": 0.0853632241487503, |
| "learning_rate": 3.581056798396105e-05, |
| "loss": 8.6217, |
| "step": 45376, |
| "throughput": 12523.58453266287 |
| }, |
| { |
| "epoch": 0.7117297157501234, |
| "grad_norm": 0.09939948469400406, |
| "learning_rate": 3.57312686019447e-05, |
| "loss": 8.6062, |
| "step": 45408, |
| "throughput": 12523.692480312704 |
| }, |
| { |
| "epoch": 0.7122312870790524, |
| "grad_norm": 0.08067552000284195, |
| "learning_rate": 3.565250232455983e-05, |
| "loss": 8.6223, |
| "step": 45440, |
| "throughput": 12523.672272983184 |
| }, |
| { |
| "epoch": 0.7127328584079813, |
| "grad_norm": 0.08435127884149551, |
| "learning_rate": 3.55742694766387e-05, |
| "loss": 8.61, |
| "step": 45472, |
| "throughput": 12523.780598951442 |
| }, |
| { |
| "epoch": 0.7132344297369102, |
| "grad_norm": 0.100026935338974, |
| "learning_rate": 3.549657038081386e-05, |
| "loss": 8.616, |
| "step": 45504, |
| "throughput": 12523.806392368822 |
| }, |
| { |
| "epoch": 0.7137360010658391, |
| "grad_norm": 0.08343475311994553, |
| "learning_rate": 3.5419405357516624e-05, |
| "loss": 8.6031, |
| "step": 45536, |
| "throughput": 12523.875363752408 |
| }, |
| { |
| "epoch": 0.714237572394768, |
| "grad_norm": 0.08745314925909042, |
| "learning_rate": 3.534277472497574e-05, |
| "loss": 8.6085, |
| "step": 45568, |
| "throughput": 12523.903771054958 |
| }, |
| { |
| "epoch": 0.7147391437236968, |
| "grad_norm": 0.0844145342707634, |
| "learning_rate": 3.52666787992162e-05, |
| "loss": 8.6286, |
| "step": 45600, |
| "throughput": 12523.90038549103 |
| }, |
| { |
| "epoch": 0.7152407150526258, |
| "grad_norm": 0.08013051003217697, |
| "learning_rate": 3.519111789405779e-05, |
| "loss": 8.6428, |
| "step": 45632, |
| "throughput": 12523.89713389975 |
| }, |
| { |
| "epoch": 0.7157422863815547, |
| "grad_norm": 0.08158287405967712, |
| "learning_rate": 3.5116092321113936e-05, |
| "loss": 8.6173, |
| "step": 45664, |
| "throughput": 12523.80169957661 |
| }, |
| { |
| "epoch": 0.7162438577104836, |
| "grad_norm": 0.08240120112895966, |
| "learning_rate": 3.504160238979032e-05, |
| "loss": 8.5966, |
| "step": 45696, |
| "throughput": 12523.83175255946 |
| }, |
| { |
| "epoch": 0.7167454290394125, |
| "grad_norm": 0.08658699691295624, |
| "learning_rate": 3.496764840728361e-05, |
| "loss": 8.5956, |
| "step": 45728, |
| "throughput": 12523.899073897346 |
| }, |
| { |
| "epoch": 0.7172470003683414, |
| "grad_norm": 0.12535245716571808, |
| "learning_rate": 3.489423067858027e-05, |
| "loss": 8.6085, |
| "step": 45760, |
| "throughput": 12523.970433581719 |
| }, |
| { |
| "epoch": 0.7177485716972704, |
| "grad_norm": 0.09029490500688553, |
| "learning_rate": 3.4821349506455255e-05, |
| "loss": 8.6238, |
| "step": 45792, |
| "throughput": 12523.995812741854 |
| }, |
| { |
| "epoch": 0.7182501430261993, |
| "grad_norm": 0.08282382786273956, |
| "learning_rate": 3.47490051914707e-05, |
| "loss": 8.5885, |
| "step": 45824, |
| "throughput": 12524.055137789073 |
| }, |
| { |
| "epoch": 0.7187517143551282, |
| "grad_norm": 0.08247379213571548, |
| "learning_rate": 3.4677198031974784e-05, |
| "loss": 8.6082, |
| "step": 45856, |
| "throughput": 12524.080343281556 |
| }, |
| { |
| "epoch": 0.7192532856840571, |
| "grad_norm": 0.08638311922550201, |
| "learning_rate": 3.4605928324100444e-05, |
| "loss": 8.6297, |
| "step": 45888, |
| "throughput": 12524.10711505808 |
| }, |
| { |
| "epoch": 0.719754857012986, |
| "grad_norm": 0.09089305996894836, |
| "learning_rate": 3.45351963617642e-05, |
| "loss": 8.5998, |
| "step": 45920, |
| "throughput": 12524.126242425193 |
| }, |
| { |
| "epoch": 0.720256428341915, |
| "grad_norm": 0.11494186520576477, |
| "learning_rate": 3.446500243666481e-05, |
| "loss": 8.6295, |
| "step": 45952, |
| "throughput": 12524.163220188617 |
| }, |
| { |
| "epoch": 0.7207579996708439, |
| "grad_norm": 0.0827856957912445, |
| "learning_rate": 3.439534683828228e-05, |
| "loss": 8.6133, |
| "step": 45984, |
| "throughput": 12524.0293047988 |
| }, |
| { |
| "epoch": 0.7212595709997727, |
| "grad_norm": 0.08527000993490219, |
| "learning_rate": 3.4326229853876475e-05, |
| "loss": 8.6231, |
| "step": 46016, |
| "throughput": 12524.019004494541 |
| }, |
| { |
| "epoch": 0.7217611423287016, |
| "grad_norm": 0.09022502601146698, |
| "learning_rate": 3.425765176848607e-05, |
| "loss": 8.6127, |
| "step": 46048, |
| "throughput": 12524.058892874886 |
| }, |
| { |
| "epoch": 0.7222627136576305, |
| "grad_norm": 0.08133542537689209, |
| "learning_rate": 3.418961286492728e-05, |
| "loss": 8.6219, |
| "step": 46080, |
| "throughput": 12524.13604167911 |
| }, |
| { |
| "epoch": 0.7227642849865594, |
| "grad_norm": 0.08808305859565735, |
| "learning_rate": 3.412211342379273e-05, |
| "loss": 8.6263, |
| "step": 46112, |
| "throughput": 12524.171982740596 |
| }, |
| { |
| "epoch": 0.7232658563154883, |
| "grad_norm": 0.0857241079211235, |
| "learning_rate": 3.405515372345033e-05, |
| "loss": 8.6126, |
| "step": 46144, |
| "throughput": 12524.256170090268 |
| }, |
| { |
| "epoch": 0.7237674276444173, |
| "grad_norm": 0.08425363153219223, |
| "learning_rate": 3.398873404004209e-05, |
| "loss": 8.6241, |
| "step": 46176, |
| "throughput": 12524.295988496866 |
| }, |
| { |
| "epoch": 0.7242689989733462, |
| "grad_norm": 0.10848300904035568, |
| "learning_rate": 3.392285464748298e-05, |
| "loss": 8.6233, |
| "step": 46208, |
| "throughput": 12524.335729369192 |
| }, |
| { |
| "epoch": 0.7247705703022751, |
| "grad_norm": 0.10150907933712006, |
| "learning_rate": 3.385751581745979e-05, |
| "loss": 8.6177, |
| "step": 46240, |
| "throughput": 12524.332371447043 |
| }, |
| { |
| "epoch": 0.725272141631204, |
| "grad_norm": 0.0821448266506195, |
| "learning_rate": 3.379271781943007e-05, |
| "loss": 8.5999, |
| "step": 46272, |
| "throughput": 12524.365300158324 |
| }, |
| { |
| "epoch": 0.7257737129601329, |
| "grad_norm": 0.0838143527507782, |
| "learning_rate": 3.372846092062095e-05, |
| "loss": 8.6133, |
| "step": 46304, |
| "throughput": 12524.275482011071 |
| }, |
| { |
| "epoch": 0.7262752842890619, |
| "grad_norm": 0.07988490909337997, |
| "learning_rate": 3.366474538602806e-05, |
| "loss": 8.6177, |
| "step": 46336, |
| "throughput": 12524.206904659482 |
| }, |
| { |
| "epoch": 0.7267768556179908, |
| "grad_norm": 0.0871606096625328, |
| "learning_rate": 3.3601571478414455e-05, |
| "loss": 8.5967, |
| "step": 46368, |
| "throughput": 12524.257923440766 |
| }, |
| { |
| "epoch": 0.7272784269469197, |
| "grad_norm": 0.08344225585460663, |
| "learning_rate": 3.3538939458309556e-05, |
| "loss": 8.6105, |
| "step": 46400, |
| "throughput": 12524.36591856638 |
| }, |
| { |
| "epoch": 0.7277799982758486, |
| "grad_norm": 0.08372768014669418, |
| "learning_rate": 3.347684958400795e-05, |
| "loss": 8.5999, |
| "step": 46432, |
| "throughput": 12524.353684013731 |
| }, |
| { |
| "epoch": 0.7282815696047774, |
| "grad_norm": 0.07885193079710007, |
| "learning_rate": 3.341530211156847e-05, |
| "loss": 8.6069, |
| "step": 46464, |
| "throughput": 12524.435881493126 |
| }, |
| { |
| "epoch": 0.7287831409337063, |
| "grad_norm": 0.07780560851097107, |
| "learning_rate": 3.33542972948131e-05, |
| "loss": 8.6087, |
| "step": 46496, |
| "throughput": 12524.477125348127 |
| }, |
| { |
| "epoch": 0.7292847122626352, |
| "grad_norm": 0.07899513840675354, |
| "learning_rate": 3.329383538532587e-05, |
| "loss": 8.6141, |
| "step": 46528, |
| "throughput": 12524.51234135451 |
| }, |
| { |
| "epoch": 0.7297862835915642, |
| "grad_norm": 0.07994463294744492, |
| "learning_rate": 3.323391663245188e-05, |
| "loss": 8.607, |
| "step": 46560, |
| "throughput": 12524.558991803693 |
| }, |
| { |
| "epoch": 0.7302878549204931, |
| "grad_norm": 0.08503729104995728, |
| "learning_rate": 3.3174541283296225e-05, |
| "loss": 8.6031, |
| "step": 46592, |
| "throughput": 12524.564242509407 |
| }, |
| { |
| "epoch": 0.730789426249422, |
| "grad_norm": 0.07991162687540054, |
| "learning_rate": 3.311570958272303e-05, |
| "loss": 8.5908, |
| "step": 46624, |
| "throughput": 12524.528518215346 |
| }, |
| { |
| "epoch": 0.7312909975783509, |
| "grad_norm": 0.10354507714509964, |
| "learning_rate": 3.305742177335444e-05, |
| "loss": 8.5972, |
| "step": 46656, |
| "throughput": 12524.490738426928 |
| }, |
| { |
| "epoch": 0.7317925689072798, |
| "grad_norm": 0.08643455803394318, |
| "learning_rate": 3.29996780955695e-05, |
| "loss": 8.5962, |
| "step": 46688, |
| "throughput": 12524.459445706503 |
| }, |
| { |
| "epoch": 0.7322941402362088, |
| "grad_norm": 0.08863314241170883, |
| "learning_rate": 3.294247878750333e-05, |
| "loss": 8.6137, |
| "step": 46720, |
| "throughput": 12524.56493807568 |
| }, |
| { |
| "epoch": 0.7327957115651377, |
| "grad_norm": 0.0950147733092308, |
| "learning_rate": 3.288582408504603e-05, |
| "loss": 8.5964, |
| "step": 46752, |
| "throughput": 12524.634684763916 |
| }, |
| { |
| "epoch": 0.7332972828940666, |
| "grad_norm": 0.08327952027320862, |
| "learning_rate": 3.2829714221841805e-05, |
| "loss": 8.6353, |
| "step": 46784, |
| "throughput": 12524.63104910303 |
| }, |
| { |
| "epoch": 0.7337988542229955, |
| "grad_norm": 0.0803689956665039, |
| "learning_rate": 3.2774149429287854e-05, |
| "loss": 8.6109, |
| "step": 46816, |
| "throughput": 12524.673779338787 |
| }, |
| { |
| "epoch": 0.7343004255519244, |
| "grad_norm": 0.09594424813985825, |
| "learning_rate": 3.271912993653357e-05, |
| "loss": 8.6212, |
| "step": 46848, |
| "throughput": 12524.734358163496 |
| }, |
| { |
| "epoch": 0.7348019968808533, |
| "grad_norm": 0.0844351202249527, |
| "learning_rate": 3.266465597047948e-05, |
| "loss": 8.6105, |
| "step": 46880, |
| "throughput": 12524.771159082025 |
| }, |
| { |
| "epoch": 0.7353035682097822, |
| "grad_norm": 0.08568018674850464, |
| "learning_rate": 3.261072775577641e-05, |
| "loss": 8.614, |
| "step": 46912, |
| "throughput": 12524.792275334812 |
| }, |
| { |
| "epoch": 0.7358051395387111, |
| "grad_norm": 0.0839168131351471, |
| "learning_rate": 3.255734551482446e-05, |
| "loss": 8.598, |
| "step": 46944, |
| "throughput": 12524.83965946456 |
| }, |
| { |
| "epoch": 0.73630671086764, |
| "grad_norm": 0.08393090963363647, |
| "learning_rate": 3.2504509467772154e-05, |
| "loss": 8.604, |
| "step": 46976, |
| "throughput": 12524.73058877774 |
| }, |
| { |
| "epoch": 0.7368082821965689, |
| "grad_norm": 0.08217764645814896, |
| "learning_rate": 3.24522198325155e-05, |
| "loss": 8.597, |
| "step": 47008, |
| "throughput": 12524.642241833213 |
| }, |
| { |
| "epoch": 0.7373098535254978, |
| "grad_norm": 0.0889383926987648, |
| "learning_rate": 3.2400476824697126e-05, |
| "loss": 8.5989, |
| "step": 47040, |
| "throughput": 12524.745536910536 |
| }, |
| { |
| "epoch": 0.7378114248544267, |
| "grad_norm": 0.09606282413005829, |
| "learning_rate": 3.234928065770532e-05, |
| "loss": 8.6268, |
| "step": 47072, |
| "throughput": 12524.822726977116 |
| }, |
| { |
| "epoch": 0.7383129961833557, |
| "grad_norm": 0.08575651049613953, |
| "learning_rate": 3.2298631542673254e-05, |
| "loss": 8.6118, |
| "step": 47104, |
| "throughput": 12524.841677926874 |
| }, |
| { |
| "epoch": 0.7388145675122846, |
| "grad_norm": 0.08529612421989441, |
| "learning_rate": 3.2248529688478036e-05, |
| "loss": 8.6369, |
| "step": 47136, |
| "throughput": 12524.46294525117 |
| }, |
| { |
| "epoch": 0.7393161388412135, |
| "grad_norm": 0.09575065970420837, |
| "learning_rate": 3.2198975301739834e-05, |
| "loss": 8.6034, |
| "step": 47168, |
| "throughput": 12524.506369958186 |
| }, |
| { |
| "epoch": 0.7398177101701424, |
| "grad_norm": 0.07998523861169815, |
| "learning_rate": 3.214996858682109e-05, |
| "loss": 8.6062, |
| "step": 47200, |
| "throughput": 12524.528555009028 |
| }, |
| { |
| "epoch": 0.7403192814990713, |
| "grad_norm": 0.08077394962310791, |
| "learning_rate": 3.210150974582565e-05, |
| "loss": 8.6244, |
| "step": 47232, |
| "throughput": 12524.578044882532 |
| }, |
| { |
| "epoch": 0.7408208528280003, |
| "grad_norm": 0.08935201168060303, |
| "learning_rate": 3.205359897859793e-05, |
| "loss": 8.6, |
| "step": 47264, |
| "throughput": 12524.58983487304 |
| }, |
| { |
| "epoch": 0.7413224241569292, |
| "grad_norm": 0.08008461445569992, |
| "learning_rate": 3.2006236482722034e-05, |
| "loss": 8.5788, |
| "step": 47296, |
| "throughput": 12524.508451575897 |
| }, |
| { |
| "epoch": 0.7418239954858581, |
| "grad_norm": 0.08044737577438354, |
| "learning_rate": 3.195942245352108e-05, |
| "loss": 8.6208, |
| "step": 47328, |
| "throughput": 12524.453576738908 |
| }, |
| { |
| "epoch": 0.7423255668147869, |
| "grad_norm": 0.08467201143503189, |
| "learning_rate": 3.191315708405626e-05, |
| "loss": 8.6039, |
| "step": 47360, |
| "throughput": 12524.503867721372 |
| }, |
| { |
| "epoch": 0.7428271381437158, |
| "grad_norm": 0.09131542593240738, |
| "learning_rate": 3.1867440565126066e-05, |
| "loss": 8.6309, |
| "step": 47392, |
| "throughput": 12524.608496881645 |
| }, |
| { |
| "epoch": 0.7433287094726447, |
| "grad_norm": 0.08028768748044968, |
| "learning_rate": 3.182227308526557e-05, |
| "loss": 8.5986, |
| "step": 47424, |
| "throughput": 12524.634275872028 |
| }, |
| { |
| "epoch": 0.7438302808015737, |
| "grad_norm": 0.07837190479040146, |
| "learning_rate": 3.17776548307456e-05, |
| "loss": 8.6226, |
| "step": 47456, |
| "throughput": 12524.676327812673 |
| }, |
| { |
| "epoch": 0.7443318521305026, |
| "grad_norm": 0.08891215920448303, |
| "learning_rate": 3.173358598557196e-05, |
| "loss": 8.6002, |
| "step": 47488, |
| "throughput": 12524.69703560755 |
| }, |
| { |
| "epoch": 0.7448334234594315, |
| "grad_norm": 0.08777833729982376, |
| "learning_rate": 3.169006673148473e-05, |
| "loss": 8.5901, |
| "step": 47520, |
| "throughput": 12524.737627397319 |
| }, |
| { |
| "epoch": 0.7453349947883604, |
| "grad_norm": 0.08668994158506393, |
| "learning_rate": 3.1647097247957385e-05, |
| "loss": 8.6023, |
| "step": 47552, |
| "throughput": 12524.751668629939 |
| }, |
| { |
| "epoch": 0.7458365661172893, |
| "grad_norm": 0.09065524488687515, |
| "learning_rate": 3.160467771219624e-05, |
| "loss": 8.6088, |
| "step": 47584, |
| "throughput": 12524.784184177812 |
| }, |
| { |
| "epoch": 0.7463381374462182, |
| "grad_norm": 0.09433916956186295, |
| "learning_rate": 3.1562808299139596e-05, |
| "loss": 8.6203, |
| "step": 47616, |
| "throughput": 12524.793844792443 |
| }, |
| { |
| "epoch": 0.7468397087751472, |
| "grad_norm": 0.0789889246225357, |
| "learning_rate": 3.1521489181457005e-05, |
| "loss": 8.6115, |
| "step": 47648, |
| "throughput": 12524.728785991274 |
| }, |
| { |
| "epoch": 0.7473412801040761, |
| "grad_norm": 0.08276744186878204, |
| "learning_rate": 3.1480720529548654e-05, |
| "loss": 8.6034, |
| "step": 47680, |
| "throughput": 12524.688017450091 |
| }, |
| { |
| "epoch": 0.747842851433005, |
| "grad_norm": 0.07992296665906906, |
| "learning_rate": 3.1440502511544566e-05, |
| "loss": 8.6031, |
| "step": 47712, |
| "throughput": 12524.793787131397 |
| }, |
| { |
| "epoch": 0.7483444227619339, |
| "grad_norm": 0.08115531504154205, |
| "learning_rate": 3.1400835293303984e-05, |
| "loss": 8.6163, |
| "step": 47744, |
| "throughput": 12524.866111468855 |
| }, |
| { |
| "epoch": 0.7488459940908628, |
| "grad_norm": 0.08725763857364655, |
| "learning_rate": 3.136171903841463e-05, |
| "loss": 8.6209, |
| "step": 47776, |
| "throughput": 12524.861843279761 |
| }, |
| { |
| "epoch": 0.7493475654197916, |
| "grad_norm": 0.08313852548599243, |
| "learning_rate": 3.1323153908192057e-05, |
| "loss": 8.6085, |
| "step": 47808, |
| "throughput": 12524.903077268247 |
| }, |
| { |
| "epoch": 0.7498491367487206, |
| "grad_norm": 0.0964965894818306, |
| "learning_rate": 3.128514006167897e-05, |
| "loss": 8.6233, |
| "step": 47840, |
| "throughput": 12524.910717496554 |
| }, |
| { |
| "epoch": 0.7503507080776495, |
| "grad_norm": 0.08035736531019211, |
| "learning_rate": 3.124767765564459e-05, |
| "loss": 8.6016, |
| "step": 47872, |
| "throughput": 12524.97226222231 |
| }, |
| { |
| "epoch": 0.7508522794065784, |
| "grad_norm": 0.07964587211608887, |
| "learning_rate": 3.121076684458398e-05, |
| "loss": 8.6102, |
| "step": 47904, |
| "throughput": 12525.004078127577 |
| }, |
| { |
| "epoch": 0.7513538507355073, |
| "grad_norm": 0.08621484786272049, |
| "learning_rate": 3.1174407780717433e-05, |
| "loss": 8.6079, |
| "step": 47936, |
| "throughput": 12525.004016543815 |
| }, |
| { |
| "epoch": 0.7518554220644362, |
| "grad_norm": 0.08485983312129974, |
| "learning_rate": 3.113860061398985e-05, |
| "loss": 8.5892, |
| "step": 47968, |
| "throughput": 12524.956430873583 |
| }, |
| { |
| "epoch": 0.7523569933933651, |
| "grad_norm": 0.09468486905097961, |
| "learning_rate": 3.110334549207009e-05, |
| "loss": 8.6019, |
| "step": 48000, |
| "throughput": 12524.856626194747 |
| }, |
| { |
| "epoch": 0.7528585647222941, |
| "grad_norm": 0.09153393656015396, |
| "learning_rate": 3.1068642560350375e-05, |
| "loss": 8.588, |
| "step": 48032, |
| "throughput": 12524.957839340766 |
| }, |
| { |
| "epoch": 0.753360136051223, |
| "grad_norm": 0.08684483170509338, |
| "learning_rate": 3.103449196194569e-05, |
| "loss": 8.6055, |
| "step": 48064, |
| "throughput": 12525.02156365344 |
| }, |
| { |
| "epoch": 0.7538617073801519, |
| "grad_norm": 0.08171502500772476, |
| "learning_rate": 3.1000893837693234e-05, |
| "loss": 8.6261, |
| "step": 48096, |
| "throughput": 12525.017951077596 |
| }, |
| { |
| "epoch": 0.7543632787090808, |
| "grad_norm": 0.08191504329442978, |
| "learning_rate": 3.096784832615175e-05, |
| "loss": 8.5741, |
| "step": 48128, |
| "throughput": 12525.075499919187 |
| }, |
| { |
| "epoch": 0.7548648500380097, |
| "grad_norm": 0.08089284598827362, |
| "learning_rate": 3.093535556360101e-05, |
| "loss": 8.6271, |
| "step": 48160, |
| "throughput": 12525.120487941642 |
| }, |
| { |
| "epoch": 0.7553664213669387, |
| "grad_norm": 0.08522171527147293, |
| "learning_rate": 3.0903415684041285e-05, |
| "loss": 8.6076, |
| "step": 48192, |
| "throughput": 12525.145324905377 |
| }, |
| { |
| "epoch": 0.7558679926958676, |
| "grad_norm": 0.08962146192789078, |
| "learning_rate": 3.087202881919273e-05, |
| "loss": 8.6072, |
| "step": 48224, |
| "throughput": 12525.163169395968 |
| }, |
| { |
| "epoch": 0.7563695640247964, |
| "grad_norm": 0.08281126618385315, |
| "learning_rate": 3.084119509849488e-05, |
| "loss": 8.6067, |
| "step": 48256, |
| "throughput": 12525.192516711173 |
| }, |
| { |
| "epoch": 0.7568711353537253, |
| "grad_norm": 0.08468001335859299, |
| "learning_rate": 3.081091464910606e-05, |
| "loss": 8.6145, |
| "step": 48288, |
| "throughput": 12525.122473990757 |
| }, |
| { |
| "epoch": 0.7573727066826542, |
| "grad_norm": 0.08133754879236221, |
| "learning_rate": 3.078118759590295e-05, |
| "loss": 8.6025, |
| "step": 48320, |
| "throughput": 12525.068061495733 |
| }, |
| { |
| "epoch": 0.7578742780115831, |
| "grad_norm": 0.08273835480213165, |
| "learning_rate": 3.075201406148001e-05, |
| "loss": 8.5968, |
| "step": 48352, |
| "throughput": 12525.133415826404 |
| }, |
| { |
| "epoch": 0.758375849340512, |
| "grad_norm": 0.0818416029214859, |
| "learning_rate": 3.072339416614899e-05, |
| "loss": 8.6016, |
| "step": 48384, |
| "throughput": 12525.235286059295 |
| }, |
| { |
| "epoch": 0.758877420669441, |
| "grad_norm": 0.09383910149335861, |
| "learning_rate": 3.069532802793839e-05, |
| "loss": 8.5897, |
| "step": 48416, |
| "throughput": 12525.264290396648 |
| }, |
| { |
| "epoch": 0.7593789919983699, |
| "grad_norm": 0.09501391649246216, |
| "learning_rate": 3.066781576259309e-05, |
| "loss": 8.6211, |
| "step": 48448, |
| "throughput": 12525.27034147465 |
| }, |
| { |
| "epoch": 0.7598805633272988, |
| "grad_norm": 0.08860146254301071, |
| "learning_rate": 3.0640857483573714e-05, |
| "loss": 8.6066, |
| "step": 48480, |
| "throughput": 12525.306394704627 |
| }, |
| { |
| "epoch": 0.7603821346562277, |
| "grad_norm": 0.08503951877355576, |
| "learning_rate": 3.061445330205631e-05, |
| "loss": 8.5952, |
| "step": 48512, |
| "throughput": 12525.334264765412 |
| }, |
| { |
| "epoch": 0.7608837059851566, |
| "grad_norm": 0.08074294030666351, |
| "learning_rate": 3.0588603326931796e-05, |
| "loss": 8.6088, |
| "step": 48544, |
| "throughput": 12525.34869462047 |
| }, |
| { |
| "epoch": 0.7613852773140856, |
| "grad_norm": 0.08348783105611801, |
| "learning_rate": 3.056330766480554e-05, |
| "loss": 8.5983, |
| "step": 48576, |
| "throughput": 12525.405007836334 |
| }, |
| { |
| "epoch": 0.7618868486430145, |
| "grad_norm": 0.08301907032728195, |
| "learning_rate": 3.053856641999694e-05, |
| "loss": 8.5994, |
| "step": 48608, |
| "throughput": 12525.346566915949 |
| }, |
| { |
| "epoch": 0.7623884199719434, |
| "grad_norm": 0.08006524294614792, |
| "learning_rate": 3.0514379694538932e-05, |
| "loss": 8.5948, |
| "step": 48640, |
| "throughput": 12525.320557107438 |
| }, |
| { |
| "epoch": 0.7628899913008723, |
| "grad_norm": 0.08431556075811386, |
| "learning_rate": 3.0490747588177684e-05, |
| "loss": 8.6174, |
| "step": 48672, |
| "throughput": 12525.308854529463 |
| }, |
| { |
| "epoch": 0.7633915626298011, |
| "grad_norm": 0.08287610858678818, |
| "learning_rate": 3.0467670198372044e-05, |
| "loss": 8.6072, |
| "step": 48704, |
| "throughput": 12525.408934162428 |
| }, |
| { |
| "epoch": 0.76389313395873, |
| "grad_norm": 0.16203758120536804, |
| "learning_rate": 3.044514762029326e-05, |
| "loss": 8.5923, |
| "step": 48736, |
| "throughput": 12525.47215639991 |
| }, |
| { |
| "epoch": 0.764394705287659, |
| "grad_norm": 0.08479800820350647, |
| "learning_rate": 3.0423179946824494e-05, |
| "loss": 8.6132, |
| "step": 48768, |
| "throughput": 12525.474404700622 |
| }, |
| { |
| "epoch": 0.7648962766165879, |
| "grad_norm": 0.08963710814714432, |
| "learning_rate": 3.040176726856049e-05, |
| "loss": 8.6053, |
| "step": 48800, |
| "throughput": 12525.493884954105 |
| }, |
| { |
| "epoch": 0.7653978479455168, |
| "grad_norm": 0.09195411205291748, |
| "learning_rate": 3.0380909673807205e-05, |
| "loss": 8.5854, |
| "step": 48832, |
| "throughput": 12525.534557200805 |
| }, |
| { |
| "epoch": 0.7658994192744457, |
| "grad_norm": 0.08387543261051178, |
| "learning_rate": 3.0360607248581437e-05, |
| "loss": 8.6059, |
| "step": 48864, |
| "throughput": 12525.53278414358 |
| }, |
| { |
| "epoch": 0.7664009906033746, |
| "grad_norm": 0.08522969484329224, |
| "learning_rate": 3.0340860076610427e-05, |
| "loss": 8.5936, |
| "step": 48896, |
| "throughput": 12525.592779349634 |
| }, |
| { |
| "epoch": 0.7669025619323036, |
| "grad_norm": 0.08993417024612427, |
| "learning_rate": 3.0321668239331582e-05, |
| "loss": 8.6044, |
| "step": 48928, |
| "throughput": 12525.60480003467 |
| }, |
| { |
| "epoch": 0.7674041332612325, |
| "grad_norm": 0.08827044814825058, |
| "learning_rate": 3.030303181589207e-05, |
| "loss": 8.5837, |
| "step": 48960, |
| "throughput": 12525.509729326744 |
| }, |
| { |
| "epoch": 0.7679057045901614, |
| "grad_norm": 0.08753187954425812, |
| "learning_rate": 3.0284950883148598e-05, |
| "loss": 8.5909, |
| "step": 48992, |
| "throughput": 12525.453731483165 |
| }, |
| { |
| "epoch": 0.7684072759190903, |
| "grad_norm": 0.0935630276799202, |
| "learning_rate": 3.026742551566696e-05, |
| "loss": 8.5953, |
| "step": 49024, |
| "throughput": 12525.55475574127 |
| }, |
| { |
| "epoch": 0.7689088472480192, |
| "grad_norm": 0.09898614138364792, |
| "learning_rate": 3.0250455785721827e-05, |
| "loss": 8.6012, |
| "step": 49056, |
| "throughput": 12525.620507141155 |
| }, |
| { |
| "epoch": 0.7694104185769481, |
| "grad_norm": 0.08655832707881927, |
| "learning_rate": 3.023404176329643e-05, |
| "loss": 8.6057, |
| "step": 49088, |
| "throughput": 12525.62817059881 |
| }, |
| { |
| "epoch": 0.7699119899058771, |
| "grad_norm": 0.07959497720003128, |
| "learning_rate": 3.021818351608223e-05, |
| "loss": 8.5975, |
| "step": 49120, |
| "throughput": 12525.676969174108 |
| }, |
| { |
| "epoch": 0.7704135612348059, |
| "grad_norm": 0.09171836823225021, |
| "learning_rate": 3.0202881109478676e-05, |
| "loss": 8.6115, |
| "step": 49152, |
| "throughput": 12525.69393438 |
| }, |
| { |
| "epoch": 0.7709151325637348, |
| "grad_norm": 0.07986843585968018, |
| "learning_rate": 3.0188134606592958e-05, |
| "loss": 8.6097, |
| "step": 49184, |
| "throughput": 12525.331320803678 |
| }, |
| { |
| "epoch": 0.7714167038926637, |
| "grad_norm": 0.0822770819067955, |
| "learning_rate": 3.017394406823969e-05, |
| "loss": 8.5995, |
| "step": 49216, |
| "throughput": 12525.360102708491 |
| }, |
| { |
| "epoch": 0.7719182752215926, |
| "grad_norm": 0.08194506913423538, |
| "learning_rate": 3.0160309552940704e-05, |
| "loss": 8.6245, |
| "step": 49248, |
| "throughput": 12525.395683174307 |
| }, |
| { |
| "epoch": 0.7724198465505215, |
| "grad_norm": 0.08263807743787766, |
| "learning_rate": 3.014723111692476e-05, |
| "loss": 8.6017, |
| "step": 49280, |
| "throughput": 12525.34133444018 |
| }, |
| { |
| "epoch": 0.7729214178794505, |
| "grad_norm": 0.08559015393257141, |
| "learning_rate": 3.013470881412739e-05, |
| "loss": 8.5813, |
| "step": 49312, |
| "throughput": 12525.303717545508 |
| }, |
| { |
| "epoch": 0.7734229892083794, |
| "grad_norm": 0.08753157407045364, |
| "learning_rate": 3.0122742696190606e-05, |
| "loss": 8.6161, |
| "step": 49344, |
| "throughput": 12525.361815283388 |
| }, |
| { |
| "epoch": 0.7739245605373083, |
| "grad_norm": 0.0791105255484581, |
| "learning_rate": 3.0111332812462692e-05, |
| "loss": 8.6013, |
| "step": 49376, |
| "throughput": 12525.459584093609 |
| }, |
| { |
| "epoch": 0.7744261318662372, |
| "grad_norm": 0.08456587046384811, |
| "learning_rate": 3.0100479209998055e-05, |
| "loss": 8.5916, |
| "step": 49408, |
| "throughput": 12525.448924109218 |
| }, |
| { |
| "epoch": 0.7749277031951661, |
| "grad_norm": 0.09241674095392227, |
| "learning_rate": 3.0090181933556994e-05, |
| "loss": 8.6064, |
| "step": 49440, |
| "throughput": 12525.465303008194 |
| }, |
| { |
| "epoch": 0.775429274524095, |
| "grad_norm": 0.09265810251235962, |
| "learning_rate": 3.0080441025605494e-05, |
| "loss": 8.5875, |
| "step": 49472, |
| "throughput": 12525.49486172476 |
| }, |
| { |
| "epoch": 0.775930845853024, |
| "grad_norm": 0.0823616310954094, |
| "learning_rate": 3.007125652631508e-05, |
| "loss": 8.5854, |
| "step": 49504, |
| "throughput": 12525.532190424396 |
| }, |
| { |
| "epoch": 0.7764324171819529, |
| "grad_norm": 0.08635231107473373, |
| "learning_rate": 3.006262847356269e-05, |
| "loss": 8.5906, |
| "step": 49536, |
| "throughput": 12525.569918707946 |
| }, |
| { |
| "epoch": 0.7769339885108818, |
| "grad_norm": 0.09322790056467056, |
| "learning_rate": 3.0054556902930394e-05, |
| "loss": 8.6039, |
| "step": 49568, |
| "throughput": 12525.62596869029 |
| }, |
| { |
| "epoch": 0.7774355598398106, |
| "grad_norm": 0.08192238211631775, |
| "learning_rate": 3.0047041847705404e-05, |
| "loss": 8.6082, |
| "step": 49600, |
| "throughput": 12525.607799600375 |
| }, |
| { |
| "epoch": 0.7779371311687395, |
| "grad_norm": 0.08833806216716766, |
| "learning_rate": 3.0040083338879834e-05, |
| "loss": 8.58, |
| "step": 49632, |
| "throughput": 12525.533183024592 |
| }, |
| { |
| "epoch": 0.7784387024976684, |
| "grad_norm": 0.0832701176404953, |
| "learning_rate": 3.0033681405150554e-05, |
| "loss": 8.6138, |
| "step": 49664, |
| "throughput": 12525.52730959789 |
| }, |
| { |
| "epoch": 0.7789402738265974, |
| "grad_norm": 0.09455437958240509, |
| "learning_rate": 3.0027836072919202e-05, |
| "loss": 8.5852, |
| "step": 49696, |
| "throughput": 12525.62178306482 |
| }, |
| { |
| "epoch": 0.7794418451555263, |
| "grad_norm": 0.10499484091997147, |
| "learning_rate": 3.002254736629194e-05, |
| "loss": 8.6143, |
| "step": 49728, |
| "throughput": 12525.680177305796 |
| }, |
| { |
| "epoch": 0.7799434164844552, |
| "grad_norm": 0.08849858492612839, |
| "learning_rate": 3.001781530707938e-05, |
| "loss": 8.585, |
| "step": 49760, |
| "throughput": 12525.640270537673 |
| }, |
| { |
| "epoch": 0.7804449878133841, |
| "grad_norm": 0.08110593259334564, |
| "learning_rate": 3.0013639914796586e-05, |
| "loss": 8.6013, |
| "step": 49792, |
| "throughput": 12525.696330785242 |
| }, |
| { |
| "epoch": 0.780946559142313, |
| "grad_norm": 0.08891758322715759, |
| "learning_rate": 3.001002120666285e-05, |
| "loss": 8.5945, |
| "step": 49824, |
| "throughput": 12525.695608125934 |
| }, |
| { |
| "epoch": 0.781448130471242, |
| "grad_norm": 0.10169275850057602, |
| "learning_rate": 3.0006959197601765e-05, |
| "loss": 8.6049, |
| "step": 49856, |
| "throughput": 12525.73024644998 |
| }, |
| { |
| "epoch": 0.7819497018001709, |
| "grad_norm": 0.08562269061803818, |
| "learning_rate": 3.000445390024106e-05, |
| "loss": 8.5981, |
| "step": 49888, |
| "throughput": 12525.791092855176 |
| }, |
| { |
| "epoch": 0.7824512731290998, |
| "grad_norm": 0.08182035386562347, |
| "learning_rate": 3.0002505324912582e-05, |
| "loss": 8.586, |
| "step": 49920, |
| "throughput": 12525.789284073133 |
| }, |
| { |
| "epoch": 0.7829528444580287, |
| "grad_norm": 0.08575500547885895, |
| "learning_rate": 3.0001113479652246e-05, |
| "loss": 8.596, |
| "step": 49952, |
| "throughput": 12525.74085708137 |
| }, |
| { |
| "epoch": 0.7834544157869576, |
| "grad_norm": 0.09045220166444778, |
| "learning_rate": 3.0000278370200057e-05, |
| "loss": 8.6039, |
| "step": 49984, |
| "throughput": 12525.726369494236 |
| }, |
| { |
| "epoch": 0.7839559871158865, |
| "grad_norm": 0.09492523223161697, |
| "learning_rate": 2.9999999999999997e-05, |
| "loss": 8.5925, |
| "step": 50016, |
| "throughput": 12525.78270587197 |
| } |
| ], |
| "logging_steps": 32, |
| "max_steps": 50016, |
| "num_input_tokens_seen": 104891154432, |
| "num_train_epochs": 1, |
| "save_steps": 2048, |
| "stateful_callbacks": { |
| "LogCallback": { |
| "elapsed_time": 261688.12384581566, |
| "start_time": 1766740210.8614042 |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.619164947133655e+20, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|