| { |
| "best_metric": 0.009820309467613697, |
| "best_model_checkpoint": "/workspace/previous_works/RadFM/output/RadFM-Llama3-8B-pretrain-0002-embed_tokens-depth32-lora-10ep/checkpoint-10000", |
| "epoch": 2.0951183741881416, |
| "eval_steps": 10000, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0031426775612822125, |
| "grad_norm": 38.333740234375, |
| "learning_rate": 3.488372093023256e-06, |
| "loss": 2.6324, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.006285355122564425, |
| "grad_norm": 23.8914794921875, |
| "learning_rate": 6.976744186046512e-06, |
| "loss": 2.3565, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.009428032683846637, |
| "grad_norm": 6.890503883361816, |
| "learning_rate": 1.0465116279069768e-05, |
| "loss": 1.8897, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01257071024512885, |
| "grad_norm": 3.9464468955993652, |
| "learning_rate": 1.3953488372093024e-05, |
| "loss": 1.3707, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01571338780641106, |
| "grad_norm": 4.443431854248047, |
| "learning_rate": 1.744186046511628e-05, |
| "loss": 1.055, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.018856065367693273, |
| "grad_norm": 3.5747361183166504, |
| "learning_rate": 2.0930232558139536e-05, |
| "loss": 0.9048, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02199874292897549, |
| "grad_norm": 4.540731430053711, |
| "learning_rate": 2.441860465116279e-05, |
| "loss": 0.9143, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0251414204902577, |
| "grad_norm": 4.121450424194336, |
| "learning_rate": 2.7906976744186048e-05, |
| "loss": 0.7641, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.028284098051539912, |
| "grad_norm": 3.1179299354553223, |
| "learning_rate": 3.13953488372093e-05, |
| "loss": 0.7784, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.03142677561282212, |
| "grad_norm": 2.9703869819641113, |
| "learning_rate": 3.488372093023256e-05, |
| "loss": 0.7299, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.034569453174104335, |
| "grad_norm": 2.706854820251465, |
| "learning_rate": 3.837209302325582e-05, |
| "loss": 0.6778, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.03771213073538655, |
| "grad_norm": 3.361267328262329, |
| "learning_rate": 4.186046511627907e-05, |
| "loss": 0.7222, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04085480829666876, |
| "grad_norm": 4.040229797363281, |
| "learning_rate": 4.5348837209302326e-05, |
| "loss": 0.6684, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.04399748585795098, |
| "grad_norm": 2.817627429962158, |
| "learning_rate": 4.883720930232558e-05, |
| "loss": 0.7458, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.04714016341923319, |
| "grad_norm": 2.8800182342529297, |
| "learning_rate": 5.232558139534884e-05, |
| "loss": 0.6338, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.0502828409805154, |
| "grad_norm": 2.436993360519409, |
| "learning_rate": 5.5813953488372095e-05, |
| "loss": 0.6299, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05342551854179761, |
| "grad_norm": 3.5814456939697266, |
| "learning_rate": 5.9302325581395356e-05, |
| "loss": 0.5728, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.056568196103079824, |
| "grad_norm": 2.8744938373565674, |
| "learning_rate": 6.27906976744186e-05, |
| "loss": 0.59, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.059710873664362035, |
| "grad_norm": 2.679749011993408, |
| "learning_rate": 6.627906976744186e-05, |
| "loss": 0.6016, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.06285355122564425, |
| "grad_norm": 3.1333463191986084, |
| "learning_rate": 6.976744186046513e-05, |
| "loss": 0.6569, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06599622878692646, |
| "grad_norm": 2.2865939140319824, |
| "learning_rate": 7.325581395348837e-05, |
| "loss": 0.6385, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.06913890634820867, |
| "grad_norm": 2.9787251949310303, |
| "learning_rate": 7.674418604651163e-05, |
| "loss": 0.6307, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.07228158390949088, |
| "grad_norm": 2.078509569168091, |
| "learning_rate": 8.023255813953489e-05, |
| "loss": 0.5454, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.0754242614707731, |
| "grad_norm": 2.6606740951538086, |
| "learning_rate": 8.372093023255814e-05, |
| "loss": 0.6211, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0785669390320553, |
| "grad_norm": 1.9346429109573364, |
| "learning_rate": 8.72093023255814e-05, |
| "loss": 0.5954, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.08170961659333752, |
| "grad_norm": 2.2432360649108887, |
| "learning_rate": 9.069767441860465e-05, |
| "loss": 0.5385, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08485229415461974, |
| "grad_norm": 2.1645498275756836, |
| "learning_rate": 9.418604651162792e-05, |
| "loss": 0.592, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.08799497171590195, |
| "grad_norm": 2.1806533336639404, |
| "learning_rate": 9.767441860465116e-05, |
| "loss": 0.5372, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.09113764927718417, |
| "grad_norm": 2.445610761642456, |
| "learning_rate": 9.999996802299678e-05, |
| "loss": 0.6487, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.09428032683846638, |
| "grad_norm": 2.3592734336853027, |
| "learning_rate": 9.999948836876656e-05, |
| "loss": 0.5957, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.09742300439974859, |
| "grad_norm": 2.3027069568634033, |
| "learning_rate": 9.999843313485898e-05, |
| "loss": 0.5835, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.1005656819610308, |
| "grad_norm": 2.6429057121276855, |
| "learning_rate": 9.999680233342161e-05, |
| "loss": 0.592, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.10370835952231301, |
| "grad_norm": 2.0832202434539795, |
| "learning_rate": 9.999459598322778e-05, |
| "loss": 0.6203, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.10685103708359522, |
| "grad_norm": 2.481870412826538, |
| "learning_rate": 9.999181410967633e-05, |
| "loss": 0.5428, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.10999371464487744, |
| "grad_norm": 1.9621151685714722, |
| "learning_rate": 9.99884567447914e-05, |
| "loss": 0.6101, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.11313639220615965, |
| "grad_norm": 2.8833186626434326, |
| "learning_rate": 9.998452392722198e-05, |
| "loss": 0.5577, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.11627906976744186, |
| "grad_norm": 2.4447429180145264, |
| "learning_rate": 9.998001570224158e-05, |
| "loss": 0.566, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.11942174732872407, |
| "grad_norm": 2.141496419906616, |
| "learning_rate": 9.997493212174753e-05, |
| "loss": 0.6211, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.12256442489000628, |
| "grad_norm": 2.389796495437622, |
| "learning_rate": 9.996927324426057e-05, |
| "loss": 0.5937, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.1257071024512885, |
| "grad_norm": 2.1194262504577637, |
| "learning_rate": 9.996303913492408e-05, |
| "loss": 0.5847, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.12884978001257072, |
| "grad_norm": 1.7767274379730225, |
| "learning_rate": 9.99562298655033e-05, |
| "loss": 0.518, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.13199245757385292, |
| "grad_norm": 2.0348453521728516, |
| "learning_rate": 9.994884551438458e-05, |
| "loss": 0.5941, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.13513513513513514, |
| "grad_norm": 1.443819284439087, |
| "learning_rate": 9.994088616657444e-05, |
| "loss": 0.5022, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.13827781269641734, |
| "grad_norm": 2.1748251914978027, |
| "learning_rate": 9.993235191369861e-05, |
| "loss": 0.5369, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.14142049025769957, |
| "grad_norm": 1.9295774698257446, |
| "learning_rate": 9.99232428540009e-05, |
| "loss": 0.607, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.14456316781898176, |
| "grad_norm": 1.7530088424682617, |
| "learning_rate": 9.991355909234224e-05, |
| "loss": 0.5417, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.147705845380264, |
| "grad_norm": 10.02226448059082, |
| "learning_rate": 9.990330074019925e-05, |
| "loss": 0.5901, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.1508485229415462, |
| "grad_norm": 1.3864644765853882, |
| "learning_rate": 9.989246791566314e-05, |
| "loss": 0.678, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.1539912005028284, |
| "grad_norm": 1.6103929281234741, |
| "learning_rate": 9.988106074343823e-05, |
| "loss": 0.4741, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.1571338780641106, |
| "grad_norm": 1.5933347940444946, |
| "learning_rate": 9.986907935484064e-05, |
| "loss": 0.5391, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.16027655562539284, |
| "grad_norm": 1.5971338748931885, |
| "learning_rate": 9.985652388779663e-05, |
| "loss": 0.5782, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.16341923318667503, |
| "grad_norm": 1.559793472290039, |
| "learning_rate": 9.984339448684113e-05, |
| "loss": 0.5227, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.16656191074795726, |
| "grad_norm": 1.3077164888381958, |
| "learning_rate": 9.982969130311597e-05, |
| "loss": 0.5203, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.16970458830923948, |
| "grad_norm": 1.6828336715698242, |
| "learning_rate": 9.98154144943683e-05, |
| "loss": 0.5471, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.17284726587052168, |
| "grad_norm": 1.387099266052246, |
| "learning_rate": 9.98005642249486e-05, |
| "loss": 0.5399, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.1759899434318039, |
| "grad_norm": 1.723253607749939, |
| "learning_rate": 9.978514066580886e-05, |
| "loss": 0.5606, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.1791326209930861, |
| "grad_norm": 1.22931706905365, |
| "learning_rate": 9.976914399450068e-05, |
| "loss": 0.5024, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.18227529855436833, |
| "grad_norm": 1.4278538227081299, |
| "learning_rate": 9.97525743951731e-05, |
| "loss": 0.5983, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.18541797611565053, |
| "grad_norm": 1.4029372930526733, |
| "learning_rate": 9.973543205857057e-05, |
| "loss": 0.5699, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.18856065367693275, |
| "grad_norm": 1.3018133640289307, |
| "learning_rate": 9.971771718203072e-05, |
| "loss": 0.4936, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.19170333123821495, |
| "grad_norm": 1.3082265853881836, |
| "learning_rate": 9.969942996948209e-05, |
| "loss": 0.5025, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.19484600879949718, |
| "grad_norm": 1.2923167943954468, |
| "learning_rate": 9.968057063144182e-05, |
| "loss": 0.5779, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.19798868636077938, |
| "grad_norm": 1.2902971506118774, |
| "learning_rate": 9.966113938501313e-05, |
| "loss": 0.5373, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.2011313639220616, |
| "grad_norm": 1.391560673713684, |
| "learning_rate": 9.964113645388293e-05, |
| "loss": 0.5858, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.2042740414833438, |
| "grad_norm": 1.3245513439178467, |
| "learning_rate": 9.96205620683192e-05, |
| "loss": 0.6043, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.20741671904462602, |
| "grad_norm": 1.4998241662979126, |
| "learning_rate": 9.95994164651683e-05, |
| "loss": 0.5785, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.21055939660590822, |
| "grad_norm": 1.090804934501648, |
| "learning_rate": 9.957769988785236e-05, |
| "loss": 0.6439, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.21370207416719045, |
| "grad_norm": 1.1564654111862183, |
| "learning_rate": 9.955541258636631e-05, |
| "loss": 0.5091, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.21684475172847265, |
| "grad_norm": 1.1778066158294678, |
| "learning_rate": 9.953255481727513e-05, |
| "loss": 0.5456, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.21998742928975487, |
| "grad_norm": 1.3568626642227173, |
| "learning_rate": 9.950912684371088e-05, |
| "loss": 0.5208, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2231301068510371, |
| "grad_norm": 1.804425597190857, |
| "learning_rate": 9.948512893536961e-05, |
| "loss": 0.4956, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.2262727844123193, |
| "grad_norm": 1.226159930229187, |
| "learning_rate": 9.946056136850833e-05, |
| "loss": 0.5812, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.22941546197360152, |
| "grad_norm": 1.1530790328979492, |
| "learning_rate": 9.943542442594177e-05, |
| "loss": 0.4742, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.23255813953488372, |
| "grad_norm": 1.390417218208313, |
| "learning_rate": 9.940971839703916e-05, |
| "loss": 0.619, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.23570081709616594, |
| "grad_norm": 1.4010789394378662, |
| "learning_rate": 9.938344357772087e-05, |
| "loss": 0.6086, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.23884349465744814, |
| "grad_norm": 1.6488044261932373, |
| "learning_rate": 9.935660027045506e-05, |
| "loss": 0.551, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.24198617221873037, |
| "grad_norm": 1.0560044050216675, |
| "learning_rate": 9.932918878425412e-05, |
| "loss": 0.532, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.24512884978001256, |
| "grad_norm": 1.0651888847351074, |
| "learning_rate": 9.930120943467117e-05, |
| "loss": 0.5012, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.2482715273412948, |
| "grad_norm": 1.0553079843521118, |
| "learning_rate": 9.927266254379642e-05, |
| "loss": 0.5576, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.251414204902577, |
| "grad_norm": 1.007480263710022, |
| "learning_rate": 9.924354844025339e-05, |
| "loss": 0.4839, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2545568824638592, |
| "grad_norm": 1.0924334526062012, |
| "learning_rate": 9.921386745919528e-05, |
| "loss": 0.595, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.25769956002514144, |
| "grad_norm": 1.3309390544891357, |
| "learning_rate": 9.918361994230097e-05, |
| "loss": 0.5224, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.2608422375864236, |
| "grad_norm": 0.9702763557434082, |
| "learning_rate": 9.915280623777114e-05, |
| "loss": 0.4871, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.26398491514770583, |
| "grad_norm": 1.0511876344680786, |
| "learning_rate": 9.912142670032427e-05, |
| "loss": 0.5861, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.26712759270898806, |
| "grad_norm": 1.396050214767456, |
| "learning_rate": 9.908948169119251e-05, |
| "loss": 0.4651, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.2702702702702703, |
| "grad_norm": 0.985396683216095, |
| "learning_rate": 9.905697157811761e-05, |
| "loss": 0.4302, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.27341294783155246, |
| "grad_norm": 0.9169828295707703, |
| "learning_rate": 9.902389673534659e-05, |
| "loss": 0.5212, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.2765556253928347, |
| "grad_norm": 0.9107710123062134, |
| "learning_rate": 9.899025754362751e-05, |
| "loss": 0.4941, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.2796983029541169, |
| "grad_norm": 0.9720286726951599, |
| "learning_rate": 9.8956054390205e-05, |
| "loss": 0.5169, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.28284098051539913, |
| "grad_norm": 1.1490366458892822, |
| "learning_rate": 9.892128766881596e-05, |
| "loss": 0.4973, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.28598365807668136, |
| "grad_norm": 1.2628952264785767, |
| "learning_rate": 9.888595777968479e-05, |
| "loss": 0.5194, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.2891263356379635, |
| "grad_norm": 1.1610651016235352, |
| "learning_rate": 9.885006512951897e-05, |
| "loss": 0.4994, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.29226901319924575, |
| "grad_norm": 1.054768681526184, |
| "learning_rate": 9.881361013150436e-05, |
| "loss": 0.4664, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.295411690760528, |
| "grad_norm": 1.0745666027069092, |
| "learning_rate": 9.877659320530037e-05, |
| "loss": 0.5306, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.2985543683218102, |
| "grad_norm": 1.3258591890335083, |
| "learning_rate": 9.873901477703516e-05, |
| "loss": 0.5076, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.3016970458830924, |
| "grad_norm": 1.222783088684082, |
| "learning_rate": 9.870087527930077e-05, |
| "loss": 0.4581, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.3048397234443746, |
| "grad_norm": 0.9374076724052429, |
| "learning_rate": 9.866217515114805e-05, |
| "loss": 0.4643, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.3079824010056568, |
| "grad_norm": 1.3485162258148193, |
| "learning_rate": 9.862291483808173e-05, |
| "loss": 0.5551, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.31112507856693905, |
| "grad_norm": 0.9162548780441284, |
| "learning_rate": 9.858309479205519e-05, |
| "loss": 0.5592, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.3142677561282212, |
| "grad_norm": 1.1385138034820557, |
| "learning_rate": 9.854271547146531e-05, |
| "loss": 0.477, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.31741043368950345, |
| "grad_norm": 1.0023164749145508, |
| "learning_rate": 9.850177734114718e-05, |
| "loss": 0.4972, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.32055311125078567, |
| "grad_norm": 2.540215492248535, |
| "learning_rate": 9.846028087236873e-05, |
| "loss": 0.5007, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.3236957888120679, |
| "grad_norm": 1.2012773752212524, |
| "learning_rate": 9.841822654282533e-05, |
| "loss": 0.5481, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.32683846637335007, |
| "grad_norm": 0.9517608284950256, |
| "learning_rate": 9.837561483663429e-05, |
| "loss": 0.567, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.3299811439346323, |
| "grad_norm": 1.0308321714401245, |
| "learning_rate": 9.833244624432927e-05, |
| "loss": 0.4856, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.3331238214959145, |
| "grad_norm": 1.118574857711792, |
| "learning_rate": 9.828872126285465e-05, |
| "loss": 0.465, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.33626649905719674, |
| "grad_norm": 1.0821537971496582, |
| "learning_rate": 9.824444039555977e-05, |
| "loss": 0.4394, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.33940917661847897, |
| "grad_norm": 0.8795451521873474, |
| "learning_rate": 9.81996041521932e-05, |
| "loss": 0.4383, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.34255185417976114, |
| "grad_norm": 1.1455141305923462, |
| "learning_rate": 9.815421304889687e-05, |
| "loss": 0.4805, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.34569453174104336, |
| "grad_norm": 1.1445369720458984, |
| "learning_rate": 9.81082676082e-05, |
| "loss": 0.5315, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.3488372093023256, |
| "grad_norm": 1.0800312757492065, |
| "learning_rate": 9.806176835901328e-05, |
| "loss": 0.5205, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.3519798868636078, |
| "grad_norm": 0.7038319706916809, |
| "learning_rate": 9.801471583662263e-05, |
| "loss": 0.515, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.35512256442489, |
| "grad_norm": 0.9790651202201843, |
| "learning_rate": 9.796711058268313e-05, |
| "loss": 0.504, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.3582652419861722, |
| "grad_norm": 1.1764894723892212, |
| "learning_rate": 9.791895314521267e-05, |
| "loss": 0.4806, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.36140791954745444, |
| "grad_norm": 0.9900022745132446, |
| "learning_rate": 9.787024407858582e-05, |
| "loss": 0.5358, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.36455059710873666, |
| "grad_norm": 0.8621386289596558, |
| "learning_rate": 9.782098394352725e-05, |
| "loss": 0.5494, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.36769327467001883, |
| "grad_norm": 0.8717844486236572, |
| "learning_rate": 9.777117330710547e-05, |
| "loss": 0.4967, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.37083595223130106, |
| "grad_norm": 0.9800569415092468, |
| "learning_rate": 9.772081274272611e-05, |
| "loss": 0.4538, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.3739786297925833, |
| "grad_norm": 0.9540134072303772, |
| "learning_rate": 9.766990283012544e-05, |
| "loss": 0.5149, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.3771213073538655, |
| "grad_norm": 1.0856047868728638, |
| "learning_rate": 9.761844415536372e-05, |
| "loss": 0.5042, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3802639849151477, |
| "grad_norm": 1.0914040803909302, |
| "learning_rate": 9.756643731081833e-05, |
| "loss": 0.5059, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.3834066624764299, |
| "grad_norm": 1.2371134757995605, |
| "learning_rate": 9.751388289517704e-05, |
| "loss": 0.4506, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.38654934003771213, |
| "grad_norm": 1.0402591228485107, |
| "learning_rate": 9.746078151343116e-05, |
| "loss": 0.5535, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.38969201759899436, |
| "grad_norm": 0.6260209083557129, |
| "learning_rate": 9.740713377686843e-05, |
| "loss": 0.4436, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.3928346951602766, |
| "grad_norm": 0.9588780999183655, |
| "learning_rate": 9.735294030306611e-05, |
| "loss": 0.5573, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.39597737272155875, |
| "grad_norm": 1.0838474035263062, |
| "learning_rate": 9.729820171588384e-05, |
| "loss": 0.4627, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.399120050282841, |
| "grad_norm": 1.0682798624038696, |
| "learning_rate": 9.724291864545643e-05, |
| "loss": 0.4893, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.4022627278441232, |
| "grad_norm": 0.9129301309585571, |
| "learning_rate": 9.718709172818661e-05, |
| "loss": 0.4898, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.40540540540540543, |
| "grad_norm": 1.0116883516311646, |
| "learning_rate": 9.713072160673777e-05, |
| "loss": 0.4615, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.4085480829666876, |
| "grad_norm": 1.057822823524475, |
| "learning_rate": 9.707380893002646e-05, |
| "loss": 0.4899, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.4116907605279698, |
| "grad_norm": 0.6419869661331177, |
| "learning_rate": 9.7016354353215e-05, |
| "loss": 0.4348, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.41483343808925205, |
| "grad_norm": 0.961713433265686, |
| "learning_rate": 9.695835853770387e-05, |
| "loss": 0.4921, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.4179761156505343, |
| "grad_norm": 0.9473373889923096, |
| "learning_rate": 9.689982215112417e-05, |
| "loss": 0.4926, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.42111879321181644, |
| "grad_norm": 1.2034335136413574, |
| "learning_rate": 9.684074586732987e-05, |
| "loss": 0.5042, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.42426147077309867, |
| "grad_norm": 0.9373855590820312, |
| "learning_rate": 9.678113036639014e-05, |
| "loss": 0.5076, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.4274041483343809, |
| "grad_norm": 1.016756296157837, |
| "learning_rate": 9.672097633458136e-05, |
| "loss": 0.4805, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.4305468258956631, |
| "grad_norm": 0.7454690337181091, |
| "learning_rate": 9.666028446437942e-05, |
| "loss": 0.5382, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.4336895034569453, |
| "grad_norm": 0.8196286559104919, |
| "learning_rate": 9.659905545445159e-05, |
| "loss": 0.4613, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.4368321810182275, |
| "grad_norm": 0.9132091403007507, |
| "learning_rate": 9.653729000964857e-05, |
| "loss": 0.4595, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.43997485857950974, |
| "grad_norm": 0.8063992857933044, |
| "learning_rate": 9.647498884099633e-05, |
| "loss": 0.4139, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.44311753614079197, |
| "grad_norm": 0.9756997227668762, |
| "learning_rate": 9.641215266568794e-05, |
| "loss": 0.3941, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.4462602137020742, |
| "grad_norm": 0.6542510390281677, |
| "learning_rate": 9.634878220707531e-05, |
| "loss": 0.4768, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.44940289126335636, |
| "grad_norm": 0.9039008617401123, |
| "learning_rate": 9.628487819466086e-05, |
| "loss": 0.4248, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.4525455688246386, |
| "grad_norm": 1.1151047945022583, |
| "learning_rate": 9.622044136408914e-05, |
| "loss": 0.5041, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.4556882463859208, |
| "grad_norm": 0.8580663800239563, |
| "learning_rate": 9.615547245713836e-05, |
| "loss": 0.4766, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.45883092394720304, |
| "grad_norm": 0.9799042344093323, |
| "learning_rate": 9.608997222171178e-05, |
| "loss": 0.4714, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.4619736015084852, |
| "grad_norm": 0.8485172986984253, |
| "learning_rate": 9.602394141182927e-05, |
| "loss": 0.4556, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 0.9632934927940369, |
| "learning_rate": 9.595738078761837e-05, |
| "loss": 0.4791, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.46825895663104966, |
| "grad_norm": 0.8843478560447693, |
| "learning_rate": 9.589029111530586e-05, |
| "loss": 0.4603, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.4714016341923319, |
| "grad_norm": 1.1230348348617554, |
| "learning_rate": 9.582267316720861e-05, |
| "loss": 0.491, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.47454431175361406, |
| "grad_norm": 0.8234013915061951, |
| "learning_rate": 9.575452772172495e-05, |
| "loss": 0.44, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.4776869893148963, |
| "grad_norm": 0.6838919520378113, |
| "learning_rate": 9.568585556332559e-05, |
| "loss": 0.4456, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.4808296668761785, |
| "grad_norm": 0.8424423336982727, |
| "learning_rate": 9.561665748254456e-05, |
| "loss": 0.4556, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.48397234443746073, |
| "grad_norm": 0.6735498905181885, |
| "learning_rate": 9.554693427597024e-05, |
| "loss": 0.5184, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.4871150219987429, |
| "grad_norm": 0.8868768811225891, |
| "learning_rate": 9.5476686746236e-05, |
| "loss": 0.5403, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.49025769956002513, |
| "grad_norm": 0.9957670569419861, |
| "learning_rate": 9.540591570201116e-05, |
| "loss": 0.4997, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.49340037712130735, |
| "grad_norm": 0.76320481300354, |
| "learning_rate": 9.533462195799157e-05, |
| "loss": 0.4534, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.4965430546825896, |
| "grad_norm": 0.8841500282287598, |
| "learning_rate": 9.526280633489018e-05, |
| "loss": 0.4724, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.4996857322438718, |
| "grad_norm": 0.8852142095565796, |
| "learning_rate": 9.519046965942776e-05, |
| "loss": 0.4655, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.502828409805154, |
| "grad_norm": 0.839430570602417, |
| "learning_rate": 9.511761276432321e-05, |
| "loss": 0.4386, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5059710873664363, |
| "grad_norm": 0.7581266760826111, |
| "learning_rate": 9.50442364882841e-05, |
| "loss": 0.4774, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.5091137649277184, |
| "grad_norm": 0.8754017949104309, |
| "learning_rate": 9.497034167599691e-05, |
| "loss": 0.4744, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5122564424890006, |
| "grad_norm": 0.9099476337432861, |
| "learning_rate": 9.48959291781174e-05, |
| "loss": 0.4292, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.5153991200502829, |
| "grad_norm": 0.9721155166625977, |
| "learning_rate": 9.482099985126079e-05, |
| "loss": 0.4137, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.518541797611565, |
| "grad_norm": 0.8385334014892578, |
| "learning_rate": 9.474555455799181e-05, |
| "loss": 0.471, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.5216844751728472, |
| "grad_norm": 0.9853966236114502, |
| "learning_rate": 9.466959416681495e-05, |
| "loss": 0.4233, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5248271527341295, |
| "grad_norm": 1.1044224500656128, |
| "learning_rate": 9.459311955216428e-05, |
| "loss": 0.5188, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.5279698302954117, |
| "grad_norm": 0.870677649974823, |
| "learning_rate": 9.451613159439349e-05, |
| "loss": 0.4676, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.531112507856694, |
| "grad_norm": 0.8571140170097351, |
| "learning_rate": 9.443863117976573e-05, |
| "loss": 0.4863, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.5342551854179761, |
| "grad_norm": 1.0573495626449585, |
| "learning_rate": 9.436061920044341e-05, |
| "loss": 0.5057, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.5373978629792583, |
| "grad_norm": 0.9805963635444641, |
| "learning_rate": 9.42820965544779e-05, |
| "loss": 0.468, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.5405405405405406, |
| "grad_norm": 0.8198602199554443, |
| "learning_rate": 9.420306414579925e-05, |
| "loss": 0.5054, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.5436832181018227, |
| "grad_norm": 0.9718137979507446, |
| "learning_rate": 9.412352288420572e-05, |
| "loss": 0.4824, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.5468258956631049, |
| "grad_norm": 1.0223153829574585, |
| "learning_rate": 9.404347368535337e-05, |
| "loss": 0.4502, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.5499685732243872, |
| "grad_norm": 0.9398010969161987, |
| "learning_rate": 9.396291747074547e-05, |
| "loss": 0.4761, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.5531112507856694, |
| "grad_norm": 0.9091777801513672, |
| "learning_rate": 9.38818551677219e-05, |
| "loss": 0.4033, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.5562539283469516, |
| "grad_norm": 1.06580650806427, |
| "learning_rate": 9.380028770944849e-05, |
| "loss": 0.4052, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.5593966059082338, |
| "grad_norm": 0.7236329913139343, |
| "learning_rate": 9.371821603490627e-05, |
| "loss": 0.4677, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.562539283469516, |
| "grad_norm": 0.8263210654258728, |
| "learning_rate": 9.363564108888069e-05, |
| "loss": 0.4576, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.5656819610307983, |
| "grad_norm": 1.022448182106018, |
| "learning_rate": 9.355256382195068e-05, |
| "loss": 0.4963, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.5688246385920804, |
| "grad_norm": 0.9639766812324524, |
| "learning_rate": 9.346898519047775e-05, |
| "loss": 0.4113, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.5719673161533627, |
| "grad_norm": 1.1044561862945557, |
| "learning_rate": 9.338490615659499e-05, |
| "loss": 0.5023, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.5751099937146449, |
| "grad_norm": 0.8272239565849304, |
| "learning_rate": 9.330032768819596e-05, |
| "loss": 0.4699, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.578252671275927, |
| "grad_norm": 0.7692523002624512, |
| "learning_rate": 9.321525075892356e-05, |
| "loss": 0.4292, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.5813953488372093, |
| "grad_norm": 0.9032982587814331, |
| "learning_rate": 9.312967634815888e-05, |
| "loss": 0.4432, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.5845380263984915, |
| "grad_norm": 0.7676737904548645, |
| "learning_rate": 9.304360544100982e-05, |
| "loss": 0.4311, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.5876807039597737, |
| "grad_norm": 0.9019532799720764, |
| "learning_rate": 9.29570390282998e-05, |
| "loss": 0.4464, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.590823381521056, |
| "grad_norm": 0.9738386869430542, |
| "learning_rate": 9.286997810655638e-05, |
| "loss": 0.5019, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.5939660590823381, |
| "grad_norm": 0.7886769771575928, |
| "learning_rate": 9.278242367799978e-05, |
| "loss": 0.4919, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.5971087366436204, |
| "grad_norm": 0.9002622365951538, |
| "learning_rate": 9.269437675053129e-05, |
| "loss": 0.4695, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.6002514142049026, |
| "grad_norm": 0.7023227214813232, |
| "learning_rate": 9.260583833772172e-05, |
| "loss": 0.4338, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.6033940917661847, |
| "grad_norm": 0.9442479014396667, |
| "learning_rate": 9.251680945879975e-05, |
| "loss": 0.4907, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.606536769327467, |
| "grad_norm": 0.6304488778114319, |
| "learning_rate": 9.24272911386401e-05, |
| "loss": 0.4612, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.6096794468887492, |
| "grad_norm": 0.731960117816925, |
| "learning_rate": 9.233728440775185e-05, |
| "loss": 0.4207, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.6128221244500315, |
| "grad_norm": 1.083849549293518, |
| "learning_rate": 9.224679030226648e-05, |
| "loss": 0.4775, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.6159648020113137, |
| "grad_norm": 0.6792687177658081, |
| "learning_rate": 9.215580986392607e-05, |
| "loss": 0.4708, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.6191074795725958, |
| "grad_norm": 0.7582160830497742, |
| "learning_rate": 9.20643441400711e-05, |
| "loss": 0.4352, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.6222501571338781, |
| "grad_norm": 0.7785065174102783, |
| "learning_rate": 9.197239418362862e-05, |
| "loss": 0.4199, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.6253928346951603, |
| "grad_norm": 0.9076778292655945, |
| "learning_rate": 9.187996105309995e-05, |
| "loss": 0.4937, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.6285355122564424, |
| "grad_norm": 0.9189762473106384, |
| "learning_rate": 9.178704581254865e-05, |
| "loss": 0.4553, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6316781898177247, |
| "grad_norm": 0.8485803008079529, |
| "learning_rate": 9.169364953158812e-05, |
| "loss": 0.4799, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.6348208673790069, |
| "grad_norm": 0.8296557068824768, |
| "learning_rate": 9.15997732853694e-05, |
| "loss": 0.4799, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.6379635449402892, |
| "grad_norm": 0.9346463680267334, |
| "learning_rate": 9.150541815456874e-05, |
| "loss": 0.4707, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.6411062225015713, |
| "grad_norm": 1.0045510530471802, |
| "learning_rate": 9.141058522537515e-05, |
| "loss": 0.5216, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.6442489000628535, |
| "grad_norm": 0.5840141773223877, |
| "learning_rate": 9.131527558947796e-05, |
| "loss": 0.429, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.6473915776241358, |
| "grad_norm": 0.8743481040000916, |
| "learning_rate": 9.121949034405417e-05, |
| "loss": 0.4734, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.650534255185418, |
| "grad_norm": 0.9631288051605225, |
| "learning_rate": 9.112323059175588e-05, |
| "loss": 0.4856, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.6536769327467001, |
| "grad_norm": 0.7583104372024536, |
| "learning_rate": 9.102649744069758e-05, |
| "loss": 0.4428, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.6568196103079824, |
| "grad_norm": 0.9227087497711182, |
| "learning_rate": 9.092929200444337e-05, |
| "loss": 0.4622, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.6599622878692646, |
| "grad_norm": 0.720124363899231, |
| "learning_rate": 9.083161540199417e-05, |
| "loss": 0.4136, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.6631049654305469, |
| "grad_norm": 0.6481117010116577, |
| "learning_rate": 9.073346875777487e-05, |
| "loss": 0.5445, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.666247642991829, |
| "grad_norm": 0.6970652937889099, |
| "learning_rate": 9.063485320162126e-05, |
| "loss": 0.4247, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.6693903205531112, |
| "grad_norm": 0.5132230520248413, |
| "learning_rate": 9.053576986876718e-05, |
| "loss": 0.4415, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.6725329981143935, |
| "grad_norm": 0.7673790454864502, |
| "learning_rate": 9.043621989983135e-05, |
| "loss": 0.5188, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.6756756756756757, |
| "grad_norm": 0.8441967368125916, |
| "learning_rate": 9.033620444080428e-05, |
| "loss": 0.4343, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.6788183532369579, |
| "grad_norm": 0.8746171593666077, |
| "learning_rate": 9.023572464303506e-05, |
| "loss": 0.4114, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.6819610307982401, |
| "grad_norm": 0.7494221925735474, |
| "learning_rate": 9.013478166321812e-05, |
| "loss": 0.4334, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.6851037083595223, |
| "grad_norm": 0.7263948917388916, |
| "learning_rate": 9.00333766633799e-05, |
| "loss": 0.4322, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.6882463859208046, |
| "grad_norm": 0.852172315120697, |
| "learning_rate": 8.99315108108655e-05, |
| "loss": 0.4506, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.6913890634820867, |
| "grad_norm": 0.7959320545196533, |
| "learning_rate": 8.98291852783252e-05, |
| "loss": 0.4456, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.6945317410433689, |
| "grad_norm": 0.5918748378753662, |
| "learning_rate": 8.9726401243701e-05, |
| "loss": 0.4181, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.6976744186046512, |
| "grad_norm": 0.9726805090904236, |
| "learning_rate": 8.962315989021304e-05, |
| "loss": 0.4964, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.7008170961659334, |
| "grad_norm": 0.8826568126678467, |
| "learning_rate": 8.951946240634596e-05, |
| "loss": 0.4702, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.7039597737272156, |
| "grad_norm": 0.7354099154472351, |
| "learning_rate": 8.941530998583527e-05, |
| "loss": 0.4258, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.7071024512884978, |
| "grad_norm": 0.9217835664749146, |
| "learning_rate": 8.931070382765359e-05, |
| "loss": 0.5185, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.71024512884978, |
| "grad_norm": 0.7444872260093689, |
| "learning_rate": 8.920564513599679e-05, |
| "loss": 0.4534, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.7133878064110623, |
| "grad_norm": 0.7847276926040649, |
| "learning_rate": 8.910013512027022e-05, |
| "loss": 0.4232, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.7165304839723444, |
| "grad_norm": 0.8024355173110962, |
| "learning_rate": 8.899417499507471e-05, |
| "loss": 0.4579, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.7196731615336267, |
| "grad_norm": 0.7088613510131836, |
| "learning_rate": 8.888776598019266e-05, |
| "loss": 0.4437, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.7228158390949089, |
| "grad_norm": 0.6009235382080078, |
| "learning_rate": 8.87809093005739e-05, |
| "loss": 0.397, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.725958516656191, |
| "grad_norm": 0.8743120431900024, |
| "learning_rate": 8.867360618632172e-05, |
| "loss": 0.5056, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.7291011942174733, |
| "grad_norm": 0.899148166179657, |
| "learning_rate": 8.856585787267856e-05, |
| "loss": 0.4521, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.7322438717787555, |
| "grad_norm": 0.8690171837806702, |
| "learning_rate": 8.845766560001193e-05, |
| "loss": 0.4708, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.7353865493400377, |
| "grad_norm": 0.9699186682701111, |
| "learning_rate": 8.834903061380002e-05, |
| "loss": 0.4534, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.73852922690132, |
| "grad_norm": 0.8577262163162231, |
| "learning_rate": 8.823995416461744e-05, |
| "loss": 0.4096, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.7416719044626021, |
| "grad_norm": 0.7458922266960144, |
| "learning_rate": 8.81304375081208e-05, |
| "loss": 0.46, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.7448145820238844, |
| "grad_norm": 0.7347140908241272, |
| "learning_rate": 8.802048190503423e-05, |
| "loss": 0.4684, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.7479572595851666, |
| "grad_norm": 0.7161451578140259, |
| "learning_rate": 8.79100886211349e-05, |
| "loss": 0.4715, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.7510999371464487, |
| "grad_norm": 0.8321588039398193, |
| "learning_rate": 8.779925892723842e-05, |
| "loss": 0.3598, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.754242614707731, |
| "grad_norm": 0.9462142586708069, |
| "learning_rate": 8.768799409918423e-05, |
| "loss": 0.4404, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.7573852922690132, |
| "grad_norm": 0.6842710971832275, |
| "learning_rate": 8.75762954178209e-05, |
| "loss": 0.4648, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.7605279698302954, |
| "grad_norm": 0.8573241829872131, |
| "learning_rate": 8.746416416899145e-05, |
| "loss": 0.4592, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.7636706473915776, |
| "grad_norm": 0.751291811466217, |
| "learning_rate": 8.735160164351841e-05, |
| "loss": 0.5319, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.7668133249528598, |
| "grad_norm": 0.731086790561676, |
| "learning_rate": 8.72386091371891e-05, |
| "loss": 0.4629, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.7699560025141421, |
| "grad_norm": 0.9289976358413696, |
| "learning_rate": 8.712518795074063e-05, |
| "loss": 0.4427, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.7730986800754243, |
| "grad_norm": 0.7036064267158508, |
| "learning_rate": 8.701133938984496e-05, |
| "loss": 0.4679, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.7762413576367064, |
| "grad_norm": 0.778161346912384, |
| "learning_rate": 8.689706476509385e-05, |
| "loss": 0.4489, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.7793840351979887, |
| "grad_norm": 0.8694556951522827, |
| "learning_rate": 8.678236539198382e-05, |
| "loss": 0.4048, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.7825267127592709, |
| "grad_norm": 0.5768362283706665, |
| "learning_rate": 8.666724259090092e-05, |
| "loss": 0.4434, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.7856693903205532, |
| "grad_norm": 0.604917585849762, |
| "learning_rate": 8.655169768710562e-05, |
| "loss": 0.4669, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.7888120678818353, |
| "grad_norm": 0.833985447883606, |
| "learning_rate": 8.643573201071748e-05, |
| "loss": 0.4267, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.7919547454431175, |
| "grad_norm": 0.7951568365097046, |
| "learning_rate": 8.631934689669992e-05, |
| "loss": 0.4028, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.7950974230043998, |
| "grad_norm": 0.7703410983085632, |
| "learning_rate": 8.620254368484474e-05, |
| "loss": 0.4153, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.798240100565682, |
| "grad_norm": 0.8545910716056824, |
| "learning_rate": 8.608532371975684e-05, |
| "loss": 0.4949, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.8013827781269641, |
| "grad_norm": 0.8206099271774292, |
| "learning_rate": 8.59676883508386e-05, |
| "loss": 0.4714, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.8045254556882464, |
| "grad_norm": 0.7841479182243347, |
| "learning_rate": 8.584963893227442e-05, |
| "loss": 0.4888, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.8076681332495286, |
| "grad_norm": 0.7417731285095215, |
| "learning_rate": 8.573117682301514e-05, |
| "loss": 0.4951, |
| "step": 3855 |
| }, |
| { |
| "epoch": 0.8108108108108109, |
| "grad_norm": 0.9013925194740295, |
| "learning_rate": 8.561230338676239e-05, |
| "loss": 0.4542, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.813953488372093, |
| "grad_norm": 1.2146642208099365, |
| "learning_rate": 8.549301999195283e-05, |
| "loss": 0.4606, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.8170961659333752, |
| "grad_norm": 0.8740483522415161, |
| "learning_rate": 8.537332801174245e-05, |
| "loss": 0.4562, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8202388434946575, |
| "grad_norm": 0.7769590020179749, |
| "learning_rate": 8.525322882399082e-05, |
| "loss": 0.4385, |
| "step": 3915 |
| }, |
| { |
| "epoch": 0.8233815210559396, |
| "grad_norm": 0.7966271042823792, |
| "learning_rate": 8.513272381124511e-05, |
| "loss": 0.4011, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.8265241986172219, |
| "grad_norm": 0.6132526397705078, |
| "learning_rate": 8.501181436072422e-05, |
| "loss": 0.393, |
| "step": 3945 |
| }, |
| { |
| "epoch": 0.8296668761785041, |
| "grad_norm": 0.6438138484954834, |
| "learning_rate": 8.489050186430285e-05, |
| "loss": 0.4226, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.8328095537397863, |
| "grad_norm": 0.8362025022506714, |
| "learning_rate": 8.476878771849545e-05, |
| "loss": 0.4216, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.8359522313010685, |
| "grad_norm": 0.770706057548523, |
| "learning_rate": 8.464667332444012e-05, |
| "loss": 0.4278, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.8390949088623507, |
| "grad_norm": 0.8944802284240723, |
| "learning_rate": 8.452416008788254e-05, |
| "loss": 0.4609, |
| "step": 4005 |
| }, |
| { |
| "epoch": 0.8422375864236329, |
| "grad_norm": 0.9292035102844238, |
| "learning_rate": 8.440124941915972e-05, |
| "loss": 0.4124, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.8453802639849152, |
| "grad_norm": 0.6450730562210083, |
| "learning_rate": 8.427794273318377e-05, |
| "loss": 0.4124, |
| "step": 4035 |
| }, |
| { |
| "epoch": 0.8485229415461973, |
| "grad_norm": 1.0732468366622925, |
| "learning_rate": 8.415424144942569e-05, |
| "loss": 0.4678, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.8516656191074796, |
| "grad_norm": 0.900360107421875, |
| "learning_rate": 8.403014699189892e-05, |
| "loss": 0.4299, |
| "step": 4065 |
| }, |
| { |
| "epoch": 0.8548082966687618, |
| "grad_norm": 0.7163972854614258, |
| "learning_rate": 8.39056607891431e-05, |
| "loss": 0.4651, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.857950974230044, |
| "grad_norm": 0.6078224182128906, |
| "learning_rate": 8.378078427420739e-05, |
| "loss": 0.4612, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.8610936517913262, |
| "grad_norm": 0.7975668907165527, |
| "learning_rate": 8.365551888463423e-05, |
| "loss": 0.4521, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.8642363293526084, |
| "grad_norm": 0.7620348930358887, |
| "learning_rate": 8.352986606244262e-05, |
| "loss": 0.4527, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.8673790069138906, |
| "grad_norm": 0.7811437249183655, |
| "learning_rate": 8.340382725411155e-05, |
| "loss": 0.4639, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.8705216844751729, |
| "grad_norm": 0.46538805961608887, |
| "learning_rate": 8.327740391056343e-05, |
| "loss": 0.3793, |
| "step": 4155 |
| }, |
| { |
| "epoch": 0.873664362036455, |
| "grad_norm": 0.893225371837616, |
| "learning_rate": 8.315059748714728e-05, |
| "loss": 0.4824, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.8768070395977373, |
| "grad_norm": 0.8325145244598389, |
| "learning_rate": 8.302340944362205e-05, |
| "loss": 0.4623, |
| "step": 4185 |
| }, |
| { |
| "epoch": 0.8799497171590195, |
| "grad_norm": 0.7328510880470276, |
| "learning_rate": 8.289584124413978e-05, |
| "loss": 0.4075, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.8830923947203017, |
| "grad_norm": 0.35754507780075073, |
| "learning_rate": 8.276789435722875e-05, |
| "loss": 0.3328, |
| "step": 4215 |
| }, |
| { |
| "epoch": 0.8862350722815839, |
| "grad_norm": 0.78349369764328, |
| "learning_rate": 8.263957025577663e-05, |
| "loss": 0.4962, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.8893777498428661, |
| "grad_norm": 0.644481360912323, |
| "learning_rate": 8.251087041701339e-05, |
| "loss": 0.3977, |
| "step": 4245 |
| }, |
| { |
| "epoch": 0.8925204274041484, |
| "grad_norm": 0.618881344795227, |
| "learning_rate": 8.238179632249443e-05, |
| "loss": 0.3967, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.8956631049654306, |
| "grad_norm": 0.7603642344474792, |
| "learning_rate": 8.22523494580835e-05, |
| "loss": 0.4413, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.8988057825267127, |
| "grad_norm": 0.6301630735397339, |
| "learning_rate": 8.212253131393549e-05, |
| "loss": 0.4333, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.901948460087995, |
| "grad_norm": 0.7729358077049255, |
| "learning_rate": 8.199234338447942e-05, |
| "loss": 0.4633, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.9050911376492772, |
| "grad_norm": 0.9121199250221252, |
| "learning_rate": 8.186178716840118e-05, |
| "loss": 0.4411, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.9082338152105593, |
| "grad_norm": 0.5462374091148376, |
| "learning_rate": 8.17308641686262e-05, |
| "loss": 0.4659, |
| "step": 4335 |
| }, |
| { |
| "epoch": 0.9113764927718416, |
| "grad_norm": 0.7599003911018372, |
| "learning_rate": 8.15995758923023e-05, |
| "loss": 0.4015, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.9145191703331238, |
| "grad_norm": 0.8557884693145752, |
| "learning_rate": 8.14679238507822e-05, |
| "loss": 0.4574, |
| "step": 4365 |
| }, |
| { |
| "epoch": 0.9176618478944061, |
| "grad_norm": 0.7987812757492065, |
| "learning_rate": 8.133590955960619e-05, |
| "loss": 0.4501, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.9208045254556882, |
| "grad_norm": 0.8603717088699341, |
| "learning_rate": 8.120353453848471e-05, |
| "loss": 0.4201, |
| "step": 4395 |
| }, |
| { |
| "epoch": 0.9239472030169704, |
| "grad_norm": 0.7066472768783569, |
| "learning_rate": 8.107080031128078e-05, |
| "loss": 0.4035, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.9270898805782527, |
| "grad_norm": 0.6430373191833496, |
| "learning_rate": 8.09377084059925e-05, |
| "loss": 0.4141, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 0.6911259889602661, |
| "learning_rate": 8.080426035473549e-05, |
| "loss": 0.4431, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.933375235700817, |
| "grad_norm": 0.8445611000061035, |
| "learning_rate": 8.067045769372515e-05, |
| "loss": 0.4469, |
| "step": 4455 |
| }, |
| { |
| "epoch": 0.9365179132620993, |
| "grad_norm": 0.9317618012428284, |
| "learning_rate": 8.053630196325914e-05, |
| "loss": 0.4051, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.9396605908233815, |
| "grad_norm": 0.8286532163619995, |
| "learning_rate": 8.040179470769946e-05, |
| "loss": 0.4158, |
| "step": 4485 |
| }, |
| { |
| "epoch": 0.9428032683846638, |
| "grad_norm": 0.7000495195388794, |
| "learning_rate": 8.026693747545486e-05, |
| "loss": 0.4202, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.9459459459459459, |
| "grad_norm": 0.8104173541069031, |
| "learning_rate": 8.013173181896283e-05, |
| "loss": 0.4369, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.9490886235072281, |
| "grad_norm": 0.864750862121582, |
| "learning_rate": 7.999617929467187e-05, |
| "loss": 0.4152, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.9522313010685104, |
| "grad_norm": 0.7788864970207214, |
| "learning_rate": 7.98602814630235e-05, |
| "loss": 0.492, |
| "step": 4545 |
| }, |
| { |
| "epoch": 0.9553739786297926, |
| "grad_norm": 0.707156777381897, |
| "learning_rate": 7.972403988843435e-05, |
| "loss": 0.4105, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.9585166561910748, |
| "grad_norm": 0.8454593420028687, |
| "learning_rate": 7.958745613927809e-05, |
| "loss": 0.4622, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.961659333752357, |
| "grad_norm": 0.8026373982429504, |
| "learning_rate": 7.945053178786744e-05, |
| "loss": 0.4236, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.9648020113136392, |
| "grad_norm": 0.786409318447113, |
| "learning_rate": 7.931326841043596e-05, |
| "loss": 0.4677, |
| "step": 4605 |
| }, |
| { |
| "epoch": 0.9679446888749215, |
| "grad_norm": 0.5381405353546143, |
| "learning_rate": 7.917566758712005e-05, |
| "loss": 0.443, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.9710873664362036, |
| "grad_norm": 0.6609058380126953, |
| "learning_rate": 7.903773090194069e-05, |
| "loss": 0.4573, |
| "step": 4635 |
| }, |
| { |
| "epoch": 0.9742300439974858, |
| "grad_norm": 0.7192760705947876, |
| "learning_rate": 7.889945994278514e-05, |
| "loss": 0.4387, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.9773727215587681, |
| "grad_norm": 0.7502164244651794, |
| "learning_rate": 7.87608563013888e-05, |
| "loss": 0.399, |
| "step": 4665 |
| }, |
| { |
| "epoch": 0.9805153991200503, |
| "grad_norm": 0.7829092144966125, |
| "learning_rate": 7.86219215733168e-05, |
| "loss": 0.3705, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.9836580766813325, |
| "grad_norm": 0.791359007358551, |
| "learning_rate": 7.848265735794558e-05, |
| "loss": 0.4434, |
| "step": 4695 |
| }, |
| { |
| "epoch": 0.9868007542426147, |
| "grad_norm": 0.7627493739128113, |
| "learning_rate": 7.834306525844461e-05, |
| "loss": 0.4496, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.9899434318038969, |
| "grad_norm": 0.679959237575531, |
| "learning_rate": 7.820314688175784e-05, |
| "loss": 0.4815, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.9930861093651792, |
| "grad_norm": 0.8766529560089111, |
| "learning_rate": 7.806290383858523e-05, |
| "loss": 0.4704, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.9962287869264613, |
| "grad_norm": 1.1642574071884155, |
| "learning_rate": 7.792233774336423e-05, |
| "loss": 0.4974, |
| "step": 4755 |
| }, |
| { |
| "epoch": 0.9993714644877436, |
| "grad_norm": 0.7194317579269409, |
| "learning_rate": 7.778145021425114e-05, |
| "loss": 0.4423, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.0025141420490258, |
| "grad_norm": 0.7814803719520569, |
| "learning_rate": 7.764024287310252e-05, |
| "loss": 0.4194, |
| "step": 4785 |
| }, |
| { |
| "epoch": 1.005656819610308, |
| "grad_norm": 0.8891781568527222, |
| "learning_rate": 7.749871734545652e-05, |
| "loss": 0.3977, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.0087994971715901, |
| "grad_norm": 0.7444355487823486, |
| "learning_rate": 7.735687526051418e-05, |
| "loss": 0.3924, |
| "step": 4815 |
| }, |
| { |
| "epoch": 1.0119421747328725, |
| "grad_norm": 0.9248786568641663, |
| "learning_rate": 7.721471825112062e-05, |
| "loss": 0.4273, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.0150848522941547, |
| "grad_norm": 0.6513450741767883, |
| "learning_rate": 7.70722479537463e-05, |
| "loss": 0.3909, |
| "step": 4845 |
| }, |
| { |
| "epoch": 1.0182275298554369, |
| "grad_norm": 0.8597205877304077, |
| "learning_rate": 7.692946600846818e-05, |
| "loss": 0.4027, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.021370207416719, |
| "grad_norm": 0.9086320996284485, |
| "learning_rate": 7.678637405895076e-05, |
| "loss": 0.4225, |
| "step": 4875 |
| }, |
| { |
| "epoch": 1.0245128849780012, |
| "grad_norm": 0.8219915628433228, |
| "learning_rate": 7.66429737524273e-05, |
| "loss": 0.4055, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.0276555625392834, |
| "grad_norm": 0.9232605695724487, |
| "learning_rate": 7.649926673968069e-05, |
| "loss": 0.3801, |
| "step": 4905 |
| }, |
| { |
| "epoch": 1.0307982401005658, |
| "grad_norm": 0.8866775035858154, |
| "learning_rate": 7.635525467502462e-05, |
| "loss": 0.3887, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.033940917661848, |
| "grad_norm": 0.6395006775856018, |
| "learning_rate": 7.62109392162844e-05, |
| "loss": 0.4018, |
| "step": 4935 |
| }, |
| { |
| "epoch": 1.03708359522313, |
| "grad_norm": 0.8276055455207825, |
| "learning_rate": 7.60663220247779e-05, |
| "loss": 0.3875, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.0402262727844123, |
| "grad_norm": 0.8251763582229614, |
| "learning_rate": 7.592140476529652e-05, |
| "loss": 0.3912, |
| "step": 4965 |
| }, |
| { |
| "epoch": 1.0433689503456944, |
| "grad_norm": 0.8321304321289062, |
| "learning_rate": 7.577618910608591e-05, |
| "loss": 0.4317, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.0465116279069768, |
| "grad_norm": 0.6474670171737671, |
| "learning_rate": 7.56306767188268e-05, |
| "loss": 0.4594, |
| "step": 4995 |
| }, |
| { |
| "epoch": 1.049654305468259, |
| "grad_norm": 0.6989348530769348, |
| "learning_rate": 7.548486927861582e-05, |
| "loss": 0.3744, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.0527969830295412, |
| "grad_norm": 0.8184515237808228, |
| "learning_rate": 7.533876846394613e-05, |
| "loss": 0.3364, |
| "step": 5025 |
| }, |
| { |
| "epoch": 1.0559396605908233, |
| "grad_norm": 0.7965102195739746, |
| "learning_rate": 7.519237595668811e-05, |
| "loss": 0.3934, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.0590823381521055, |
| "grad_norm": 0.731299638748169, |
| "learning_rate": 7.504569344207007e-05, |
| "loss": 0.4161, |
| "step": 5055 |
| }, |
| { |
| "epoch": 1.062225015713388, |
| "grad_norm": 0.9074578881263733, |
| "learning_rate": 7.489872260865877e-05, |
| "loss": 0.4103, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.06536769327467, |
| "grad_norm": 0.8735909461975098, |
| "learning_rate": 7.475146514834001e-05, |
| "loss": 0.3686, |
| "step": 5085 |
| }, |
| { |
| "epoch": 1.0685103708359522, |
| "grad_norm": 0.7814076542854309, |
| "learning_rate": 7.460392275629918e-05, |
| "loss": 0.3943, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.0716530483972344, |
| "grad_norm": 0.8307476043701172, |
| "learning_rate": 7.445609713100171e-05, |
| "loss": 0.3999, |
| "step": 5115 |
| }, |
| { |
| "epoch": 1.0747957259585166, |
| "grad_norm": 0.7908287048339844, |
| "learning_rate": 7.430798997417353e-05, |
| "loss": 0.4104, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.077938403519799, |
| "grad_norm": 0.8598707914352417, |
| "learning_rate": 7.415960299078143e-05, |
| "loss": 0.3976, |
| "step": 5145 |
| }, |
| { |
| "epoch": 1.0810810810810811, |
| "grad_norm": 0.5163241028785706, |
| "learning_rate": 7.40109378890136e-05, |
| "loss": 0.3506, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.0842237586423633, |
| "grad_norm": 0.8642787933349609, |
| "learning_rate": 7.386199638025973e-05, |
| "loss": 0.31, |
| "step": 5175 |
| }, |
| { |
| "epoch": 1.0873664362036455, |
| "grad_norm": 0.7603743076324463, |
| "learning_rate": 7.371278017909148e-05, |
| "loss": 0.4695, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.0905091137649277, |
| "grad_norm": 0.7949853539466858, |
| "learning_rate": 7.356329100324273e-05, |
| "loss": 0.4076, |
| "step": 5205 |
| }, |
| { |
| "epoch": 1.0936517913262098, |
| "grad_norm": 0.8560110926628113, |
| "learning_rate": 7.341353057358966e-05, |
| "loss": 0.3833, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.0967944688874922, |
| "grad_norm": 0.632763147354126, |
| "learning_rate": 7.326350061413114e-05, |
| "loss": 0.4128, |
| "step": 5235 |
| }, |
| { |
| "epoch": 1.0999371464487744, |
| "grad_norm": 0.9416031837463379, |
| "learning_rate": 7.311320285196875e-05, |
| "loss": 0.3665, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.1030798240100566, |
| "grad_norm": 0.6195524334907532, |
| "learning_rate": 7.296263901728694e-05, |
| "loss": 0.362, |
| "step": 5265 |
| }, |
| { |
| "epoch": 1.1062225015713387, |
| "grad_norm": 0.8545498251914978, |
| "learning_rate": 7.281181084333311e-05, |
| "loss": 0.361, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.109365179132621, |
| "grad_norm": 0.75226229429245, |
| "learning_rate": 7.26607200663977e-05, |
| "loss": 0.3948, |
| "step": 5295 |
| }, |
| { |
| "epoch": 1.1125078566939033, |
| "grad_norm": 0.877756655216217, |
| "learning_rate": 7.250936842579407e-05, |
| "loss": 0.4061, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.1156505342551855, |
| "grad_norm": 0.5953283309936523, |
| "learning_rate": 7.235775766383862e-05, |
| "loss": 0.3273, |
| "step": 5325 |
| }, |
| { |
| "epoch": 1.1187932118164676, |
| "grad_norm": 0.8206706643104553, |
| "learning_rate": 7.220588952583071e-05, |
| "loss": 0.3757, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.1219358893777498, |
| "grad_norm": 0.7466344237327576, |
| "learning_rate": 7.205376576003247e-05, |
| "loss": 0.3892, |
| "step": 5355 |
| }, |
| { |
| "epoch": 1.125078566939032, |
| "grad_norm": 0.8034494519233704, |
| "learning_rate": 7.190138811764882e-05, |
| "loss": 0.4043, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.1282212445003144, |
| "grad_norm": 0.9050668478012085, |
| "learning_rate": 7.174875835280716e-05, |
| "loss": 0.3812, |
| "step": 5385 |
| }, |
| { |
| "epoch": 1.1313639220615965, |
| "grad_norm": 0.8540876507759094, |
| "learning_rate": 7.159587822253733e-05, |
| "loss": 0.3645, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.1345065996228787, |
| "grad_norm": 0.7688354849815369, |
| "learning_rate": 7.14427494867512e-05, |
| "loss": 0.3683, |
| "step": 5415 |
| }, |
| { |
| "epoch": 1.1376492771841609, |
| "grad_norm": 0.6950829029083252, |
| "learning_rate": 7.128937390822261e-05, |
| "loss": 0.3347, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.140791954745443, |
| "grad_norm": 0.8212427496910095, |
| "learning_rate": 7.113575325256694e-05, |
| "loss": 0.3775, |
| "step": 5445 |
| }, |
| { |
| "epoch": 1.1439346323067254, |
| "grad_norm": 0.8312988877296448, |
| "learning_rate": 7.098188928822084e-05, |
| "loss": 0.4325, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.1470773098680076, |
| "grad_norm": 0.9646623134613037, |
| "learning_rate": 7.082778378642184e-05, |
| "loss": 0.3898, |
| "step": 5475 |
| }, |
| { |
| "epoch": 1.1502199874292898, |
| "grad_norm": 0.8333424925804138, |
| "learning_rate": 7.0673438521188e-05, |
| "loss": 0.4068, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.153362664990572, |
| "grad_norm": 0.918892502784729, |
| "learning_rate": 7.051885526929747e-05, |
| "loss": 0.3968, |
| "step": 5505 |
| }, |
| { |
| "epoch": 1.156505342551854, |
| "grad_norm": 0.5460782647132874, |
| "learning_rate": 7.0364035810268e-05, |
| "loss": 0.3672, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.1596480201131363, |
| "grad_norm": 0.876811683177948, |
| "learning_rate": 7.020898192633655e-05, |
| "loss": 0.408, |
| "step": 5535 |
| }, |
| { |
| "epoch": 1.1627906976744187, |
| "grad_norm": 0.6740222573280334, |
| "learning_rate": 7.005369540243864e-05, |
| "loss": 0.2995, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.1659333752357008, |
| "grad_norm": 0.8702965378761292, |
| "learning_rate": 6.989817802618792e-05, |
| "loss": 0.3307, |
| "step": 5565 |
| }, |
| { |
| "epoch": 1.169076052796983, |
| "grad_norm": 0.8837511539459229, |
| "learning_rate": 6.974243158785554e-05, |
| "loss": 0.3864, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.1722187303582652, |
| "grad_norm": 0.4050454795360565, |
| "learning_rate": 6.958645788034952e-05, |
| "loss": 0.3525, |
| "step": 5595 |
| }, |
| { |
| "epoch": 1.1753614079195476, |
| "grad_norm": 0.8361005187034607, |
| "learning_rate": 6.943025869919418e-05, |
| "loss": 0.3747, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.1785040854808297, |
| "grad_norm": 0.841556191444397, |
| "learning_rate": 6.92738358425094e-05, |
| "loss": 0.406, |
| "step": 5625 |
| }, |
| { |
| "epoch": 1.181646763042112, |
| "grad_norm": 0.629443883895874, |
| "learning_rate": 6.911719111098996e-05, |
| "loss": 0.4175, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.184789440603394, |
| "grad_norm": 0.7146449685096741, |
| "learning_rate": 6.896032630788476e-05, |
| "loss": 0.3511, |
| "step": 5655 |
| }, |
| { |
| "epoch": 1.1879321181646763, |
| "grad_norm": 0.8358393311500549, |
| "learning_rate": 6.880324323897617e-05, |
| "loss": 0.3851, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.1910747957259584, |
| "grad_norm": 0.742857813835144, |
| "learning_rate": 6.864594371255913e-05, |
| "loss": 0.3821, |
| "step": 5685 |
| }, |
| { |
| "epoch": 1.1942174732872408, |
| "grad_norm": 0.7099196910858154, |
| "learning_rate": 6.848842953942036e-05, |
| "loss": 0.3789, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.197360150848523, |
| "grad_norm": 0.754542350769043, |
| "learning_rate": 6.83307025328176e-05, |
| "loss": 0.3472, |
| "step": 5715 |
| }, |
| { |
| "epoch": 1.2005028284098052, |
| "grad_norm": 0.7466986775398254, |
| "learning_rate": 6.817276450845856e-05, |
| "loss": 0.3393, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.2036455059710873, |
| "grad_norm": 0.7026840448379517, |
| "learning_rate": 6.801461728448022e-05, |
| "loss": 0.3891, |
| "step": 5745 |
| }, |
| { |
| "epoch": 1.2067881835323695, |
| "grad_norm": 1.1348669528961182, |
| "learning_rate": 6.785626268142777e-05, |
| "loss": 0.3802, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.2099308610936519, |
| "grad_norm": 0.7511578798294067, |
| "learning_rate": 6.769770252223369e-05, |
| "loss": 0.4252, |
| "step": 5775 |
| }, |
| { |
| "epoch": 1.213073538654934, |
| "grad_norm": 0.8412914276123047, |
| "learning_rate": 6.753893863219675e-05, |
| "loss": 0.3813, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.2162162162162162, |
| "grad_norm": 0.8765383958816528, |
| "learning_rate": 6.737997283896103e-05, |
| "loss": 0.3712, |
| "step": 5805 |
| }, |
| { |
| "epoch": 1.2193588937774984, |
| "grad_norm": 0.7843053340911865, |
| "learning_rate": 6.722080697249487e-05, |
| "loss": 0.3776, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.2225015713387806, |
| "grad_norm": 1.0745536088943481, |
| "learning_rate": 6.706144286506978e-05, |
| "loss": 0.3499, |
| "step": 5835 |
| }, |
| { |
| "epoch": 1.2256442489000627, |
| "grad_norm": 0.7722020745277405, |
| "learning_rate": 6.690188235123934e-05, |
| "loss": 0.4211, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.2287869264613451, |
| "grad_norm": 0.9631087183952332, |
| "learning_rate": 6.674212726781814e-05, |
| "loss": 0.3772, |
| "step": 5865 |
| }, |
| { |
| "epoch": 1.2319296040226273, |
| "grad_norm": 0.8981698751449585, |
| "learning_rate": 6.65821794538606e-05, |
| "loss": 0.4598, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.2350722815839095, |
| "grad_norm": 0.778362512588501, |
| "learning_rate": 6.642204075063974e-05, |
| "loss": 0.4179, |
| "step": 5895 |
| }, |
| { |
| "epoch": 1.2382149591451916, |
| "grad_norm": 0.8421118259429932, |
| "learning_rate": 6.626171300162615e-05, |
| "loss": 0.3583, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.241357636706474, |
| "grad_norm": 1.0227240324020386, |
| "learning_rate": 6.610119805246653e-05, |
| "loss": 0.3919, |
| "step": 5925 |
| }, |
| { |
| "epoch": 1.2445003142677562, |
| "grad_norm": 0.5748106837272644, |
| "learning_rate": 6.594049775096268e-05, |
| "loss": 0.3571, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.2476429918290384, |
| "grad_norm": 0.6924661993980408, |
| "learning_rate": 6.577961394705008e-05, |
| "loss": 0.3812, |
| "step": 5955 |
| }, |
| { |
| "epoch": 1.2507856693903205, |
| "grad_norm": 0.7702043056488037, |
| "learning_rate": 6.561854849277664e-05, |
| "loss": 0.331, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.2539283469516027, |
| "grad_norm": 0.6666329503059387, |
| "learning_rate": 6.545730324228136e-05, |
| "loss": 0.3266, |
| "step": 5985 |
| }, |
| { |
| "epoch": 1.2570710245128849, |
| "grad_norm": 0.9120034575462341, |
| "learning_rate": 6.529588005177305e-05, |
| "loss": 0.4188, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.260213702074167, |
| "grad_norm": 0.7251651287078857, |
| "learning_rate": 6.513428077950886e-05, |
| "loss": 0.4067, |
| "step": 6015 |
| }, |
| { |
| "epoch": 1.2633563796354494, |
| "grad_norm": 0.6845729947090149, |
| "learning_rate": 6.497250728577296e-05, |
| "loss": 0.4266, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.2664990571967316, |
| "grad_norm": 0.7530787587165833, |
| "learning_rate": 6.481056143285512e-05, |
| "loss": 0.3302, |
| "step": 6045 |
| }, |
| { |
| "epoch": 1.2696417347580138, |
| "grad_norm": 0.7474608421325684, |
| "learning_rate": 6.464844508502927e-05, |
| "loss": 0.4305, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.2727844123192962, |
| "grad_norm": 0.8672669529914856, |
| "learning_rate": 6.448616010853199e-05, |
| "loss": 0.4267, |
| "step": 6075 |
| }, |
| { |
| "epoch": 1.2759270898805783, |
| "grad_norm": 0.7703887224197388, |
| "learning_rate": 6.432370837154109e-05, |
| "loss": 0.3531, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.2790697674418605, |
| "grad_norm": 0.7432886958122253, |
| "learning_rate": 6.416109174415406e-05, |
| "loss": 0.3189, |
| "step": 6105 |
| }, |
| { |
| "epoch": 1.2822124450031427, |
| "grad_norm": 0.9600912928581238, |
| "learning_rate": 6.399831209836659e-05, |
| "loss": 0.4036, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.2853551225644249, |
| "grad_norm": 0.7727882862091064, |
| "learning_rate": 6.383537130805098e-05, |
| "loss": 0.3857, |
| "step": 6135 |
| }, |
| { |
| "epoch": 1.288497800125707, |
| "grad_norm": 0.7628008723258972, |
| "learning_rate": 6.367227124893455e-05, |
| "loss": 0.4229, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.2916404776869892, |
| "grad_norm": 0.9682219624519348, |
| "learning_rate": 6.350901379857814e-05, |
| "loss": 0.3544, |
| "step": 6165 |
| }, |
| { |
| "epoch": 1.2947831552482716, |
| "grad_norm": 0.7553837895393372, |
| "learning_rate": 6.334560083635434e-05, |
| "loss": 0.3968, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.2979258328095538, |
| "grad_norm": 0.7951422333717346, |
| "learning_rate": 6.318203424342605e-05, |
| "loss": 0.2946, |
| "step": 6195 |
| }, |
| { |
| "epoch": 1.301068510370836, |
| "grad_norm": 0.9351706504821777, |
| "learning_rate": 6.301831590272465e-05, |
| "loss": 0.4203, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.304211187932118, |
| "grad_norm": 0.8283166289329529, |
| "learning_rate": 6.28544476989284e-05, |
| "loss": 0.4166, |
| "step": 6225 |
| }, |
| { |
| "epoch": 1.3073538654934005, |
| "grad_norm": 0.7889246940612793, |
| "learning_rate": 6.269043151844081e-05, |
| "loss": 0.4084, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.3104965430546827, |
| "grad_norm": 0.7893148064613342, |
| "learning_rate": 6.252626924936876e-05, |
| "loss": 0.3327, |
| "step": 6255 |
| }, |
| { |
| "epoch": 1.3136392206159648, |
| "grad_norm": 0.9599968194961548, |
| "learning_rate": 6.236196278150092e-05, |
| "loss": 0.3987, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.316781898177247, |
| "grad_norm": 0.7326962351799011, |
| "learning_rate": 6.219751400628593e-05, |
| "loss": 0.3872, |
| "step": 6285 |
| }, |
| { |
| "epoch": 1.3199245757385292, |
| "grad_norm": 0.7666275501251221, |
| "learning_rate": 6.203292481681061e-05, |
| "loss": 0.2906, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.3230672532998113, |
| "grad_norm": 0.7648006081581116, |
| "learning_rate": 6.186819710777819e-05, |
| "loss": 0.4077, |
| "step": 6315 |
| }, |
| { |
| "epoch": 1.3262099308610937, |
| "grad_norm": 0.8993086218833923, |
| "learning_rate": 6.170333277548653e-05, |
| "loss": 0.3334, |
| "step": 6330 |
| }, |
| { |
| "epoch": 1.329352608422376, |
| "grad_norm": 0.8966405987739563, |
| "learning_rate": 6.153833371780622e-05, |
| "loss": 0.3772, |
| "step": 6345 |
| }, |
| { |
| "epoch": 1.332495285983658, |
| "grad_norm": 0.955697774887085, |
| "learning_rate": 6.137320183415877e-05, |
| "loss": 0.3652, |
| "step": 6360 |
| }, |
| { |
| "epoch": 1.3356379635449402, |
| "grad_norm": 0.913931667804718, |
| "learning_rate": 6.120793902549478e-05, |
| "loss": 0.3943, |
| "step": 6375 |
| }, |
| { |
| "epoch": 1.3387806411062226, |
| "grad_norm": 0.471160352230072, |
| "learning_rate": 6.1042547194272e-05, |
| "loss": 0.3656, |
| "step": 6390 |
| }, |
| { |
| "epoch": 1.3419233186675048, |
| "grad_norm": 0.7883521914482117, |
| "learning_rate": 6.0877028244433444e-05, |
| "loss": 0.3494, |
| "step": 6405 |
| }, |
| { |
| "epoch": 1.345065996228787, |
| "grad_norm": 0.8015203475952148, |
| "learning_rate": 6.071138408138547e-05, |
| "loss": 0.3498, |
| "step": 6420 |
| }, |
| { |
| "epoch": 1.3482086737900691, |
| "grad_norm": 0.8431302905082703, |
| "learning_rate": 6.0545616611975886e-05, |
| "loss": 0.3726, |
| "step": 6435 |
| }, |
| { |
| "epoch": 1.3513513513513513, |
| "grad_norm": 0.6410717964172363, |
| "learning_rate": 6.0379727744471936e-05, |
| "loss": 0.3793, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.3544940289126335, |
| "grad_norm": 0.8410218358039856, |
| "learning_rate": 6.021371938853839e-05, |
| "loss": 0.4294, |
| "step": 6465 |
| }, |
| { |
| "epoch": 1.3576367064739157, |
| "grad_norm": 0.622178852558136, |
| "learning_rate": 6.004759345521552e-05, |
| "loss": 0.3373, |
| "step": 6480 |
| }, |
| { |
| "epoch": 1.360779384035198, |
| "grad_norm": 0.8277848362922668, |
| "learning_rate": 5.988135185689712e-05, |
| "loss": 0.3796, |
| "step": 6495 |
| }, |
| { |
| "epoch": 1.3639220615964802, |
| "grad_norm": 0.799150824546814, |
| "learning_rate": 5.9714996507308465e-05, |
| "loss": 0.3361, |
| "step": 6510 |
| }, |
| { |
| "epoch": 1.3670647391577624, |
| "grad_norm": 0.8518102765083313, |
| "learning_rate": 5.954852932148433e-05, |
| "loss": 0.3913, |
| "step": 6525 |
| }, |
| { |
| "epoch": 1.3702074167190446, |
| "grad_norm": 0.7465687990188599, |
| "learning_rate": 5.9381952215746905e-05, |
| "loss": 0.3546, |
| "step": 6540 |
| }, |
| { |
| "epoch": 1.373350094280327, |
| "grad_norm": 0.7342978119850159, |
| "learning_rate": 5.921526710768376e-05, |
| "loss": 0.3832, |
| "step": 6555 |
| }, |
| { |
| "epoch": 1.3764927718416091, |
| "grad_norm": 0.6754856109619141, |
| "learning_rate": 5.9048475916125723e-05, |
| "loss": 0.4051, |
| "step": 6570 |
| }, |
| { |
| "epoch": 1.3796354494028913, |
| "grad_norm": 0.6392863988876343, |
| "learning_rate": 5.888158056112486e-05, |
| "loss": 0.3828, |
| "step": 6585 |
| }, |
| { |
| "epoch": 1.3827781269641735, |
| "grad_norm": 0.897132933139801, |
| "learning_rate": 5.871458296393231e-05, |
| "loss": 0.405, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.3859208045254556, |
| "grad_norm": 0.7124328017234802, |
| "learning_rate": 5.854748504697624e-05, |
| "loss": 0.3712, |
| "step": 6615 |
| }, |
| { |
| "epoch": 1.3890634820867378, |
| "grad_norm": 0.8436194062232971, |
| "learning_rate": 5.8380288733839585e-05, |
| "loss": 0.3773, |
| "step": 6630 |
| }, |
| { |
| "epoch": 1.3922061596480202, |
| "grad_norm": 0.780944287776947, |
| "learning_rate": 5.8212995949238083e-05, |
| "loss": 0.3529, |
| "step": 6645 |
| }, |
| { |
| "epoch": 1.3953488372093024, |
| "grad_norm": 1.0335406064987183, |
| "learning_rate": 5.804560861899795e-05, |
| "loss": 0.4262, |
| "step": 6660 |
| }, |
| { |
| "epoch": 1.3984915147705845, |
| "grad_norm": 0.7593971490859985, |
| "learning_rate": 5.7878128670033826e-05, |
| "loss": 0.4079, |
| "step": 6675 |
| }, |
| { |
| "epoch": 1.4016341923318667, |
| "grad_norm": 0.7240027189254761, |
| "learning_rate": 5.7710558030326545e-05, |
| "loss": 0.3835, |
| "step": 6690 |
| }, |
| { |
| "epoch": 1.404776869893149, |
| "grad_norm": 1.530868411064148, |
| "learning_rate": 5.754289862890093e-05, |
| "loss": 0.4294, |
| "step": 6705 |
| }, |
| { |
| "epoch": 1.4079195474544313, |
| "grad_norm": 0.6043078899383545, |
| "learning_rate": 5.7375152395803624e-05, |
| "loss": 0.3343, |
| "step": 6720 |
| }, |
| { |
| "epoch": 1.4110622250157134, |
| "grad_norm": 0.8058659434318542, |
| "learning_rate": 5.720732126208082e-05, |
| "loss": 0.4533, |
| "step": 6735 |
| }, |
| { |
| "epoch": 1.4142049025769956, |
| "grad_norm": 0.7185141444206238, |
| "learning_rate": 5.7039407159756106e-05, |
| "loss": 0.42, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.4173475801382778, |
| "grad_norm": 1.0086369514465332, |
| "learning_rate": 5.687141202180817e-05, |
| "loss": 0.3701, |
| "step": 6765 |
| }, |
| { |
| "epoch": 1.42049025769956, |
| "grad_norm": 1.0289742946624756, |
| "learning_rate": 5.67033377821485e-05, |
| "loss": 0.4565, |
| "step": 6780 |
| }, |
| { |
| "epoch": 1.4236329352608421, |
| "grad_norm": 1.1389039754867554, |
| "learning_rate": 5.6535186375599266e-05, |
| "loss": 0.3555, |
| "step": 6795 |
| }, |
| { |
| "epoch": 1.4267756128221245, |
| "grad_norm": 0.887610673904419, |
| "learning_rate": 5.636695973787093e-05, |
| "loss": 0.368, |
| "step": 6810 |
| }, |
| { |
| "epoch": 1.4299182903834067, |
| "grad_norm": 0.9625629186630249, |
| "learning_rate": 5.619865980553994e-05, |
| "loss": 0.3962, |
| "step": 6825 |
| }, |
| { |
| "epoch": 1.4330609679446888, |
| "grad_norm": 0.8793766498565674, |
| "learning_rate": 5.6030288516026564e-05, |
| "loss": 0.3979, |
| "step": 6840 |
| }, |
| { |
| "epoch": 1.436203645505971, |
| "grad_norm": 0.7626388669013977, |
| "learning_rate": 5.586184780757251e-05, |
| "loss": 0.345, |
| "step": 6855 |
| }, |
| { |
| "epoch": 1.4393463230672534, |
| "grad_norm": 1.109713077545166, |
| "learning_rate": 5.5693339619218534e-05, |
| "loss": 0.4446, |
| "step": 6870 |
| }, |
| { |
| "epoch": 1.4424890006285356, |
| "grad_norm": 0.9758956432342529, |
| "learning_rate": 5.552476589078231e-05, |
| "loss": 0.401, |
| "step": 6885 |
| }, |
| { |
| "epoch": 1.4456316781898177, |
| "grad_norm": 0.923329770565033, |
| "learning_rate": 5.5356128562835904e-05, |
| "loss": 0.385, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.4487743557511, |
| "grad_norm": 0.7539265155792236, |
| "learning_rate": 5.518742957668359e-05, |
| "loss": 0.3274, |
| "step": 6915 |
| }, |
| { |
| "epoch": 1.451917033312382, |
| "grad_norm": 0.8187793493270874, |
| "learning_rate": 5.5018670874339386e-05, |
| "loss": 0.3677, |
| "step": 6930 |
| }, |
| { |
| "epoch": 1.4550597108736643, |
| "grad_norm": 0.9522603750228882, |
| "learning_rate": 5.484985439850473e-05, |
| "loss": 0.3319, |
| "step": 6945 |
| }, |
| { |
| "epoch": 1.4582023884349467, |
| "grad_norm": 0.8808611631393433, |
| "learning_rate": 5.468098209254622e-05, |
| "loss": 0.4311, |
| "step": 6960 |
| }, |
| { |
| "epoch": 1.4613450659962288, |
| "grad_norm": 0.6949836611747742, |
| "learning_rate": 5.4512055900473035e-05, |
| "loss": 0.3679, |
| "step": 6975 |
| }, |
| { |
| "epoch": 1.464487743557511, |
| "grad_norm": 0.783545196056366, |
| "learning_rate": 5.434307776691479e-05, |
| "loss": 0.3552, |
| "step": 6990 |
| }, |
| { |
| "epoch": 1.4676304211187932, |
| "grad_norm": 0.8342312574386597, |
| "learning_rate": 5.417404963709894e-05, |
| "loss": 0.3755, |
| "step": 7005 |
| }, |
| { |
| "epoch": 1.4707730986800756, |
| "grad_norm": 0.7615540027618408, |
| "learning_rate": 5.400497345682857e-05, |
| "loss": 0.3605, |
| "step": 7020 |
| }, |
| { |
| "epoch": 1.4739157762413577, |
| "grad_norm": 0.8944594860076904, |
| "learning_rate": 5.3835851172459794e-05, |
| "loss": 0.3948, |
| "step": 7035 |
| }, |
| { |
| "epoch": 1.47705845380264, |
| "grad_norm": 0.8412215113639832, |
| "learning_rate": 5.36666847308796e-05, |
| "loss": 0.3658, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.480201131363922, |
| "grad_norm": 0.8457724452018738, |
| "learning_rate": 5.34974760794832e-05, |
| "loss": 0.4327, |
| "step": 7065 |
| }, |
| { |
| "epoch": 1.4833438089252042, |
| "grad_norm": 0.7231891751289368, |
| "learning_rate": 5.332822716615172e-05, |
| "loss": 0.3489, |
| "step": 7080 |
| }, |
| { |
| "epoch": 1.4864864864864864, |
| "grad_norm": 0.8975026607513428, |
| "learning_rate": 5.315893993922986e-05, |
| "loss": 0.331, |
| "step": 7095 |
| }, |
| { |
| "epoch": 1.4896291640477686, |
| "grad_norm": 0.871842086315155, |
| "learning_rate": 5.2989616347503244e-05, |
| "loss": 0.4056, |
| "step": 7110 |
| }, |
| { |
| "epoch": 1.492771841609051, |
| "grad_norm": 0.5846161246299744, |
| "learning_rate": 5.282025834017623e-05, |
| "loss": 0.381, |
| "step": 7125 |
| }, |
| { |
| "epoch": 1.4959145191703331, |
| "grad_norm": 0.6650387644767761, |
| "learning_rate": 5.265086786684929e-05, |
| "loss": 0.34, |
| "step": 7140 |
| }, |
| { |
| "epoch": 1.4990571967316153, |
| "grad_norm": 0.862241804599762, |
| "learning_rate": 5.2481446877496665e-05, |
| "loss": 0.354, |
| "step": 7155 |
| }, |
| { |
| "epoch": 1.5021998742928977, |
| "grad_norm": 0.8328828811645508, |
| "learning_rate": 5.231199732244386e-05, |
| "loss": 0.3772, |
| "step": 7170 |
| }, |
| { |
| "epoch": 1.5053425518541799, |
| "grad_norm": 0.5438669323921204, |
| "learning_rate": 5.214252115234527e-05, |
| "loss": 0.3493, |
| "step": 7185 |
| }, |
| { |
| "epoch": 1.508485229415462, |
| "grad_norm": 0.7722681760787964, |
| "learning_rate": 5.197302031816165e-05, |
| "loss": 0.3494, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.5116279069767442, |
| "grad_norm": 0.9693325161933899, |
| "learning_rate": 5.180349677113762e-05, |
| "loss": 0.3512, |
| "step": 7215 |
| }, |
| { |
| "epoch": 1.5147705845380264, |
| "grad_norm": 1.0208348035812378, |
| "learning_rate": 5.163395246277938e-05, |
| "loss": 0.2772, |
| "step": 7230 |
| }, |
| { |
| "epoch": 1.5179132620993085, |
| "grad_norm": 0.8255509734153748, |
| "learning_rate": 5.1464389344832024e-05, |
| "loss": 0.3491, |
| "step": 7245 |
| }, |
| { |
| "epoch": 1.5210559396605907, |
| "grad_norm": 0.723574697971344, |
| "learning_rate": 5.1294809369257244e-05, |
| "loss": 0.3894, |
| "step": 7260 |
| }, |
| { |
| "epoch": 1.5241986172218729, |
| "grad_norm": 0.8955418467521667, |
| "learning_rate": 5.112521448821076e-05, |
| "loss": 0.3722, |
| "step": 7275 |
| }, |
| { |
| "epoch": 1.5273412947831553, |
| "grad_norm": 0.9446234703063965, |
| "learning_rate": 5.0955606654019895e-05, |
| "loss": 0.3602, |
| "step": 7290 |
| }, |
| { |
| "epoch": 1.5304839723444374, |
| "grad_norm": 0.7256786227226257, |
| "learning_rate": 5.078598781916107e-05, |
| "loss": 0.3488, |
| "step": 7305 |
| }, |
| { |
| "epoch": 1.5336266499057196, |
| "grad_norm": 0.775834858417511, |
| "learning_rate": 5.0616359936237355e-05, |
| "loss": 0.3983, |
| "step": 7320 |
| }, |
| { |
| "epoch": 1.536769327467002, |
| "grad_norm": 0.7684575915336609, |
| "learning_rate": 5.044672495795598e-05, |
| "loss": 0.3992, |
| "step": 7335 |
| }, |
| { |
| "epoch": 1.5399120050282842, |
| "grad_norm": 0.7569010853767395, |
| "learning_rate": 5.0277084837105826e-05, |
| "loss": 0.352, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.5430546825895664, |
| "grad_norm": 0.7330282926559448, |
| "learning_rate": 5.010744152653501e-05, |
| "loss": 0.3486, |
| "step": 7365 |
| }, |
| { |
| "epoch": 1.5461973601508485, |
| "grad_norm": 0.8921106457710266, |
| "learning_rate": 4.993779697912837e-05, |
| "loss": 0.3107, |
| "step": 7380 |
| }, |
| { |
| "epoch": 1.5493400377121307, |
| "grad_norm": 0.7190592288970947, |
| "learning_rate": 4.976815314778493e-05, |
| "loss": 0.3429, |
| "step": 7395 |
| }, |
| { |
| "epoch": 1.5524827152734129, |
| "grad_norm": 0.8145999312400818, |
| "learning_rate": 4.9598511985395535e-05, |
| "loss": 0.3455, |
| "step": 7410 |
| }, |
| { |
| "epoch": 1.555625392834695, |
| "grad_norm": 0.7628950476646423, |
| "learning_rate": 4.942887544482029e-05, |
| "loss": 0.3362, |
| "step": 7425 |
| }, |
| { |
| "epoch": 1.5587680703959774, |
| "grad_norm": 0.5859194993972778, |
| "learning_rate": 4.925924547886603e-05, |
| "loss": 0.3723, |
| "step": 7440 |
| }, |
| { |
| "epoch": 1.5619107479572596, |
| "grad_norm": 0.7906526327133179, |
| "learning_rate": 4.9089624040264013e-05, |
| "loss": 0.3511, |
| "step": 7455 |
| }, |
| { |
| "epoch": 1.5650534255185418, |
| "grad_norm": 0.7591722011566162, |
| "learning_rate": 4.892001308164727e-05, |
| "loss": 0.4439, |
| "step": 7470 |
| }, |
| { |
| "epoch": 1.5681961030798242, |
| "grad_norm": 0.9237760901451111, |
| "learning_rate": 4.875041455552817e-05, |
| "loss": 0.3638, |
| "step": 7485 |
| }, |
| { |
| "epoch": 1.5713387806411063, |
| "grad_norm": 0.734752893447876, |
| "learning_rate": 4.858083041427599e-05, |
| "loss": 0.4047, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.5744814582023885, |
| "grad_norm": 0.676703155040741, |
| "learning_rate": 4.8411262610094445e-05, |
| "loss": 0.3566, |
| "step": 7515 |
| }, |
| { |
| "epoch": 1.5776241357636707, |
| "grad_norm": 0.8751126527786255, |
| "learning_rate": 4.824171309499913e-05, |
| "loss": 0.3743, |
| "step": 7530 |
| }, |
| { |
| "epoch": 1.5807668133249528, |
| "grad_norm": 0.6884835958480835, |
| "learning_rate": 4.807218382079511e-05, |
| "loss": 0.3821, |
| "step": 7545 |
| }, |
| { |
| "epoch": 1.583909490886235, |
| "grad_norm": 0.8230961561203003, |
| "learning_rate": 4.790267673905447e-05, |
| "loss": 0.3193, |
| "step": 7560 |
| }, |
| { |
| "epoch": 1.5870521684475172, |
| "grad_norm": 0.8046270608901978, |
| "learning_rate": 4.7733193801093803e-05, |
| "loss": 0.3714, |
| "step": 7575 |
| }, |
| { |
| "epoch": 1.5901948460087993, |
| "grad_norm": 0.895897626876831, |
| "learning_rate": 4.756373695795177e-05, |
| "loss": 0.386, |
| "step": 7590 |
| }, |
| { |
| "epoch": 1.5933375235700817, |
| "grad_norm": 0.8858537077903748, |
| "learning_rate": 4.7394308160366617e-05, |
| "loss": 0.3755, |
| "step": 7605 |
| }, |
| { |
| "epoch": 1.596480201131364, |
| "grad_norm": 0.6874979138374329, |
| "learning_rate": 4.722490935875377e-05, |
| "loss": 0.3547, |
| "step": 7620 |
| }, |
| { |
| "epoch": 1.5996228786926463, |
| "grad_norm": 0.8027022480964661, |
| "learning_rate": 4.705554250318335e-05, |
| "loss": 0.3702, |
| "step": 7635 |
| }, |
| { |
| "epoch": 1.6027655562539285, |
| "grad_norm": 0.9383290410041809, |
| "learning_rate": 4.688620954335766e-05, |
| "loss": 0.4038, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.6059082338152106, |
| "grad_norm": 0.8475779294967651, |
| "learning_rate": 4.671691242858891e-05, |
| "loss": 0.3257, |
| "step": 7665 |
| }, |
| { |
| "epoch": 1.6090509113764928, |
| "grad_norm": 0.702893853187561, |
| "learning_rate": 4.654765310777659e-05, |
| "loss": 0.3642, |
| "step": 7680 |
| }, |
| { |
| "epoch": 1.612193588937775, |
| "grad_norm": 0.7762289047241211, |
| "learning_rate": 4.6378433529385157e-05, |
| "loss": 0.3859, |
| "step": 7695 |
| }, |
| { |
| "epoch": 1.6153362664990571, |
| "grad_norm": 0.7309826016426086, |
| "learning_rate": 4.620925564142151e-05, |
| "loss": 0.3427, |
| "step": 7710 |
| }, |
| { |
| "epoch": 1.6184789440603393, |
| "grad_norm": 0.655974805355072, |
| "learning_rate": 4.60401213914127e-05, |
| "loss": 0.3893, |
| "step": 7725 |
| }, |
| { |
| "epoch": 1.6216216216216215, |
| "grad_norm": 0.7434260845184326, |
| "learning_rate": 4.5871032726383386e-05, |
| "loss": 0.3528, |
| "step": 7740 |
| }, |
| { |
| "epoch": 1.6247642991829039, |
| "grad_norm": 0.981696605682373, |
| "learning_rate": 4.570199159283345e-05, |
| "loss": 0.3792, |
| "step": 7755 |
| }, |
| { |
| "epoch": 1.627906976744186, |
| "grad_norm": 0.5884058475494385, |
| "learning_rate": 4.553299993671567e-05, |
| "loss": 0.3082, |
| "step": 7770 |
| }, |
| { |
| "epoch": 1.6310496543054682, |
| "grad_norm": 0.9349349737167358, |
| "learning_rate": 4.536405970341317e-05, |
| "loss": 0.3736, |
| "step": 7785 |
| }, |
| { |
| "epoch": 1.6341923318667506, |
| "grad_norm": 0.8422302603721619, |
| "learning_rate": 4.519517283771717e-05, |
| "loss": 0.3897, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.6373350094280328, |
| "grad_norm": 0.7569222450256348, |
| "learning_rate": 4.502634128380448e-05, |
| "loss": 0.3581, |
| "step": 7815 |
| }, |
| { |
| "epoch": 1.640477686989315, |
| "grad_norm": 0.8034069538116455, |
| "learning_rate": 4.4857566985215276e-05, |
| "loss": 0.3542, |
| "step": 7830 |
| }, |
| { |
| "epoch": 1.6436203645505971, |
| "grad_norm": 0.5547857284545898, |
| "learning_rate": 4.4688851884830516e-05, |
| "loss": 0.3089, |
| "step": 7845 |
| }, |
| { |
| "epoch": 1.6467630421118793, |
| "grad_norm": 0.8145669102668762, |
| "learning_rate": 4.452019792484975e-05, |
| "loss": 0.3391, |
| "step": 7860 |
| }, |
| { |
| "epoch": 1.6499057196731615, |
| "grad_norm": 0.672332227230072, |
| "learning_rate": 4.4351607046768704e-05, |
| "loss": 0.3866, |
| "step": 7875 |
| }, |
| { |
| "epoch": 1.6530483972344436, |
| "grad_norm": 0.7952318787574768, |
| "learning_rate": 4.418308119135686e-05, |
| "loss": 0.4221, |
| "step": 7890 |
| }, |
| { |
| "epoch": 1.6561910747957258, |
| "grad_norm": 0.7489158511161804, |
| "learning_rate": 4.401462229863526e-05, |
| "loss": 0.3687, |
| "step": 7905 |
| }, |
| { |
| "epoch": 1.6593337523570082, |
| "grad_norm": 0.8457122445106506, |
| "learning_rate": 4.3846232307854e-05, |
| "loss": 0.3888, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.6624764299182904, |
| "grad_norm": 0.7040199637413025, |
| "learning_rate": 4.36779131574701e-05, |
| "loss": 0.3437, |
| "step": 7935 |
| }, |
| { |
| "epoch": 1.6656191074795728, |
| "grad_norm": 1.0369516611099243, |
| "learning_rate": 4.3509666785125005e-05, |
| "loss": 0.3557, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.668761785040855, |
| "grad_norm": 0.7418217062950134, |
| "learning_rate": 4.334149512762238e-05, |
| "loss": 0.351, |
| "step": 7965 |
| }, |
| { |
| "epoch": 1.671904462602137, |
| "grad_norm": 0.6527841687202454, |
| "learning_rate": 4.3173400120905824e-05, |
| "loss": 0.3286, |
| "step": 7980 |
| }, |
| { |
| "epoch": 1.6750471401634193, |
| "grad_norm": 0.9062017798423767, |
| "learning_rate": 4.3005383700036525e-05, |
| "loss": 0.3828, |
| "step": 7995 |
| }, |
| { |
| "epoch": 1.6781898177247014, |
| "grad_norm": 0.6981047987937927, |
| "learning_rate": 4.283744779917102e-05, |
| "loss": 0.3689, |
| "step": 8010 |
| }, |
| { |
| "epoch": 1.6813324952859836, |
| "grad_norm": 0.8865767121315002, |
| "learning_rate": 4.26695943515389e-05, |
| "loss": 0.3912, |
| "step": 8025 |
| }, |
| { |
| "epoch": 1.6844751728472658, |
| "grad_norm": 0.5835604667663574, |
| "learning_rate": 4.250182528942065e-05, |
| "loss": 0.317, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.687617850408548, |
| "grad_norm": 0.869529128074646, |
| "learning_rate": 4.233414254412525e-05, |
| "loss": 0.4031, |
| "step": 8055 |
| }, |
| { |
| "epoch": 1.6907605279698303, |
| "grad_norm": 0.7666299939155579, |
| "learning_rate": 4.216654804596808e-05, |
| "loss": 0.3635, |
| "step": 8070 |
| }, |
| { |
| "epoch": 1.6939032055311125, |
| "grad_norm": 0.6868289709091187, |
| "learning_rate": 4.199904372424858e-05, |
| "loss": 0.3554, |
| "step": 8085 |
| }, |
| { |
| "epoch": 1.6970458830923947, |
| "grad_norm": 0.7406291961669922, |
| "learning_rate": 4.183163150722822e-05, |
| "loss": 0.3216, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.700188560653677, |
| "grad_norm": 0.7962248921394348, |
| "learning_rate": 4.166431332210807e-05, |
| "loss": 0.3398, |
| "step": 8115 |
| }, |
| { |
| "epoch": 1.7033312382149592, |
| "grad_norm": 1.02495276927948, |
| "learning_rate": 4.149709109500678e-05, |
| "loss": 0.3817, |
| "step": 8130 |
| }, |
| { |
| "epoch": 1.7064739157762414, |
| "grad_norm": 0.7741113305091858, |
| "learning_rate": 4.13299667509384e-05, |
| "loss": 0.4072, |
| "step": 8145 |
| }, |
| { |
| "epoch": 1.7096165933375236, |
| "grad_norm": 0.7952526807785034, |
| "learning_rate": 4.1162942213790086e-05, |
| "loss": 0.3441, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.7127592708988058, |
| "grad_norm": 0.7849689722061157, |
| "learning_rate": 4.0996019406300126e-05, |
| "loss": 0.3417, |
| "step": 8175 |
| }, |
| { |
| "epoch": 1.715901948460088, |
| "grad_norm": 0.7431788444519043, |
| "learning_rate": 4.082920025003567e-05, |
| "loss": 0.3995, |
| "step": 8190 |
| }, |
| { |
| "epoch": 1.71904462602137, |
| "grad_norm": 0.7709872126579285, |
| "learning_rate": 4.0662486665370734e-05, |
| "loss": 0.4069, |
| "step": 8205 |
| }, |
| { |
| "epoch": 1.7221873035826523, |
| "grad_norm": 0.6013693809509277, |
| "learning_rate": 4.049588057146394e-05, |
| "loss": 0.3877, |
| "step": 8220 |
| }, |
| { |
| "epoch": 1.7253299811439347, |
| "grad_norm": 0.7985032796859741, |
| "learning_rate": 4.032938388623657e-05, |
| "loss": 0.3407, |
| "step": 8235 |
| }, |
| { |
| "epoch": 1.7284726587052168, |
| "grad_norm": 0.6259362101554871, |
| "learning_rate": 4.01629985263504e-05, |
| "loss": 0.3167, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.7316153362664992, |
| "grad_norm": 0.7632457613945007, |
| "learning_rate": 3.999672640718567e-05, |
| "loss": 0.365, |
| "step": 8265 |
| }, |
| { |
| "epoch": 1.7347580138277814, |
| "grad_norm": 0.9532593488693237, |
| "learning_rate": 3.983056944281901e-05, |
| "loss": 0.427, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.7379006913890636, |
| "grad_norm": 0.7168596386909485, |
| "learning_rate": 3.966452954600142e-05, |
| "loss": 0.3776, |
| "step": 8295 |
| }, |
| { |
| "epoch": 1.7410433689503457, |
| "grad_norm": 0.753966748714447, |
| "learning_rate": 3.94986086281363e-05, |
| "loss": 0.3792, |
| "step": 8310 |
| }, |
| { |
| "epoch": 1.744186046511628, |
| "grad_norm": 0.38063740730285645, |
| "learning_rate": 3.933280859925734e-05, |
| "loss": 0.3499, |
| "step": 8325 |
| }, |
| { |
| "epoch": 1.74732872407291, |
| "grad_norm": 0.8001086711883545, |
| "learning_rate": 3.916713136800659e-05, |
| "loss": 0.3491, |
| "step": 8340 |
| }, |
| { |
| "epoch": 1.7504714016341922, |
| "grad_norm": 0.7394033074378967, |
| "learning_rate": 3.900157884161255e-05, |
| "loss": 0.3383, |
| "step": 8355 |
| }, |
| { |
| "epoch": 1.7536140791954744, |
| "grad_norm": 0.7337818741798401, |
| "learning_rate": 3.8836152925868114e-05, |
| "loss": 0.3705, |
| "step": 8370 |
| }, |
| { |
| "epoch": 1.7567567567567568, |
| "grad_norm": 0.7671971917152405, |
| "learning_rate": 3.867085552510864e-05, |
| "loss": 0.3125, |
| "step": 8385 |
| }, |
| { |
| "epoch": 1.759899434318039, |
| "grad_norm": 0.8018542528152466, |
| "learning_rate": 3.850568854219011e-05, |
| "loss": 0.3678, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.7630421118793211, |
| "grad_norm": 0.8364083766937256, |
| "learning_rate": 3.834065387846718e-05, |
| "loss": 0.4179, |
| "step": 8415 |
| }, |
| { |
| "epoch": 1.7661847894406035, |
| "grad_norm": 0.8526837825775146, |
| "learning_rate": 3.817575343377122e-05, |
| "loss": 0.3881, |
| "step": 8430 |
| }, |
| { |
| "epoch": 1.7693274670018857, |
| "grad_norm": 0.6416676640510559, |
| "learning_rate": 3.8010989106388554e-05, |
| "loss": 0.3099, |
| "step": 8445 |
| }, |
| { |
| "epoch": 1.7724701445631679, |
| "grad_norm": 0.7990739941596985, |
| "learning_rate": 3.784636279303858e-05, |
| "loss": 0.3598, |
| "step": 8460 |
| }, |
| { |
| "epoch": 1.77561282212445, |
| "grad_norm": 0.8872657418251038, |
| "learning_rate": 3.76818763888519e-05, |
| "loss": 0.3882, |
| "step": 8475 |
| }, |
| { |
| "epoch": 1.7787554996857322, |
| "grad_norm": 0.8712546229362488, |
| "learning_rate": 3.7517531787348484e-05, |
| "loss": 0.3773, |
| "step": 8490 |
| }, |
| { |
| "epoch": 1.7818981772470144, |
| "grad_norm": 0.7423908710479736, |
| "learning_rate": 3.735333088041596e-05, |
| "loss": 0.3777, |
| "step": 8505 |
| }, |
| { |
| "epoch": 1.7850408548082966, |
| "grad_norm": 0.9166727066040039, |
| "learning_rate": 3.718927555828779e-05, |
| "loss": 0.4059, |
| "step": 8520 |
| }, |
| { |
| "epoch": 1.7881835323695787, |
| "grad_norm": 0.7207896113395691, |
| "learning_rate": 3.702536770952148e-05, |
| "loss": 0.3754, |
| "step": 8535 |
| }, |
| { |
| "epoch": 1.7913262099308611, |
| "grad_norm": 0.844727635383606, |
| "learning_rate": 3.6861609220976846e-05, |
| "loss": 0.3328, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.7944688874921433, |
| "grad_norm": 0.7674320340156555, |
| "learning_rate": 3.6698001977794366e-05, |
| "loss": 0.3806, |
| "step": 8565 |
| }, |
| { |
| "epoch": 1.7976115650534257, |
| "grad_norm": 0.6307094693183899, |
| "learning_rate": 3.6534547863373394e-05, |
| "loss": 0.3694, |
| "step": 8580 |
| }, |
| { |
| "epoch": 1.8007542426147078, |
| "grad_norm": 0.767432451248169, |
| "learning_rate": 3.63712487593505e-05, |
| "loss": 0.4028, |
| "step": 8595 |
| }, |
| { |
| "epoch": 1.80389692017599, |
| "grad_norm": 0.8937990665435791, |
| "learning_rate": 3.6208106545577824e-05, |
| "loss": 0.3372, |
| "step": 8610 |
| }, |
| { |
| "epoch": 1.8070395977372722, |
| "grad_norm": 0.590930163860321, |
| "learning_rate": 3.604512310010146e-05, |
| "loss": 0.3684, |
| "step": 8625 |
| }, |
| { |
| "epoch": 1.8101822752985544, |
| "grad_norm": 0.8184636831283569, |
| "learning_rate": 3.58823002991398e-05, |
| "loss": 0.373, |
| "step": 8640 |
| }, |
| { |
| "epoch": 1.8133249528598365, |
| "grad_norm": 0.9741955399513245, |
| "learning_rate": 3.5719640017061885e-05, |
| "loss": 0.3374, |
| "step": 8655 |
| }, |
| { |
| "epoch": 1.8164676304211187, |
| "grad_norm": 1.0014973878860474, |
| "learning_rate": 3.555714412636595e-05, |
| "loss": 0.3848, |
| "step": 8670 |
| }, |
| { |
| "epoch": 1.8196103079824009, |
| "grad_norm": 0.6335365772247314, |
| "learning_rate": 3.53948144976578e-05, |
| "loss": 0.3689, |
| "step": 8685 |
| }, |
| { |
| "epoch": 1.8227529855436833, |
| "grad_norm": 0.5687909722328186, |
| "learning_rate": 3.523265299962924e-05, |
| "loss": 0.4178, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.8258956631049654, |
| "grad_norm": 0.8622750043869019, |
| "learning_rate": 3.507066149903662e-05, |
| "loss": 0.3899, |
| "step": 8715 |
| }, |
| { |
| "epoch": 1.8290383406662476, |
| "grad_norm": 0.7984293699264526, |
| "learning_rate": 3.490884186067935e-05, |
| "loss": 0.4353, |
| "step": 8730 |
| }, |
| { |
| "epoch": 1.83218101822753, |
| "grad_norm": 0.7962972521781921, |
| "learning_rate": 3.474719594737842e-05, |
| "loss": 0.3324, |
| "step": 8745 |
| }, |
| { |
| "epoch": 1.8353236957888122, |
| "grad_norm": 0.7194257974624634, |
| "learning_rate": 3.4585725619954864e-05, |
| "loss": 0.3765, |
| "step": 8760 |
| }, |
| { |
| "epoch": 1.8384663733500943, |
| "grad_norm": 0.6931387782096863, |
| "learning_rate": 3.442443273720853e-05, |
| "loss": 0.3183, |
| "step": 8775 |
| }, |
| { |
| "epoch": 1.8416090509113765, |
| "grad_norm": 0.7540430426597595, |
| "learning_rate": 3.426331915589651e-05, |
| "loss": 0.3975, |
| "step": 8790 |
| }, |
| { |
| "epoch": 1.8447517284726587, |
| "grad_norm": 0.7310993671417236, |
| "learning_rate": 3.410238673071185e-05, |
| "loss": 0.3975, |
| "step": 8805 |
| }, |
| { |
| "epoch": 1.8478944060339408, |
| "grad_norm": 0.7351768612861633, |
| "learning_rate": 3.394163731426216e-05, |
| "loss": 0.3558, |
| "step": 8820 |
| }, |
| { |
| "epoch": 1.851037083595223, |
| "grad_norm": 0.7860934138298035, |
| "learning_rate": 3.378107275704834e-05, |
| "loss": 0.3601, |
| "step": 8835 |
| }, |
| { |
| "epoch": 1.8541797611565052, |
| "grad_norm": 0.6049594283103943, |
| "learning_rate": 3.362069490744322e-05, |
| "loss": 0.3692, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.8573224387177876, |
| "grad_norm": 0.9184178709983826, |
| "learning_rate": 3.346050561167029e-05, |
| "loss": 0.3518, |
| "step": 8865 |
| }, |
| { |
| "epoch": 1.8604651162790697, |
| "grad_norm": 0.7558075189590454, |
| "learning_rate": 3.3300506713782495e-05, |
| "loss": 0.3587, |
| "step": 8880 |
| }, |
| { |
| "epoch": 1.8636077938403521, |
| "grad_norm": 0.7545658349990845, |
| "learning_rate": 3.314070005564097e-05, |
| "loss": 0.3679, |
| "step": 8895 |
| }, |
| { |
| "epoch": 1.8667504714016343, |
| "grad_norm": 0.9135695695877075, |
| "learning_rate": 3.2981087476893853e-05, |
| "loss": 0.3725, |
| "step": 8910 |
| }, |
| { |
| "epoch": 1.8698931489629165, |
| "grad_norm": 0.9788998961448669, |
| "learning_rate": 3.2821670814955026e-05, |
| "loss": 0.3149, |
| "step": 8925 |
| }, |
| { |
| "epoch": 1.8730358265241986, |
| "grad_norm": 0.7953155636787415, |
| "learning_rate": 3.266245190498311e-05, |
| "loss": 0.3461, |
| "step": 8940 |
| }, |
| { |
| "epoch": 1.8761785040854808, |
| "grad_norm": 0.9166163802146912, |
| "learning_rate": 3.250343257986027e-05, |
| "loss": 0.3866, |
| "step": 8955 |
| }, |
| { |
| "epoch": 1.879321181646763, |
| "grad_norm": 0.9379754066467285, |
| "learning_rate": 3.2344614670171025e-05, |
| "loss": 0.3928, |
| "step": 8970 |
| }, |
| { |
| "epoch": 1.8824638592080452, |
| "grad_norm": 0.8782539963722229, |
| "learning_rate": 3.2186000004181314e-05, |
| "loss": 0.3959, |
| "step": 8985 |
| }, |
| { |
| "epoch": 1.8856065367693273, |
| "grad_norm": 0.7237117886543274, |
| "learning_rate": 3.2027590407817407e-05, |
| "loss": 0.3458, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.8887492143306097, |
| "grad_norm": 0.8787809014320374, |
| "learning_rate": 3.186938770464486e-05, |
| "loss": 0.4081, |
| "step": 9015 |
| }, |
| { |
| "epoch": 1.8918918918918919, |
| "grad_norm": 0.7628602981567383, |
| "learning_rate": 3.1711393715847476e-05, |
| "loss": 0.3928, |
| "step": 9030 |
| }, |
| { |
| "epoch": 1.895034569453174, |
| "grad_norm": 0.9172194600105286, |
| "learning_rate": 3.15536102602065e-05, |
| "loss": 0.3777, |
| "step": 9045 |
| }, |
| { |
| "epoch": 1.8981772470144564, |
| "grad_norm": 0.8413445353507996, |
| "learning_rate": 3.13960391540795e-05, |
| "loss": 0.36, |
| "step": 9060 |
| }, |
| { |
| "epoch": 1.9013199245757386, |
| "grad_norm": 0.9793257117271423, |
| "learning_rate": 3.1238682211379586e-05, |
| "loss": 0.3801, |
| "step": 9075 |
| }, |
| { |
| "epoch": 1.9044626021370208, |
| "grad_norm": 0.7620652318000793, |
| "learning_rate": 3.1081541243554427e-05, |
| "loss": 0.3689, |
| "step": 9090 |
| }, |
| { |
| "epoch": 1.907605279698303, |
| "grad_norm": 0.8353012800216675, |
| "learning_rate": 3.092461805956551e-05, |
| "loss": 0.3961, |
| "step": 9105 |
| }, |
| { |
| "epoch": 1.9107479572595851, |
| "grad_norm": 0.8704758882522583, |
| "learning_rate": 3.0767914465867246e-05, |
| "loss": 0.3168, |
| "step": 9120 |
| }, |
| { |
| "epoch": 1.9138906348208673, |
| "grad_norm": 0.6754759550094604, |
| "learning_rate": 3.061143226638611e-05, |
| "loss": 0.3407, |
| "step": 9135 |
| }, |
| { |
| "epoch": 1.9170333123821495, |
| "grad_norm": 0.9682889580726624, |
| "learning_rate": 3.0455173262500093e-05, |
| "loss": 0.4251, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.9201759899434316, |
| "grad_norm": 0.8114556670188904, |
| "learning_rate": 3.0299139253017695e-05, |
| "loss": 0.3397, |
| "step": 9165 |
| }, |
| { |
| "epoch": 1.923318667504714, |
| "grad_norm": 0.8123522996902466, |
| "learning_rate": 3.014333203415741e-05, |
| "loss": 0.3372, |
| "step": 9180 |
| }, |
| { |
| "epoch": 1.9264613450659962, |
| "grad_norm": 0.6080268025398254, |
| "learning_rate": 2.9987753399526934e-05, |
| "loss": 0.3506, |
| "step": 9195 |
| }, |
| { |
| "epoch": 1.9296040226272786, |
| "grad_norm": 0.8804168701171875, |
| "learning_rate": 2.9832405140102637e-05, |
| "loss": 0.3689, |
| "step": 9210 |
| }, |
| { |
| "epoch": 1.9327467001885608, |
| "grad_norm": 0.8579033613204956, |
| "learning_rate": 2.9677289044208833e-05, |
| "loss": 0.3875, |
| "step": 9225 |
| }, |
| { |
| "epoch": 1.935889377749843, |
| "grad_norm": 0.9520317316055298, |
| "learning_rate": 2.952240689749722e-05, |
| "loss": 0.422, |
| "step": 9240 |
| }, |
| { |
| "epoch": 1.939032055311125, |
| "grad_norm": 0.9517824053764343, |
| "learning_rate": 2.9367760482926393e-05, |
| "loss": 0.3917, |
| "step": 9255 |
| }, |
| { |
| "epoch": 1.9421747328724073, |
| "grad_norm": 0.8813058733940125, |
| "learning_rate": 2.921335158074122e-05, |
| "loss": 0.3551, |
| "step": 9270 |
| }, |
| { |
| "epoch": 1.9453174104336894, |
| "grad_norm": 0.8402652144432068, |
| "learning_rate": 2.905918196845242e-05, |
| "loss": 0.3468, |
| "step": 9285 |
| }, |
| { |
| "epoch": 1.9484600879949716, |
| "grad_norm": 0.855032205581665, |
| "learning_rate": 2.8905253420816035e-05, |
| "loss": 0.3534, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.9516027655562538, |
| "grad_norm": 0.7760915756225586, |
| "learning_rate": 2.875156770981311e-05, |
| "loss": 0.348, |
| "step": 9315 |
| }, |
| { |
| "epoch": 1.9547454431175362, |
| "grad_norm": 0.946934163570404, |
| "learning_rate": 2.8598126604629195e-05, |
| "loss": 0.3556, |
| "step": 9330 |
| }, |
| { |
| "epoch": 1.9578881206788183, |
| "grad_norm": 0.7589976191520691, |
| "learning_rate": 2.844493187163395e-05, |
| "loss": 0.3944, |
| "step": 9345 |
| }, |
| { |
| "epoch": 1.9610307982401005, |
| "grad_norm": 0.8831868171691895, |
| "learning_rate": 2.8291985274360983e-05, |
| "loss": 0.3192, |
| "step": 9360 |
| }, |
| { |
| "epoch": 1.964173475801383, |
| "grad_norm": 0.8260477781295776, |
| "learning_rate": 2.8139288573487337e-05, |
| "loss": 0.3476, |
| "step": 9375 |
| }, |
| { |
| "epoch": 1.967316153362665, |
| "grad_norm": 0.9583712816238403, |
| "learning_rate": 2.7986843526813343e-05, |
| "loss": 0.3112, |
| "step": 9390 |
| }, |
| { |
| "epoch": 1.9704588309239472, |
| "grad_norm": 0.8534590005874634, |
| "learning_rate": 2.783465188924239e-05, |
| "loss": 0.3738, |
| "step": 9405 |
| }, |
| { |
| "epoch": 1.9736015084852294, |
| "grad_norm": 0.8562766909599304, |
| "learning_rate": 2.7682715412760696e-05, |
| "loss": 0.3831, |
| "step": 9420 |
| }, |
| { |
| "epoch": 1.9767441860465116, |
| "grad_norm": 0.649868905544281, |
| "learning_rate": 2.7531035846417107e-05, |
| "loss": 0.379, |
| "step": 9435 |
| }, |
| { |
| "epoch": 1.9798868636077938, |
| "grad_norm": 0.7702896595001221, |
| "learning_rate": 2.7379614936302982e-05, |
| "loss": 0.3617, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.983029541169076, |
| "grad_norm": 0.9378584623336792, |
| "learning_rate": 2.7228454425532157e-05, |
| "loss": 0.3681, |
| "step": 9465 |
| }, |
| { |
| "epoch": 1.9861722187303583, |
| "grad_norm": 1.0069222450256348, |
| "learning_rate": 2.7077556054220804e-05, |
| "loss": 0.3356, |
| "step": 9480 |
| }, |
| { |
| "epoch": 1.9893148962916405, |
| "grad_norm": 0.9345496892929077, |
| "learning_rate": 2.6926921559467412e-05, |
| "loss": 0.3974, |
| "step": 9495 |
| }, |
| { |
| "epoch": 1.9924575738529227, |
| "grad_norm": 0.8090453147888184, |
| "learning_rate": 2.6776552675332768e-05, |
| "loss": 0.3397, |
| "step": 9510 |
| }, |
| { |
| "epoch": 1.995600251414205, |
| "grad_norm": 0.647416353225708, |
| "learning_rate": 2.6626451132820085e-05, |
| "loss": 0.3259, |
| "step": 9525 |
| }, |
| { |
| "epoch": 1.9987429289754872, |
| "grad_norm": 0.7810280323028564, |
| "learning_rate": 2.6476618659855023e-05, |
| "loss": 0.3234, |
| "step": 9540 |
| }, |
| { |
| "epoch": 2.0018856065367694, |
| "grad_norm": 0.7231355309486389, |
| "learning_rate": 2.6327056981265708e-05, |
| "loss": 0.3276, |
| "step": 9555 |
| }, |
| { |
| "epoch": 2.0050282840980516, |
| "grad_norm": 0.7072864174842834, |
| "learning_rate": 2.6177767818763062e-05, |
| "loss": 0.2683, |
| "step": 9570 |
| }, |
| { |
| "epoch": 2.0081709616593337, |
| "grad_norm": 0.8502817749977112, |
| "learning_rate": 2.6028752890920783e-05, |
| "loss": 0.2844, |
| "step": 9585 |
| }, |
| { |
| "epoch": 2.011313639220616, |
| "grad_norm": 0.6001257300376892, |
| "learning_rate": 2.5880013913155743e-05, |
| "loss": 0.2582, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.014456316781898, |
| "grad_norm": 1.037467360496521, |
| "learning_rate": 2.5731552597708086e-05, |
| "loss": 0.2666, |
| "step": 9615 |
| }, |
| { |
| "epoch": 2.0175989943431802, |
| "grad_norm": 0.990047812461853, |
| "learning_rate": 2.5583370653621652e-05, |
| "loss": 0.3042, |
| "step": 9630 |
| }, |
| { |
| "epoch": 2.0207416719044624, |
| "grad_norm": 1.0518317222595215, |
| "learning_rate": 2.5435469786724204e-05, |
| "loss": 0.2543, |
| "step": 9645 |
| }, |
| { |
| "epoch": 2.023884349465745, |
| "grad_norm": 1.225774884223938, |
| "learning_rate": 2.528785169960779e-05, |
| "loss": 0.3183, |
| "step": 9660 |
| }, |
| { |
| "epoch": 2.027027027027027, |
| "grad_norm": 0.9525572061538696, |
| "learning_rate": 2.5140518091609256e-05, |
| "loss": 0.3426, |
| "step": 9675 |
| }, |
| { |
| "epoch": 2.0301697045883094, |
| "grad_norm": 1.0750566720962524, |
| "learning_rate": 2.4993470658790573e-05, |
| "loss": 0.3172, |
| "step": 9690 |
| }, |
| { |
| "epoch": 2.0333123821495915, |
| "grad_norm": 0.8268773555755615, |
| "learning_rate": 2.484671109391933e-05, |
| "loss": 0.31, |
| "step": 9705 |
| }, |
| { |
| "epoch": 2.0364550597108737, |
| "grad_norm": 0.679678201675415, |
| "learning_rate": 2.470024108644925e-05, |
| "loss": 0.2868, |
| "step": 9720 |
| }, |
| { |
| "epoch": 2.039597737272156, |
| "grad_norm": 0.997440755367279, |
| "learning_rate": 2.4554062322500797e-05, |
| "loss": 0.3291, |
| "step": 9735 |
| }, |
| { |
| "epoch": 2.042740414833438, |
| "grad_norm": 0.9968817830085754, |
| "learning_rate": 2.4408176484841732e-05, |
| "loss": 0.2664, |
| "step": 9750 |
| }, |
| { |
| "epoch": 2.04588309239472, |
| "grad_norm": 1.0939124822616577, |
| "learning_rate": 2.4262585252867686e-05, |
| "loss": 0.2895, |
| "step": 9765 |
| }, |
| { |
| "epoch": 2.0490257699560024, |
| "grad_norm": 1.0220900774002075, |
| "learning_rate": 2.4117290302582872e-05, |
| "loss": 0.3191, |
| "step": 9780 |
| }, |
| { |
| "epoch": 2.0521684475172846, |
| "grad_norm": 0.635898768901825, |
| "learning_rate": 2.397229330658084e-05, |
| "loss": 0.307, |
| "step": 9795 |
| }, |
| { |
| "epoch": 2.0553111250785667, |
| "grad_norm": 1.112257719039917, |
| "learning_rate": 2.382759593402517e-05, |
| "loss": 0.2748, |
| "step": 9810 |
| }, |
| { |
| "epoch": 2.0584538026398493, |
| "grad_norm": 0.9440275430679321, |
| "learning_rate": 2.3683199850630213e-05, |
| "loss": 0.2893, |
| "step": 9825 |
| }, |
| { |
| "epoch": 2.0615964802011315, |
| "grad_norm": 1.2118226289749146, |
| "learning_rate": 2.3539106718642034e-05, |
| "loss": 0.2791, |
| "step": 9840 |
| }, |
| { |
| "epoch": 2.0647391577624137, |
| "grad_norm": 1.1374374628067017, |
| "learning_rate": 2.339531819681914e-05, |
| "loss": 0.2777, |
| "step": 9855 |
| }, |
| { |
| "epoch": 2.067881835323696, |
| "grad_norm": 0.6932136416435242, |
| "learning_rate": 2.3251835940413517e-05, |
| "loss": 0.2828, |
| "step": 9870 |
| }, |
| { |
| "epoch": 2.071024512884978, |
| "grad_norm": 1.0308489799499512, |
| "learning_rate": 2.310866160115146e-05, |
| "loss": 0.2947, |
| "step": 9885 |
| }, |
| { |
| "epoch": 2.07416719044626, |
| "grad_norm": 1.063235878944397, |
| "learning_rate": 2.2965796827214665e-05, |
| "loss": 0.3204, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.0773098680075424, |
| "grad_norm": 1.1612193584442139, |
| "learning_rate": 2.282324326322115e-05, |
| "loss": 0.2976, |
| "step": 9915 |
| }, |
| { |
| "epoch": 2.0804525455688245, |
| "grad_norm": 0.8928938508033752, |
| "learning_rate": 2.2681002550206355e-05, |
| "loss": 0.2921, |
| "step": 9930 |
| }, |
| { |
| "epoch": 2.0835952231301067, |
| "grad_norm": 1.066124677658081, |
| "learning_rate": 2.253907632560439e-05, |
| "loss": 0.298, |
| "step": 9945 |
| }, |
| { |
| "epoch": 2.086737900691389, |
| "grad_norm": 0.8713576197624207, |
| "learning_rate": 2.2397466223228947e-05, |
| "loss": 0.275, |
| "step": 9960 |
| }, |
| { |
| "epoch": 2.0898805782526715, |
| "grad_norm": 1.1056296825408936, |
| "learning_rate": 2.2256173873254643e-05, |
| "loss": 0.3266, |
| "step": 9975 |
| }, |
| { |
| "epoch": 2.0930232558139537, |
| "grad_norm": 0.9172502160072327, |
| "learning_rate": 2.211520090219821e-05, |
| "loss": 0.2731, |
| "step": 9990 |
| }, |
| { |
| "epoch": 2.0951183741881416, |
| "eval_accuracy": 0.009820309467613697, |
| "eval_loss": 0.4190310835838318, |
| "eval_runtime": 424.9528, |
| "eval_samples_per_second": 11.26, |
| "eval_steps_per_second": 2.817, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 15, |
| "max_steps": 14319, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.9082208625284874e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |