{
  "best_metric": 0.009820309467613697,
  "best_model_checkpoint": "/workspace/previous_works/RadFM/output/RadFM-Llama3-8B-pretrain-0002-embed_tokens-depth32-lora-10ep/checkpoint-10000",
  "epoch": 3.0,
  "eval_steps": 10000,
  "global_step": 14319,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0031426775612822125,
      "grad_norm": 38.333740234375,
      "learning_rate": 3.488372093023256e-06,
      "loss": 2.6324,
      "step": 15
    },
    {
      "epoch": 0.006285355122564425,
      "grad_norm": 23.8914794921875,
      "learning_rate": 6.976744186046512e-06,
      "loss": 2.3565,
      "step": 30
    },
    {
      "epoch": 0.009428032683846637,
      "grad_norm": 6.890503883361816,
      "learning_rate": 1.0465116279069768e-05,
      "loss": 1.8897,
      "step": 45
    },
    {
      "epoch": 0.01257071024512885,
      "grad_norm": 3.9464468955993652,
      "learning_rate": 1.3953488372093024e-05,
      "loss": 1.3707,
      "step": 60
    },
    {
      "epoch": 0.01571338780641106,
      "grad_norm": 4.443431854248047,
      "learning_rate": 1.744186046511628e-05,
      "loss": 1.055,
      "step": 75
    },
    {
      "epoch": 0.018856065367693273,
      "grad_norm": 3.5747361183166504,
      "learning_rate": 2.0930232558139536e-05,
      "loss": 0.9048,
      "step": 90
    },
    {
      "epoch": 0.02199874292897549,
      "grad_norm": 4.540731430053711,
      "learning_rate": 2.441860465116279e-05,
      "loss": 0.9143,
      "step": 105
    },
    {
      "epoch": 0.0251414204902577,
      "grad_norm": 4.121450424194336,
      "learning_rate": 2.7906976744186048e-05,
      "loss": 0.7641,
      "step": 120
    },
    {
      "epoch": 0.028284098051539912,
      "grad_norm": 3.1179299354553223,
      "learning_rate": 3.13953488372093e-05,
      "loss": 0.7784,
      "step": 135
    },
    {
      "epoch": 0.03142677561282212,
      "grad_norm": 2.9703869819641113,
      "learning_rate": 3.488372093023256e-05,
      "loss": 0.7299,
      "step": 150
    },
    {
      "epoch": 0.034569453174104335,
      "grad_norm": 2.706854820251465,
      "learning_rate": 3.837209302325582e-05,
      "loss": 0.6778,
      "step": 165
    },
    {
      "epoch": 0.03771213073538655,
      "grad_norm": 3.361267328262329,
      "learning_rate": 4.186046511627907e-05,
      "loss": 0.7222,
      "step": 180
    },
    {
      "epoch": 0.04085480829666876,
      "grad_norm": 4.040229797363281,
      "learning_rate": 4.5348837209302326e-05,
      "loss": 0.6684,
      "step": 195
    },
    {
      "epoch": 0.04399748585795098,
      "grad_norm": 2.817627429962158,
      "learning_rate": 4.883720930232558e-05,
      "loss": 0.7458,
      "step": 210
    },
    {
      "epoch": 0.04714016341923319,
      "grad_norm": 2.8800182342529297,
      "learning_rate": 5.232558139534884e-05,
      "loss": 0.6338,
      "step": 225
    },
    {
      "epoch": 0.0502828409805154,
      "grad_norm": 2.436993360519409,
      "learning_rate": 5.5813953488372095e-05,
      "loss": 0.6299,
      "step": 240
    },
    {
      "epoch": 0.05342551854179761,
      "grad_norm": 3.5814456939697266,
      "learning_rate": 5.9302325581395356e-05,
      "loss": 0.5728,
      "step": 255
    },
    {
      "epoch": 0.056568196103079824,
      "grad_norm": 2.8744938373565674,
      "learning_rate": 6.27906976744186e-05,
      "loss": 0.59,
      "step": 270
    },
    {
      "epoch": 0.059710873664362035,
      "grad_norm": 2.679749011993408,
      "learning_rate": 6.627906976744186e-05,
      "loss": 0.6016,
      "step": 285
    },
    {
      "epoch": 0.06285355122564425,
      "grad_norm": 3.1333463191986084,
      "learning_rate": 6.976744186046513e-05,
      "loss": 0.6569,
      "step": 300
    },
    {
      "epoch": 0.06599622878692646,
      "grad_norm": 2.2865939140319824,
      "learning_rate": 7.325581395348837e-05,
      "loss": 0.6385,
      "step": 315
    },
    {
      "epoch": 0.06913890634820867,
      "grad_norm": 2.9787251949310303,
      "learning_rate": 7.674418604651163e-05,
      "loss": 0.6307,
      "step": 330
    },
    {
      "epoch": 0.07228158390949088,
      "grad_norm": 2.078509569168091,
      "learning_rate": 8.023255813953489e-05,
      "loss": 0.5454,
      "step": 345
    },
    {
      "epoch": 0.0754242614707731,
      "grad_norm": 2.6606740951538086,
      "learning_rate": 8.372093023255814e-05,
      "loss": 0.6211,
      "step": 360
    },
    {
      "epoch": 0.0785669390320553,
      "grad_norm": 1.9346429109573364,
      "learning_rate": 8.72093023255814e-05,
      "loss": 0.5954,
      "step": 375
    },
    {
      "epoch": 0.08170961659333752,
      "grad_norm": 2.2432360649108887,
      "learning_rate": 9.069767441860465e-05,
      "loss": 0.5385,
      "step": 390
    },
    {
      "epoch": 0.08485229415461974,
      "grad_norm": 2.1645498275756836,
      "learning_rate": 9.418604651162792e-05,
      "loss": 0.592,
      "step": 405
    },
    {
      "epoch": 0.08799497171590195,
      "grad_norm": 2.1806533336639404,
      "learning_rate": 9.767441860465116e-05,
      "loss": 0.5372,
      "step": 420
    },
    {
      "epoch": 0.09113764927718417,
      "grad_norm": 2.445610761642456,
      "learning_rate": 9.999996802299678e-05,
      "loss": 0.6487,
      "step": 435
    },
    {
      "epoch": 0.09428032683846638,
      "grad_norm": 2.3592734336853027,
      "learning_rate": 9.999948836876656e-05,
      "loss": 0.5957,
      "step": 450
    },
    {
      "epoch": 0.09742300439974859,
      "grad_norm": 2.3027069568634033,
      "learning_rate": 9.999843313485898e-05,
      "loss": 0.5835,
      "step": 465
    },
    {
      "epoch": 0.1005656819610308,
      "grad_norm": 2.6429057121276855,
      "learning_rate": 9.999680233342161e-05,
      "loss": 0.592,
      "step": 480
    },
    {
      "epoch": 0.10370835952231301,
      "grad_norm": 2.0832202434539795,
      "learning_rate": 9.999459598322778e-05,
      "loss": 0.6203,
      "step": 495
    },
    {
      "epoch": 0.10685103708359522,
      "grad_norm": 2.481870412826538,
      "learning_rate": 9.999181410967633e-05,
      "loss": 0.5428,
      "step": 510
    },
    {
      "epoch": 0.10999371464487744,
      "grad_norm": 1.9621151685714722,
      "learning_rate": 9.99884567447914e-05,
      "loss": 0.6101,
      "step": 525
    },
    {
      "epoch": 0.11313639220615965,
      "grad_norm": 2.8833186626434326,
      "learning_rate": 9.998452392722198e-05,
      "loss": 0.5577,
      "step": 540
    },
    {
      "epoch": 0.11627906976744186,
      "grad_norm": 2.4447429180145264,
      "learning_rate": 9.998001570224158e-05,
      "loss": 0.566,
      "step": 555
    },
    {
      "epoch": 0.11942174732872407,
      "grad_norm": 2.141496419906616,
      "learning_rate": 9.997493212174753e-05,
      "loss": 0.6211,
      "step": 570
    },
    {
      "epoch": 0.12256442489000628,
      "grad_norm": 2.389796495437622,
      "learning_rate": 9.996927324426057e-05,
      "loss": 0.5937,
      "step": 585
    },
    {
      "epoch": 0.1257071024512885,
      "grad_norm": 2.1194262504577637,
      "learning_rate": 9.996303913492408e-05,
      "loss": 0.5847,
      "step": 600
    },
    {
      "epoch": 0.12884978001257072,
      "grad_norm": 1.7767274379730225,
      "learning_rate": 9.99562298655033e-05,
      "loss": 0.518,
      "step": 615
    },
    {
      "epoch": 0.13199245757385292,
      "grad_norm": 2.0348453521728516,
      "learning_rate": 9.994884551438458e-05,
      "loss": 0.5941,
      "step": 630
    },
    {
      "epoch": 0.13513513513513514,
      "grad_norm": 1.443819284439087,
      "learning_rate": 9.994088616657444e-05,
      "loss": 0.5022,
      "step": 645
    },
    {
      "epoch": 0.13827781269641734,
      "grad_norm": 2.1748251914978027,
      "learning_rate": 9.993235191369861e-05,
      "loss": 0.5369,
      "step": 660
    },
    {
      "epoch": 0.14142049025769957,
      "grad_norm": 1.9295774698257446,
      "learning_rate": 9.99232428540009e-05,
      "loss": 0.607,
      "step": 675
    },
    {
      "epoch": 0.14456316781898176,
      "grad_norm": 1.7530088424682617,
      "learning_rate": 9.991355909234224e-05,
      "loss": 0.5417,
      "step": 690
    },
    {
      "epoch": 0.147705845380264,
      "grad_norm": 10.02226448059082,
      "learning_rate": 9.990330074019925e-05,
      "loss": 0.5901,
      "step": 705
    },
    {
      "epoch": 0.1508485229415462,
      "grad_norm": 1.3864644765853882,
      "learning_rate": 9.989246791566314e-05,
      "loss": 0.678,
      "step": 720
    },
    {
      "epoch": 0.1539912005028284,
      "grad_norm": 1.6103929281234741,
      "learning_rate": 9.988106074343823e-05,
      "loss": 0.4741,
      "step": 735
    },
    {
      "epoch": 0.1571338780641106,
      "grad_norm": 1.5933347940444946,
      "learning_rate": 9.986907935484064e-05,
      "loss": 0.5391,
      "step": 750
    },
    {
      "epoch": 0.16027655562539284,
      "grad_norm": 1.5971338748931885,
      "learning_rate": 9.985652388779663e-05,
      "loss": 0.5782,
      "step": 765
    },
    {
      "epoch": 0.16341923318667503,
      "grad_norm": 1.559793472290039,
      "learning_rate": 9.984339448684113e-05,
      "loss": 0.5227,
      "step": 780
    },
    {
      "epoch": 0.16656191074795726,
      "grad_norm": 1.3077164888381958,
      "learning_rate": 9.982969130311597e-05,
      "loss": 0.5203,
      "step": 795
    },
    {
      "epoch": 0.16970458830923948,
      "grad_norm": 1.6828336715698242,
      "learning_rate": 9.98154144943683e-05,
      "loss": 0.5471,
      "step": 810
    },
    {
      "epoch": 0.17284726587052168,
      "grad_norm": 1.387099266052246,
      "learning_rate": 9.98005642249486e-05,
      "loss": 0.5399,
      "step": 825
    },
    {
      "epoch": 0.1759899434318039,
      "grad_norm": 1.723253607749939,
      "learning_rate": 9.978514066580886e-05,
      "loss": 0.5606,
      "step": 840
    },
    {
      "epoch": 0.1791326209930861,
      "grad_norm": 1.22931706905365,
      "learning_rate": 9.976914399450068e-05,
      "loss": 0.5024,
      "step": 855
    },
    {
      "epoch": 0.18227529855436833,
      "grad_norm": 1.4278538227081299,
      "learning_rate": 9.97525743951731e-05,
      "loss": 0.5983,
      "step": 870
    },
    {
      "epoch": 0.18541797611565053,
      "grad_norm": 1.4029372930526733,
      "learning_rate": 9.973543205857057e-05,
      "loss": 0.5699,
      "step": 885
    },
    {
      "epoch": 0.18856065367693275,
      "grad_norm": 1.3018133640289307,
      "learning_rate": 9.971771718203072e-05,
      "loss": 0.4936,
      "step": 900
    },
    {
      "epoch": 0.19170333123821495,
      "grad_norm": 1.3082265853881836,
      "learning_rate": 9.969942996948209e-05,
      "loss": 0.5025,
      "step": 915
    },
    {
      "epoch": 0.19484600879949718,
      "grad_norm": 1.2923167943954468,
      "learning_rate": 9.968057063144182e-05,
      "loss": 0.5779,
      "step": 930
    },
    {
      "epoch": 0.19798868636077938,
      "grad_norm": 1.2902971506118774,
      "learning_rate": 9.966113938501313e-05,
      "loss": 0.5373,
      "step": 945
    },
    {
      "epoch": 0.2011313639220616,
      "grad_norm": 1.391560673713684,
      "learning_rate": 9.964113645388293e-05,
      "loss": 0.5858,
      "step": 960
    },
    {
      "epoch": 0.2042740414833438,
      "grad_norm": 1.3245513439178467,
      "learning_rate": 9.96205620683192e-05,
      "loss": 0.6043,
      "step": 975
    },
    {
      "epoch": 0.20741671904462602,
      "grad_norm": 1.4998241662979126,
      "learning_rate": 9.95994164651683e-05,
      "loss": 0.5785,
      "step": 990
    },
    {
      "epoch": 0.21055939660590822,
      "grad_norm": 1.090804934501648,
      "learning_rate": 9.957769988785236e-05,
      "loss": 0.6439,
      "step": 1005
    },
    {
      "epoch": 0.21370207416719045,
      "grad_norm": 1.1564654111862183,
      "learning_rate": 9.955541258636631e-05,
      "loss": 0.5091,
      "step": 1020
    },
    {
      "epoch": 0.21684475172847265,
      "grad_norm": 1.1778066158294678,
      "learning_rate": 9.953255481727513e-05,
      "loss": 0.5456,
      "step": 1035
    },
    {
      "epoch": 0.21998742928975487,
      "grad_norm": 1.3568626642227173,
      "learning_rate": 9.950912684371088e-05,
      "loss": 0.5208,
      "step": 1050
    },
    {
      "epoch": 0.2231301068510371,
      "grad_norm": 1.804425597190857,
      "learning_rate": 9.948512893536961e-05,
      "loss": 0.4956,
      "step": 1065
    },
    {
      "epoch": 0.2262727844123193,
      "grad_norm": 1.226159930229187,
      "learning_rate": 9.946056136850833e-05,
      "loss": 0.5812,
      "step": 1080
    },
    {
      "epoch": 0.22941546197360152,
      "grad_norm": 1.1530790328979492,
      "learning_rate": 9.943542442594177e-05,
      "loss": 0.4742,
      "step": 1095
    },
    {
      "epoch": 0.23255813953488372,
      "grad_norm": 1.390417218208313,
      "learning_rate": 9.940971839703916e-05,
      "loss": 0.619,
      "step": 1110
    },
    {
      "epoch": 0.23570081709616594,
      "grad_norm": 1.4010789394378662,
      "learning_rate": 9.938344357772087e-05,
      "loss": 0.6086,
      "step": 1125
    },
    {
      "epoch": 0.23884349465744814,
      "grad_norm": 1.6488044261932373,
      "learning_rate": 9.935660027045506e-05,
      "loss": 0.551,
      "step": 1140
    },
    {
      "epoch": 0.24198617221873037,
      "grad_norm": 1.0560044050216675,
      "learning_rate": 9.932918878425412e-05,
      "loss": 0.532,
      "step": 1155
    },
    {
      "epoch": 0.24512884978001256,
      "grad_norm": 1.0651888847351074,
      "learning_rate": 9.930120943467117e-05,
      "loss": 0.5012,
      "step": 1170
    },
    {
      "epoch": 0.2482715273412948,
      "grad_norm": 1.0553079843521118,
      "learning_rate": 9.927266254379642e-05,
      "loss": 0.5576,
      "step": 1185
    },
    {
      "epoch": 0.251414204902577,
      "grad_norm": 1.007480263710022,
      "learning_rate": 9.924354844025339e-05,
      "loss": 0.4839,
      "step": 1200
    },
    {
      "epoch": 0.2545568824638592,
      "grad_norm": 1.0924334526062012,
      "learning_rate": 9.921386745919528e-05,
      "loss": 0.595,
      "step": 1215
    },
    {
      "epoch": 0.25769956002514144,
      "grad_norm": 1.3309390544891357,
      "learning_rate": 9.918361994230097e-05,
      "loss": 0.5224,
      "step": 1230
    },
    {
      "epoch": 0.2608422375864236,
      "grad_norm": 0.9702763557434082,
      "learning_rate": 9.915280623777114e-05,
      "loss": 0.4871,
      "step": 1245
    },
    {
      "epoch": 0.26398491514770583,
      "grad_norm": 1.0511876344680786,
      "learning_rate": 9.912142670032427e-05,
      "loss": 0.5861,
      "step": 1260
    },
    {
      "epoch": 0.26712759270898806,
      "grad_norm": 1.396050214767456,
      "learning_rate": 9.908948169119251e-05,
      "loss": 0.4651,
      "step": 1275
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 0.985396683216095,
      "learning_rate": 9.905697157811761e-05,
      "loss": 0.4302,
      "step": 1290
    },
    {
      "epoch": 0.27341294783155246,
      "grad_norm": 0.9169828295707703,
      "learning_rate": 9.902389673534659e-05,
      "loss": 0.5212,
      "step": 1305
    },
    {
      "epoch": 0.2765556253928347,
      "grad_norm": 0.9107710123062134,
      "learning_rate": 9.899025754362751e-05,
      "loss": 0.4941,
      "step": 1320
    },
    {
      "epoch": 0.2796983029541169,
      "grad_norm": 0.9720286726951599,
      "learning_rate": 9.8956054390205e-05,
      "loss": 0.5169,
      "step": 1335
    },
    {
      "epoch": 0.28284098051539913,
      "grad_norm": 1.1490366458892822,
      "learning_rate": 9.892128766881596e-05,
      "loss": 0.4973,
      "step": 1350
    },
    {
      "epoch": 0.28598365807668136,
      "grad_norm": 1.2628952264785767,
      "learning_rate": 9.888595777968479e-05,
      "loss": 0.5194,
      "step": 1365
    },
    {
      "epoch": 0.2891263356379635,
      "grad_norm": 1.1610651016235352,
      "learning_rate": 9.885006512951897e-05,
      "loss": 0.4994,
      "step": 1380
    },
    {
      "epoch": 0.29226901319924575,
      "grad_norm": 1.054768681526184,
      "learning_rate": 9.881361013150436e-05,
      "loss": 0.4664,
      "step": 1395
    },
    {
      "epoch": 0.295411690760528,
      "grad_norm": 1.0745666027069092,
      "learning_rate": 9.877659320530037e-05,
      "loss": 0.5306,
      "step": 1410
    },
    {
      "epoch": 0.2985543683218102,
      "grad_norm": 1.3258591890335083,
      "learning_rate": 9.873901477703516e-05,
      "loss": 0.5076,
      "step": 1425
    },
    {
      "epoch": 0.3016970458830924,
      "grad_norm": 1.222783088684082,
      "learning_rate": 9.870087527930077e-05,
      "loss": 0.4581,
      "step": 1440
    },
    {
      "epoch": 0.3048397234443746,
      "grad_norm": 0.9374076724052429,
      "learning_rate": 9.866217515114805e-05,
      "loss": 0.4643,
      "step": 1455
    },
    {
      "epoch": 0.3079824010056568,
      "grad_norm": 1.3485162258148193,
      "learning_rate": 9.862291483808173e-05,
      "loss": 0.5551,
      "step": 1470
    },
    {
      "epoch": 0.31112507856693905,
      "grad_norm": 0.9162548780441284,
      "learning_rate": 9.858309479205519e-05,
      "loss": 0.5592,
      "step": 1485
    },
    {
      "epoch": 0.3142677561282212,
      "grad_norm": 1.1385138034820557,
      "learning_rate": 9.854271547146531e-05,
      "loss": 0.477,
      "step": 1500
    },
    {
      "epoch": 0.31741043368950345,
      "grad_norm": 1.0023164749145508,
      "learning_rate": 9.850177734114718e-05,
      "loss": 0.4972,
      "step": 1515
    },
    {
      "epoch": 0.32055311125078567,
      "grad_norm": 2.540215492248535,
      "learning_rate": 9.846028087236873e-05,
      "loss": 0.5007,
      "step": 1530
    },
    {
      "epoch": 0.3236957888120679,
      "grad_norm": 1.2012773752212524,
      "learning_rate": 9.841822654282533e-05,
      "loss": 0.5481,
      "step": 1545
    },
    {
      "epoch": 0.32683846637335007,
      "grad_norm": 0.9517608284950256,
      "learning_rate": 9.837561483663429e-05,
      "loss": 0.567,
      "step": 1560
    },
    {
      "epoch": 0.3299811439346323,
      "grad_norm": 1.0308321714401245,
      "learning_rate": 9.833244624432927e-05,
      "loss": 0.4856,
      "step": 1575
    },
    {
      "epoch": 0.3331238214959145,
      "grad_norm": 1.118574857711792,
      "learning_rate": 9.828872126285465e-05,
      "loss": 0.465,
      "step": 1590
    },
    {
      "epoch": 0.33626649905719674,
      "grad_norm": 1.0821537971496582,
      "learning_rate": 9.824444039555977e-05,
      "loss": 0.4394,
      "step": 1605
    },
    {
      "epoch": 0.33940917661847897,
      "grad_norm": 0.8795451521873474,
      "learning_rate": 9.81996041521932e-05,
      "loss": 0.4383,
      "step": 1620
    },
    {
      "epoch": 0.34255185417976114,
      "grad_norm": 1.1455141305923462,
      "learning_rate": 9.815421304889687e-05,
      "loss": 0.4805,
      "step": 1635
    },
    {
      "epoch": 0.34569453174104336,
      "grad_norm": 1.1445369720458984,
      "learning_rate": 9.81082676082e-05,
      "loss": 0.5315,
      "step": 1650
    },
    {
      "epoch": 0.3488372093023256,
      "grad_norm": 1.0800312757492065,
      "learning_rate": 9.806176835901328e-05,
      "loss": 0.5205,
      "step": 1665
    },
    {
      "epoch": 0.3519798868636078,
      "grad_norm": 0.7038319706916809,
      "learning_rate": 9.801471583662263e-05,
      "loss": 0.515,
      "step": 1680
    },
    {
      "epoch": 0.35512256442489,
      "grad_norm": 0.9790651202201843,
      "learning_rate": 9.796711058268313e-05,
      "loss": 0.504,
      "step": 1695
    },
    {
      "epoch": 0.3582652419861722,
      "grad_norm": 1.1764894723892212,
      "learning_rate": 9.791895314521267e-05,
      "loss": 0.4806,
      "step": 1710
    },
    {
      "epoch": 0.36140791954745444,
      "grad_norm": 0.9900022745132446,
      "learning_rate": 9.787024407858582e-05,
      "loss": 0.5358,
      "step": 1725
    },
    {
      "epoch": 0.36455059710873666,
      "grad_norm": 0.8621386289596558,
      "learning_rate": 9.782098394352725e-05,
      "loss": 0.5494,
      "step": 1740
    },
    {
      "epoch": 0.36769327467001883,
      "grad_norm": 0.8717844486236572,
      "learning_rate": 9.777117330710547e-05,
      "loss": 0.4967,
      "step": 1755
    },
    {
      "epoch": 0.37083595223130106,
      "grad_norm": 0.9800569415092468,
      "learning_rate": 9.772081274272611e-05,
      "loss": 0.4538,
      "step": 1770
    },
    {
      "epoch": 0.3739786297925833,
      "grad_norm": 0.9540134072303772,
      "learning_rate": 9.766990283012544e-05,
      "loss": 0.5149,
      "step": 1785
    },
    {
      "epoch": 0.3771213073538655,
      "grad_norm": 1.0856047868728638,
      "learning_rate": 9.761844415536372e-05,
      "loss": 0.5042,
      "step": 1800
    },
    {
      "epoch": 0.3802639849151477,
      "grad_norm": 1.0914040803909302,
      "learning_rate": 9.756643731081833e-05,
      "loss": 0.5059,
      "step": 1815
    },
    {
      "epoch": 0.3834066624764299,
      "grad_norm": 1.2371134757995605,
      "learning_rate": 9.751388289517704e-05,
      "loss": 0.4506,
      "step": 1830
    },
    {
      "epoch": 0.38654934003771213,
      "grad_norm": 1.0402591228485107,
      "learning_rate": 9.746078151343116e-05,
      "loss": 0.5535,
      "step": 1845
    },
    {
      "epoch": 0.38969201759899436,
      "grad_norm": 0.6260209083557129,
      "learning_rate": 9.740713377686843e-05,
      "loss": 0.4436,
      "step": 1860
    },
    {
      "epoch": 0.3928346951602766,
      "grad_norm": 0.9588780999183655,
      "learning_rate": 9.735294030306611e-05,
      "loss": 0.5573,
      "step": 1875
    },
    {
      "epoch": 0.39597737272155875,
      "grad_norm": 1.0838474035263062,
      "learning_rate": 9.729820171588384e-05,
      "loss": 0.4627,
      "step": 1890
    },
    {
      "epoch": 0.399120050282841,
      "grad_norm": 1.0682798624038696,
      "learning_rate": 9.724291864545643e-05,
      "loss": 0.4893,
      "step": 1905
    },
    {
      "epoch": 0.4022627278441232,
      "grad_norm": 0.9129301309585571,
      "learning_rate": 9.718709172818661e-05,
      "loss": 0.4898,
      "step": 1920
    },
    {
      "epoch": 0.40540540540540543,
      "grad_norm": 1.0116883516311646,
      "learning_rate": 9.713072160673777e-05,
      "loss": 0.4615,
      "step": 1935
    },
    {
      "epoch": 0.4085480829666876,
      "grad_norm": 1.057822823524475,
      "learning_rate": 9.707380893002646e-05,
      "loss": 0.4899,
      "step": 1950
    },
    {
      "epoch": 0.4116907605279698,
      "grad_norm": 0.6419869661331177,
      "learning_rate": 9.7016354353215e-05,
      "loss": 0.4348,
      "step": 1965
    },
    {
      "epoch": 0.41483343808925205,
      "grad_norm": 0.961713433265686,
      "learning_rate": 9.695835853770387e-05,
      "loss": 0.4921,
      "step": 1980
    },
    {
      "epoch": 0.4179761156505343,
      "grad_norm": 0.9473373889923096,
      "learning_rate": 9.689982215112417e-05,
      "loss": 0.4926,
      "step": 1995
    },
    {
      "epoch": 0.42111879321181644,
      "grad_norm": 1.2034335136413574,
      "learning_rate": 9.684074586732987e-05,
      "loss": 0.5042,
      "step": 2010
    },
    {
      "epoch": 0.42426147077309867,
      "grad_norm": 0.9373855590820312,
      "learning_rate": 9.678113036639014e-05,
      "loss": 0.5076,
      "step": 2025
    },
    {
      "epoch": 0.4274041483343809,
      "grad_norm": 1.016756296157837,
      "learning_rate": 9.672097633458136e-05,
      "loss": 0.4805,
      "step": 2040
    },
    {
      "epoch": 0.4305468258956631,
      "grad_norm": 0.7454690337181091,
      "learning_rate": 9.666028446437942e-05,
      "loss": 0.5382,
      "step": 2055
    },
    {
      "epoch": 0.4336895034569453,
      "grad_norm": 0.8196286559104919,
      "learning_rate": 9.659905545445159e-05,
      "loss": 0.4613,
      "step": 2070
    },
    {
      "epoch": 0.4368321810182275,
      "grad_norm": 0.9132091403007507,
      "learning_rate": 9.653729000964857e-05,
      "loss": 0.4595,
      "step": 2085
    },
    {
      "epoch": 0.43997485857950974,
      "grad_norm": 0.8063992857933044,
      "learning_rate": 9.647498884099633e-05,
      "loss": 0.4139,
      "step": 2100
    },
    {
      "epoch": 0.44311753614079197,
      "grad_norm": 0.9756997227668762,
      "learning_rate": 9.641215266568794e-05,
      "loss": 0.3941,
      "step": 2115
    },
    {
      "epoch": 0.4462602137020742,
      "grad_norm": 0.6542510390281677,
      "learning_rate": 9.634878220707531e-05,
      "loss": 0.4768,
      "step": 2130
    },
    {
      "epoch": 0.44940289126335636,
      "grad_norm": 0.9039008617401123,
      "learning_rate": 9.628487819466086e-05,
      "loss": 0.4248,
      "step": 2145
    },
    {
      "epoch": 0.4525455688246386,
      "grad_norm": 1.1151047945022583,
      "learning_rate": 9.622044136408914e-05,
      "loss": 0.5041,
      "step": 2160
    },
    {
      "epoch": 0.4556882463859208,
      "grad_norm": 0.8580663800239563,
      "learning_rate": 9.615547245713836e-05,
      "loss": 0.4766,
      "step": 2175
    },
    {
      "epoch": 0.45883092394720304,
      "grad_norm": 0.9799042344093323,
      "learning_rate": 9.608997222171178e-05,
      "loss": 0.4714,
      "step": 2190
    },
    {
      "epoch": 0.4619736015084852,
      "grad_norm": 0.8485172986984253,
      "learning_rate": 9.602394141182927e-05,
      "loss": 0.4556,
      "step": 2205
    },
    {
      "epoch": 0.46511627906976744,
      "grad_norm": 0.9632934927940369,
      "learning_rate": 9.595738078761837e-05,
      "loss": 0.4791,
      "step": 2220
    },
    {
      "epoch": 0.46825895663104966,
      "grad_norm": 0.8843478560447693,
      "learning_rate": 9.589029111530586e-05,
      "loss": 0.4603,
      "step": 2235
    },
    {
      "epoch": 0.4714016341923319,
      "grad_norm": 1.1230348348617554,
      "learning_rate": 9.582267316720861e-05,
      "loss": 0.491,
      "step": 2250
    },
    {
      "epoch": 0.47454431175361406,
      "grad_norm": 0.8234013915061951,
      "learning_rate": 9.575452772172495e-05,
      "loss": 0.44,
      "step": 2265
    },
    {
      "epoch": 0.4776869893148963,
      "grad_norm": 0.6838919520378113,
      "learning_rate": 9.568585556332559e-05,
      "loss": 0.4456,
      "step": 2280
    },
    {
      "epoch": 0.4808296668761785,
      "grad_norm": 0.8424423336982727,
      "learning_rate": 9.561665748254456e-05,
      "loss": 0.4556,
      "step": 2295
    },
    {
      "epoch": 0.48397234443746073,
      "grad_norm": 0.6735498905181885,
      "learning_rate": 9.554693427597024e-05,
      "loss": 0.5184,
      "step": 2310
    },
    {
      "epoch": 0.4871150219987429,
      "grad_norm": 0.8868768811225891,
      "learning_rate": 9.5476686746236e-05,
      "loss": 0.5403,
      "step": 2325
    },
    {
      "epoch": 0.49025769956002513,
      "grad_norm": 0.9957670569419861,
      "learning_rate": 9.540591570201116e-05,
      "loss": 0.4997,
      "step": 2340
    },
    {
      "epoch": 0.49340037712130735,
      "grad_norm": 0.76320481300354,
      "learning_rate": 9.533462195799157e-05,
      "loss": 0.4534,
      "step": 2355
    },
    {
      "epoch": 0.4965430546825896,
      "grad_norm": 0.8841500282287598,
      "learning_rate": 9.526280633489018e-05,
      "loss": 0.4724,
      "step": 2370
    },
    {
      "epoch": 0.4996857322438718,
      "grad_norm": 0.8852142095565796,
      "learning_rate": 9.519046965942776e-05,
      "loss": 0.4655,
      "step": 2385
    },
    {
      "epoch": 0.502828409805154,
      "grad_norm": 0.839430570602417,
      "learning_rate": 9.511761276432321e-05,
      "loss": 0.4386,
      "step": 2400
    },
    {
      "epoch": 0.5059710873664363,
      "grad_norm": 0.7581266760826111,
      "learning_rate": 9.50442364882841e-05,
      "loss": 0.4774,
      "step": 2415
    },
    {
      "epoch": 0.5091137649277184,
      "grad_norm": 0.8754017949104309,
      "learning_rate": 9.497034167599691e-05,
      "loss": 0.4744,
      "step": 2430
    },
    {
      "epoch": 0.5122564424890006,
      "grad_norm": 0.9099476337432861,
      "learning_rate": 9.48959291781174e-05,
      "loss": 0.4292,
      "step": 2445
    },
    {
      "epoch": 0.5153991200502829,
      "grad_norm": 0.9721155166625977,
      "learning_rate": 9.482099985126079e-05,
      "loss": 0.4137,
      "step": 2460
    },
    {
      "epoch": 0.518541797611565,
      "grad_norm": 0.8385334014892578,
      "learning_rate": 9.474555455799181e-05,
      "loss": 0.471,
      "step": 2475
    },
    {
      "epoch": 0.5216844751728472,
      "grad_norm": 0.9853966236114502,
      "learning_rate": 9.466959416681495e-05,
      "loss": 0.4233,
      "step": 2490
    },
    {
      "epoch": 0.5248271527341295,
      "grad_norm": 1.1044224500656128,
      "learning_rate": 9.459311955216428e-05,
      "loss": 0.5188,
      "step": 2505
    },
    {
      "epoch": 0.5279698302954117,
      "grad_norm": 0.870677649974823,
      "learning_rate": 9.451613159439349e-05,
      "loss": 0.4676,
      "step": 2520
    },
    {
      "epoch": 0.531112507856694,
      "grad_norm": 0.8571140170097351,
      "learning_rate": 9.443863117976573e-05,
      "loss": 0.4863,
      "step": 2535
    },
    {
      "epoch": 0.5342551854179761,
      "grad_norm": 1.0573495626449585,
      "learning_rate": 9.436061920044341e-05,
      "loss": 0.5057,
      "step": 2550
    },
    {
      "epoch": 0.5373978629792583,
      "grad_norm": 0.9805963635444641,
      "learning_rate": 9.42820965544779e-05,
      "loss": 0.468,
      "step": 2565
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 0.8198602199554443,
      "learning_rate": 9.420306414579925e-05,
      "loss": 0.5054,
      "step": 2580
    },
    {
      "epoch": 0.5436832181018227,
      "grad_norm": 0.9718137979507446,
      "learning_rate": 9.412352288420572e-05,
      "loss": 0.4824,
      "step": 2595
    },
    {
      "epoch": 0.5468258956631049,
      "grad_norm": 1.0223153829574585,
      "learning_rate": 9.404347368535337e-05,
      "loss": 0.4502,
      "step": 2610
    },
    {
      "epoch": 0.5499685732243872,
      "grad_norm": 0.9398010969161987,
      "learning_rate": 9.396291747074547e-05,
      "loss": 0.4761,
      "step": 2625
    },
    {
      "epoch": 0.5531112507856694,
      "grad_norm": 0.9091777801513672,
      "learning_rate": 9.38818551677219e-05,
      "loss": 0.4033,
      "step": 2640
    },
    {
      "epoch": 0.5562539283469516,
      "grad_norm": 1.06580650806427,
      "learning_rate": 9.380028770944849e-05,
      "loss": 0.4052,
      "step": 2655
    },
    {
      "epoch": 0.5593966059082338,
      "grad_norm": 0.7236329913139343,
      "learning_rate": 9.371821603490627e-05,
      "loss": 0.4677,
      "step": 2670
    },
    {
      "epoch": 0.562539283469516,
      "grad_norm": 0.8263210654258728,
      "learning_rate": 9.363564108888069e-05,
      "loss": 0.4576,
      "step": 2685
    },
    {
      "epoch": 0.5656819610307983,
      "grad_norm": 1.022448182106018,
      "learning_rate": 9.355256382195068e-05,
      "loss": 0.4963,
      "step": 2700
    },
    {
      "epoch": 0.5688246385920804,
      "grad_norm": 0.9639766812324524,
      "learning_rate": 9.346898519047775e-05,
      "loss": 0.4113,
      "step": 2715
    },
    {
      "epoch": 0.5719673161533627,
      "grad_norm": 1.1044561862945557,
      "learning_rate": 9.338490615659499e-05,
      "loss": 0.5023,
      "step": 2730
    },
    {
      "epoch": 0.5751099937146449,
      "grad_norm": 0.8272239565849304,
      "learning_rate": 9.330032768819596e-05,
      "loss": 0.4699,
      "step": 2745
    },
    {
      "epoch": 0.578252671275927,
      "grad_norm": 0.7692523002624512,
      "learning_rate": 9.321525075892356e-05,
      "loss": 0.4292,
      "step": 2760
    },
    {
      "epoch": 0.5813953488372093,
      "grad_norm": 0.9032982587814331,
      "learning_rate": 9.312967634815888e-05,
      "loss": 0.4432,
      "step": 2775
    },
    {
      "epoch": 0.5845380263984915,
      "grad_norm": 0.7676737904548645,
      "learning_rate": 9.304360544100982e-05,
      "loss": 0.4311,
      "step": 2790
    },
    {
      "epoch": 0.5876807039597737,
      "grad_norm": 0.9019532799720764,
      "learning_rate": 9.29570390282998e-05,
      "loss": 0.4464,
      "step": 2805
    },
    {
      "epoch": 0.590823381521056,
      "grad_norm": 0.9738386869430542,
      "learning_rate": 9.286997810655638e-05,
      "loss": 0.5019,
      "step": 2820
    },
    {
      "epoch": 0.5939660590823381,
      "grad_norm": 0.7886769771575928,
      "learning_rate": 9.278242367799978e-05,
      "loss": 0.4919,
      "step": 2835
    },
    {
      "epoch": 0.5971087366436204,
      "grad_norm": 0.9002622365951538,
      "learning_rate": 9.269437675053129e-05,
      "loss": 0.4695,
      "step": 2850
    },
    {
      "epoch": 0.6002514142049026,
      "grad_norm": 0.7023227214813232,
      "learning_rate": 9.260583833772172e-05,
      "loss": 0.4338,
      "step": 2865
    },
    {
      "epoch": 0.6033940917661847,
      "grad_norm": 0.9442479014396667,
      "learning_rate": 9.251680945879975e-05,
      "loss": 0.4907,
      "step": 2880
    },
    {
      "epoch": 0.606536769327467,
      "grad_norm": 0.6304488778114319,
      "learning_rate": 9.24272911386401e-05,
      "loss": 0.4612,
      "step": 2895
    },
    {
      "epoch": 0.6096794468887492,
      "grad_norm": 0.731960117816925,
      "learning_rate": 9.233728440775185e-05,
      "loss": 0.4207,
      "step": 2910
    },
    {
      "epoch": 0.6128221244500315,
      "grad_norm": 1.083849549293518,
      "learning_rate": 9.224679030226648e-05,
      "loss": 0.4775,
      "step": 2925
    },
    {
      "epoch": 0.6159648020113137,
      "grad_norm": 0.6792687177658081,
      "learning_rate": 9.215580986392607e-05,
      "loss": 0.4708,
      "step": 2940
    },
    {
      "epoch": 0.6191074795725958,
      "grad_norm": 0.7582160830497742,
      "learning_rate": 9.20643441400711e-05,
      "loss": 0.4352,
      "step": 2955
    },
    {
      "epoch": 0.6222501571338781,
      "grad_norm": 0.7785065174102783,
      "learning_rate": 9.197239418362862e-05,
      "loss": 0.4199,
      "step": 2970
    },
    {
      "epoch": 0.6253928346951603,
      "grad_norm": 0.9076778292655945,
      "learning_rate": 9.187996105309995e-05,
      "loss": 0.4937,
      "step": 2985
    },
    {
      "epoch": 0.6285355122564424,
      "grad_norm": 0.9189762473106384,
      "learning_rate": 9.178704581254865e-05,
      "loss": 0.4553,
      "step": 3000
    },
    {
      "epoch": 0.6316781898177247,
      "grad_norm": 0.8485803008079529,
      "learning_rate": 9.169364953158812e-05,
      "loss": 0.4799,
      "step": 3015
    },
    {
      "epoch": 0.6348208673790069,
      "grad_norm": 0.8296557068824768,
      "learning_rate": 9.15997732853694e-05,
      "loss": 0.4799,
      "step": 3030
    },
    {
      "epoch": 0.6379635449402892,
      "grad_norm": 0.9346463680267334,
      "learning_rate": 9.150541815456874e-05,
      "loss": 0.4707,
      "step": 3045
    },
    {
      "epoch": 0.6411062225015713,
      "grad_norm": 1.0045510530471802,
      "learning_rate": 9.141058522537515e-05,
      "loss": 0.5216,
      "step": 3060
    },
    {
      "epoch": 0.6442489000628535,
      "grad_norm": 0.5840141773223877,
      "learning_rate": 9.131527558947796e-05,
      "loss": 0.429,
      "step": 3075
    },
    {
      "epoch": 0.6473915776241358,
      "grad_norm": 0.8743481040000916,
      "learning_rate": 9.121949034405417e-05,
      "loss": 0.4734,
      "step": 3090
    },
    {
      "epoch": 0.650534255185418,
      "grad_norm": 0.9631288051605225,
      "learning_rate": 9.112323059175588e-05,
      "loss": 0.4856,
      "step": 3105
    },
    {
      "epoch": 0.6536769327467001,
      "grad_norm": 0.7583104372024536,
      "learning_rate": 9.102649744069758e-05,
      "loss": 0.4428,
      "step": 3120
    },
    {
      "epoch": 0.6568196103079824,
      "grad_norm": 0.9227087497711182,
      "learning_rate": 9.092929200444337e-05,
      "loss": 0.4622,
      "step": 3135
    },
    {
      "epoch": 0.6599622878692646,
      "grad_norm": 0.720124363899231,
      "learning_rate": 9.083161540199417e-05,
      "loss": 0.4136,
      "step": 3150
    },
    {
      "epoch": 0.6631049654305469,
      "grad_norm": 0.6481117010116577,
      "learning_rate": 9.073346875777487e-05,
      "loss": 0.5445,
      "step": 3165
    },
    {
      "epoch": 0.666247642991829,
      "grad_norm": 0.6970652937889099,
      "learning_rate": 9.063485320162126e-05,
      "loss": 0.4247,
      "step": 3180
    },
    {
      "epoch": 0.6693903205531112,
      "grad_norm": 0.5132230520248413,
      "learning_rate": 9.053576986876718e-05,
      "loss": 0.4415,
      "step": 3195
    },
    {
      "epoch": 0.6725329981143935,
      "grad_norm": 0.7673790454864502,
      "learning_rate": 9.043621989983135e-05,
      "loss": 0.5188,
      "step": 3210
    },
    {
      "epoch": 0.6756756756756757,
      "grad_norm": 0.8441967368125916,
      "learning_rate": 9.033620444080428e-05,
      "loss": 0.4343,
      "step": 3225
    },
    {
      "epoch": 0.6788183532369579,
      "grad_norm": 0.8746171593666077,
      "learning_rate": 9.023572464303506e-05,
      "loss": 0.4114,
      "step": 3240
    },
    {
      "epoch": 0.6819610307982401,
      "grad_norm": 0.7494221925735474,
      "learning_rate": 9.013478166321812e-05,
      "loss": 0.4334,
      "step": 3255
    },
    {
      "epoch": 0.6851037083595223,
      "grad_norm": 0.7263948917388916,
      "learning_rate": 9.00333766633799e-05,
      "loss": 0.4322,
      "step": 3270
    },
    {
      "epoch": 0.6882463859208046,
      "grad_norm": 0.852172315120697,
      "learning_rate": 8.99315108108655e-05,
      "loss": 0.4506,
      "step": 3285
    },
    {
      "epoch": 0.6913890634820867,
      "grad_norm": 0.7959320545196533,
      "learning_rate": 8.98291852783252e-05,
      "loss": 0.4456,
      "step": 3300
    },
    {
      "epoch": 0.6945317410433689,
      "grad_norm": 0.5918748378753662,
      "learning_rate": 8.9726401243701e-05,
      "loss": 0.4181,
      "step": 3315
    },
    {
      "epoch": 0.6976744186046512,
      "grad_norm": 0.9726805090904236,
      "learning_rate": 8.962315989021304e-05,
      "loss": 0.4964,
      "step": 3330
    },
    {
      "epoch": 0.7008170961659334,
      "grad_norm": 0.8826568126678467,
      "learning_rate": 8.951946240634596e-05,
      "loss": 0.4702,
      "step": 3345
    },
    {
      "epoch": 0.7039597737272156,
      "grad_norm": 0.7354099154472351,
      "learning_rate": 8.941530998583527e-05,
      "loss": 0.4258,
      "step": 3360
    },
    {
      "epoch": 0.7071024512884978,
      "grad_norm": 0.9217835664749146,
      "learning_rate": 8.931070382765359e-05,
      "loss": 0.5185,
      "step": 3375
    },
    {
      "epoch": 0.71024512884978,
      "grad_norm": 0.7444872260093689,
      "learning_rate": 8.920564513599679e-05,
      "loss": 0.4534,
      "step": 3390
    },
    {
      "epoch": 0.7133878064110623,
      "grad_norm": 0.7847276926040649,
      "learning_rate": 8.910013512027022e-05,
      "loss": 0.4232,
      "step": 3405
    },
    {
      "epoch": 0.7165304839723444,
      "grad_norm": 0.8024355173110962,
      "learning_rate": 8.899417499507471e-05,
      "loss": 0.4579,
      "step": 3420
    },
    {
      "epoch": 0.7196731615336267,
      "grad_norm": 0.7088613510131836,
      "learning_rate": 8.888776598019266e-05,
      "loss": 0.4437,
      "step": 3435
    },
    {
      "epoch": 0.7228158390949089,
      "grad_norm": 0.6009235382080078,
      "learning_rate": 8.87809093005739e-05,
      "loss": 0.397,
      "step": 3450
    },
    {
      "epoch": 0.725958516656191,
      "grad_norm": 0.8743120431900024,
      "learning_rate": 8.867360618632172e-05,
      "loss": 0.5056,
      "step": 3465
    },
    {
      "epoch": 0.7291011942174733,
      "grad_norm": 0.899148166179657,
      "learning_rate": 8.856585787267856e-05,
      "loss": 0.4521,
      "step": 3480
    },
    {
      "epoch": 0.7322438717787555,
      "grad_norm": 0.8690171837806702,
      "learning_rate": 8.845766560001193e-05,
      "loss": 0.4708,
      "step": 3495
    },
    {
      "epoch": 0.7353865493400377,
      "grad_norm": 0.9699186682701111,
      "learning_rate": 8.834903061380002e-05,
      "loss": 0.4534,
      "step": 3510
    },
    {
      "epoch": 0.73852922690132,
      "grad_norm": 0.8577262163162231,
      "learning_rate": 8.823995416461744e-05,
      "loss": 0.4096,
      "step": 3525
    },
    {
      "epoch": 0.7416719044626021,
      "grad_norm": 0.7458922266960144,
      "learning_rate": 8.81304375081208e-05,
      "loss": 0.46,
      "step": 3540
    },
    {
      "epoch": 0.7448145820238844,
      "grad_norm": 0.7347140908241272,
      "learning_rate": 8.802048190503423e-05,
      "loss": 0.4684,
      "step": 3555
    },
    {
      "epoch": 0.7479572595851666,
      "grad_norm": 0.7161451578140259,
      "learning_rate": 8.79100886211349e-05,
      "loss": 0.4715,
      "step": 3570
    },
    {
      "epoch": 0.7510999371464487,
      "grad_norm": 0.8321588039398193,
      "learning_rate": 8.779925892723842e-05,
      "loss": 0.3598,
      "step": 3585
    },
    {
      "epoch": 0.754242614707731,
      "grad_norm": 0.9462142586708069,
      "learning_rate": 8.768799409918423e-05,
      "loss": 0.4404,
      "step": 3600
    },
    {
      "epoch": 0.7573852922690132,
      "grad_norm": 0.6842710971832275,
      "learning_rate": 8.75762954178209e-05,
      "loss": 0.4648,
      "step": 3615
    },
    {
      "epoch": 0.7605279698302954,
      "grad_norm": 0.8573241829872131,
      "learning_rate": 8.746416416899145e-05,
      "loss": 0.4592,
      "step": 3630
    },
    {
      "epoch": 0.7636706473915776,
      "grad_norm": 0.751291811466217,
      "learning_rate": 8.735160164351841e-05,
      "loss": 0.5319,
      "step": 3645
    },
    {
      "epoch": 0.7668133249528598,
      "grad_norm": 0.731086790561676,
      "learning_rate": 8.72386091371891e-05,
      "loss": 0.4629,
      "step": 3660
    },
    {
      "epoch": 0.7699560025141421,
      "grad_norm": 0.9289976358413696,
      "learning_rate": 8.712518795074063e-05,
      "loss": 0.4427,
      "step": 3675
    },
    {
      "epoch": 0.7730986800754243,
      "grad_norm": 0.7036064267158508,
      "learning_rate": 8.701133938984496e-05,
      "loss": 0.4679,
      "step": 3690
    },
    {
      "epoch": 0.7762413576367064,
      "grad_norm": 0.778161346912384,
      "learning_rate": 8.689706476509385e-05,
      "loss": 0.4489,
      "step": 3705
    },
    {
      "epoch": 0.7793840351979887,
      "grad_norm": 0.8694556951522827,
      "learning_rate": 8.678236539198382e-05,
      "loss": 0.4048,
      "step": 3720
    },
    {
      "epoch": 0.7825267127592709,
      "grad_norm": 0.5768362283706665,
      "learning_rate": 8.666724259090092e-05,
      "loss": 0.4434,
      "step": 3735
    },
    {
      "epoch": 0.7856693903205532,
      "grad_norm": 0.604917585849762,
      "learning_rate": 8.655169768710562e-05,
      "loss": 0.4669,
      "step": 3750
    },
    {
      "epoch": 0.7888120678818353,
      "grad_norm": 0.833985447883606,
      "learning_rate": 8.643573201071748e-05,
      "loss": 0.4267,
      "step": 3765
    },
    {
      "epoch": 0.7919547454431175,
      "grad_norm": 0.7951568365097046,
      "learning_rate": 8.631934689669992e-05,
      "loss": 0.4028,
      "step": 3780
    },
    {
      "epoch": 0.7950974230043998,
      "grad_norm": 0.7703410983085632,
      "learning_rate": 8.620254368484474e-05,
      "loss": 0.4153,
      "step": 3795
    },
    {
      "epoch": 0.798240100565682,
      "grad_norm": 0.8545910716056824,
      "learning_rate": 8.608532371975684e-05,
      "loss": 0.4949,
      "step": 3810
    },
    {
      "epoch": 0.8013827781269641,
      "grad_norm": 0.8206099271774292,
      "learning_rate": 8.59676883508386e-05,
      "loss": 0.4714,
      "step": 3825
    },
    {
      "epoch": 0.8045254556882464,
      "grad_norm": 0.7841479182243347,
      "learning_rate": 8.584963893227442e-05,
      "loss": 0.4888,
      "step": 3840
    },
    {
      "epoch": 0.8076681332495286,
      "grad_norm": 0.7417731285095215,
      "learning_rate": 8.573117682301514e-05,
      "loss": 0.4951,
      "step": 3855
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 0.9013925194740295,
      "learning_rate": 8.561230338676239e-05,
      "loss": 0.4542,
      "step": 3870
    },
    {
      "epoch": 0.813953488372093,
      "grad_norm": 1.2146642208099365,
      "learning_rate": 8.549301999195283e-05,
      "loss": 0.4606,
      "step": 3885
    },
    {
      "epoch": 0.8170961659333752,
      "grad_norm": 0.8740483522415161,
      "learning_rate": 8.537332801174245e-05,
      "loss": 0.4562,
      "step": 3900
    },
    {
      "epoch": 0.8202388434946575,
      "grad_norm": 0.7769590020179749,
      "learning_rate": 8.525322882399082e-05,
      "loss": 0.4385,
      "step": 3915
    },
    {
      "epoch": 0.8233815210559396,
      "grad_norm": 0.7966271042823792,
      "learning_rate": 8.513272381124511e-05,
      "loss": 0.4011,
      "step": 3930
    },
    {
      "epoch": 0.8265241986172219,
      "grad_norm": 0.6132526397705078,
      "learning_rate": 8.501181436072422e-05,
      "loss": 0.393,
      "step": 3945
    },
    {
      "epoch": 0.8296668761785041,
      "grad_norm": 0.6438138484954834,
      "learning_rate": 8.489050186430285e-05,
      "loss": 0.4226,
      "step": 3960
    },
    {
      "epoch": 0.8328095537397863,
      "grad_norm": 0.8362025022506714,
      "learning_rate": 8.476878771849545e-05,
      "loss": 0.4216,
      "step": 3975
    },
    {
      "epoch": 0.8359522313010685,
      "grad_norm": 0.770706057548523,
      "learning_rate": 8.464667332444012e-05,
      "loss": 0.4278,
      "step": 3990
    },
    {
      "epoch": 0.8390949088623507,
      "grad_norm": 0.8944802284240723,
      "learning_rate": 8.452416008788254e-05,
      "loss": 0.4609,
      "step": 4005
    },
    {
      "epoch": 0.8422375864236329,
      "grad_norm": 0.9292035102844238,
      "learning_rate": 8.440124941915972e-05,
      "loss": 0.4124,
      "step": 4020
    },
    {
      "epoch": 0.8453802639849152,
      "grad_norm": 0.6450730562210083,
      "learning_rate": 8.427794273318377e-05,
      "loss": 0.4124,
      "step": 4035
    },
    {
      "epoch": 0.8485229415461973,
      "grad_norm": 1.0732468366622925,
      "learning_rate": 8.415424144942569e-05,
      "loss": 0.4678,
      "step": 4050
    },
    {
      "epoch": 0.8516656191074796,
      "grad_norm": 0.900360107421875,
      "learning_rate": 8.403014699189892e-05,
      "loss": 0.4299,
      "step": 4065
    },
    {
      "epoch": 0.8548082966687618,
      "grad_norm": 0.7163972854614258,
      "learning_rate": 8.39056607891431e-05,
      "loss": 0.4651,
      "step": 4080
    },
    {
      "epoch": 0.857950974230044,
      "grad_norm": 0.6078224182128906,
      "learning_rate": 8.378078427420739e-05,
      "loss": 0.4612,
      "step": 4095
    },
    {
      "epoch": 0.8610936517913262,
      "grad_norm": 0.7975668907165527,
      "learning_rate": 8.365551888463423e-05,
      "loss": 0.4521,
      "step": 4110
    },
    {
      "epoch": 0.8642363293526084,
      "grad_norm": 0.7620348930358887,
      "learning_rate": 8.352986606244262e-05,
      "loss": 0.4527,
      "step": 4125
    },
    {
      "epoch": 0.8673790069138906,
      "grad_norm": 0.7811437249183655,
      "learning_rate": 8.340382725411155e-05,
      "loss": 0.4639,
      "step": 4140
    },
    {
      "epoch": 0.8705216844751729,
      "grad_norm": 0.46538805961608887,
      "learning_rate": 8.327740391056343e-05,
      "loss": 0.3793,
      "step": 4155
    },
    {
      "epoch": 0.873664362036455,
      "grad_norm": 0.893225371837616,
      "learning_rate": 8.315059748714728e-05,
      "loss": 0.4824,
      "step": 4170
    },
    {
      "epoch": 0.8768070395977373,
      "grad_norm": 0.8325145244598389,
      "learning_rate": 8.302340944362205e-05,
      "loss": 0.4623,
      "step": 4185
    },
    {
      "epoch": 0.8799497171590195,
      "grad_norm": 0.7328510880470276,
      "learning_rate": 8.289584124413978e-05,
      "loss": 0.4075,
      "step": 4200
    },
    {
      "epoch": 0.8830923947203017,
      "grad_norm": 0.35754507780075073,
      "learning_rate": 8.276789435722875e-05,
      "loss": 0.3328,
      "step": 4215
    },
    {
      "epoch": 0.8862350722815839,
      "grad_norm": 0.78349369764328,
      "learning_rate": 8.263957025577663e-05,
      "loss": 0.4962,
      "step": 4230
    },
    {
      "epoch": 0.8893777498428661,
      "grad_norm": 0.644481360912323,
      "learning_rate": 8.251087041701339e-05,
      "loss": 0.3977,
      "step": 4245
    },
    {
      "epoch": 0.8925204274041484,
      "grad_norm": 0.618881344795227,
      "learning_rate": 8.238179632249443e-05,
      "loss": 0.3967,
      "step": 4260
    },
    {
      "epoch": 0.8956631049654306,
      "grad_norm": 0.7603642344474792,
      "learning_rate": 8.22523494580835e-05,
      "loss": 0.4413,
      "step": 4275
    },
    {
      "epoch": 0.8988057825267127,
      "grad_norm": 0.6301630735397339,
      "learning_rate": 8.212253131393549e-05,
      "loss": 0.4333,
      "step": 4290
    },
    {
      "epoch": 0.901948460087995,
      "grad_norm": 0.7729358077049255,
      "learning_rate": 8.199234338447942e-05,
      "loss": 0.4633,
      "step": 4305
    },
    {
      "epoch": 0.9050911376492772,
      "grad_norm": 0.9121199250221252,
      "learning_rate": 8.186178716840118e-05,
      "loss": 0.4411,
      "step": 4320
    },
    {
      "epoch": 0.9082338152105593,
      "grad_norm": 0.5462374091148376,
      "learning_rate": 8.17308641686262e-05,
      "loss": 0.4659,
      "step": 4335
    },
    {
      "epoch": 0.9113764927718416,
      "grad_norm": 0.7599003911018372,
      "learning_rate": 8.15995758923023e-05,
      "loss": 0.4015,
      "step": 4350
    },
    {
      "epoch": 0.9145191703331238,
      "grad_norm": 0.8557884693145752,
      "learning_rate": 8.14679238507822e-05,
      "loss": 0.4574,
      "step": 4365
    },
    {
      "epoch": 0.9176618478944061,
      "grad_norm": 0.7987812757492065,
      "learning_rate": 8.133590955960619e-05,
      "loss": 0.4501,
      "step": 4380
    },
    {
      "epoch": 0.9208045254556882,
      "grad_norm": 0.8603717088699341,
      "learning_rate": 8.120353453848471e-05,
      "loss": 0.4201,
      "step": 4395
    },
    {
      "epoch": 0.9239472030169704,
      "grad_norm": 0.7066472768783569,
      "learning_rate": 8.107080031128078e-05,
      "loss": 0.4035,
      "step": 4410
    },
    {
      "epoch": 0.9270898805782527,
      "grad_norm": 0.6430373191833496,
      "learning_rate": 8.09377084059925e-05,
      "loss": 0.4141,
      "step": 4425
    },
    {
      "epoch": 0.9302325581395349,
      "grad_norm": 0.6911259889602661,
      "learning_rate": 8.080426035473549e-05,
      "loss": 0.4431,
      "step": 4440
    },
    {
      "epoch": 0.933375235700817,
      "grad_norm": 0.8445611000061035,
      "learning_rate": 8.067045769372515e-05,
      "loss": 0.4469,
      "step": 4455
    },
    {
      "epoch": 0.9365179132620993,
      "grad_norm": 0.9317618012428284,
      "learning_rate": 8.053630196325914e-05,
      "loss": 0.4051,
      "step": 4470
    },
    {
      "epoch": 0.9396605908233815,
      "grad_norm": 0.8286532163619995,
      "learning_rate": 8.040179470769946e-05,
      "loss": 0.4158,
      "step": 4485
    },
    {
      "epoch": 0.9428032683846638,
      "grad_norm": 0.7000495195388794,
      "learning_rate": 8.026693747545486e-05,
      "loss": 0.4202,
      "step": 4500
    },
    {
      "epoch": 0.9459459459459459,
      "grad_norm": 0.8104173541069031,
      "learning_rate": 8.013173181896283e-05,
      "loss": 0.4369,
      "step": 4515
    },
    {
      "epoch": 0.9490886235072281,
      "grad_norm": 0.864750862121582,
      "learning_rate": 7.999617929467187e-05,
      "loss": 0.4152,
      "step": 4530
    },
    {
      "epoch": 0.9522313010685104,
      "grad_norm": 0.7788864970207214,
      "learning_rate": 7.98602814630235e-05,
      "loss": 0.492,
      "step": 4545
    },
    {
      "epoch": 0.9553739786297926,
      "grad_norm": 0.707156777381897,
      "learning_rate": 7.972403988843435e-05,
      "loss": 0.4105,
      "step": 4560
    },
    {
      "epoch": 0.9585166561910748,
      "grad_norm": 0.8454593420028687,
      "learning_rate": 7.958745613927809e-05,
      "loss": 0.4622,
      "step": 4575
    },
    {
      "epoch": 0.961659333752357,
      "grad_norm": 0.8026373982429504,
      "learning_rate": 7.945053178786744e-05,
      "loss": 0.4236,
      "step": 4590
    },
    {
      "epoch": 0.9648020113136392,
      "grad_norm": 0.786409318447113,
      "learning_rate": 7.931326841043596e-05,
      "loss": 0.4677,
      "step": 4605
    },
    {
      "epoch": 0.9679446888749215,
      "grad_norm": 0.5381405353546143,
      "learning_rate": 7.917566758712005e-05,
      "loss": 0.443,
      "step": 4620
    },
    {
      "epoch": 0.9710873664362036,
      "grad_norm": 0.6609058380126953,
      "learning_rate": 7.903773090194069e-05,
      "loss": 0.4573,
      "step": 4635
    },
    {
      "epoch": 0.9742300439974858,
      "grad_norm": 0.7192760705947876,
      "learning_rate": 7.889945994278514e-05,
      "loss": 0.4387,
      "step": 4650
    },
    {
      "epoch": 0.9773727215587681,
      "grad_norm": 0.7502164244651794,
      "learning_rate": 7.87608563013888e-05,
      "loss": 0.399,
      "step": 4665
    },
    {
      "epoch": 0.9805153991200503,
      "grad_norm": 0.7829092144966125,
      "learning_rate": 7.86219215733168e-05,
      "loss": 0.3705,
      "step": 4680
    },
    {
      "epoch": 0.9836580766813325,
      "grad_norm": 0.791359007358551,
      "learning_rate": 7.848265735794558e-05,
      "loss": 0.4434,
      "step": 4695
    },
    {
      "epoch": 0.9868007542426147,
      "grad_norm": 0.7627493739128113,
      "learning_rate": 7.834306525844461e-05,
      "loss": 0.4496,
      "step": 4710
    },
    {
      "epoch": 0.9899434318038969,
      "grad_norm": 0.679959237575531,
      "learning_rate": 7.820314688175784e-05,
      "loss": 0.4815,
      "step": 4725
    },
    {
      "epoch": 0.9930861093651792,
      "grad_norm": 0.8766529560089111,
      "learning_rate": 7.806290383858523e-05,
      "loss": 0.4704,
      "step": 4740
    },
    {
      "epoch": 0.9962287869264613,
      "grad_norm": 1.1642574071884155,
      "learning_rate": 7.792233774336423e-05,
      "loss": 0.4974,
      "step": 4755
    },
    {
      "epoch": 0.9993714644877436,
      "grad_norm": 0.7194317579269409,
      "learning_rate": 7.778145021425114e-05,
      "loss": 0.4423,
      "step": 4770
    },
    {
      "epoch": 1.0025141420490258,
      "grad_norm": 0.7814803719520569,
      "learning_rate": 7.764024287310252e-05,
      "loss": 0.4194,
      "step": 4785
    },
    {
      "epoch": 1.005656819610308,
      "grad_norm": 0.8891781568527222,
      "learning_rate": 7.749871734545652e-05,
      "loss": 0.3977,
      "step": 4800
    },
    {
      "epoch": 1.0087994971715901,
      "grad_norm": 0.7444355487823486,
      "learning_rate": 7.735687526051418e-05,
      "loss": 0.3924,
      "step": 4815
    },
    {
      "epoch": 1.0119421747328725,
      "grad_norm": 0.9248786568641663,
      "learning_rate": 7.721471825112062e-05,
      "loss": 0.4273,
      "step": 4830
    },
    {
      "epoch": 1.0150848522941547,
      "grad_norm": 0.6513450741767883,
      "learning_rate": 7.70722479537463e-05,
      "loss": 0.3909,
      "step": 4845
    },
    {
      "epoch": 1.0182275298554369,
      "grad_norm": 0.8597205877304077,
      "learning_rate": 7.692946600846818e-05,
      "loss": 0.4027,
      "step": 4860
    },
    {
      "epoch": 1.021370207416719,
      "grad_norm": 0.9086320996284485,
      "learning_rate": 7.678637405895076e-05,
      "loss": 0.4225,
      "step": 4875
    },
    {
      "epoch": 1.0245128849780012,
      "grad_norm": 0.8219915628433228,
      "learning_rate": 7.66429737524273e-05,
      "loss": 0.4055,
      "step": 4890
    },
    {
      "epoch": 1.0276555625392834,
      "grad_norm": 0.9232605695724487,
      "learning_rate": 7.649926673968069e-05,
      "loss": 0.3801,
      "step": 4905
    },
    {
      "epoch": 1.0307982401005658,
      "grad_norm": 0.8866775035858154,
      "learning_rate": 7.635525467502462e-05,
      "loss": 0.3887,
      "step": 4920
    },
    {
      "epoch": 1.033940917661848,
      "grad_norm": 0.6395006775856018,
      "learning_rate": 7.62109392162844e-05,
      "loss": 0.4018,
      "step": 4935
    },
    {
      "epoch": 1.03708359522313,
      "grad_norm": 0.8276055455207825,
      "learning_rate": 7.60663220247779e-05,
      "loss": 0.3875,
      "step": 4950
    },
    {
      "epoch": 1.0402262727844123,
      "grad_norm": 0.8251763582229614,
      "learning_rate": 7.592140476529652e-05,
      "loss": 0.3912,
      "step": 4965
    },
    {
      "epoch": 1.0433689503456944,
      "grad_norm": 0.8321304321289062,
      "learning_rate": 7.577618910608591e-05,
      "loss": 0.4317,
      "step": 4980
    },
    {
      "epoch": 1.0465116279069768,
      "grad_norm": 0.6474670171737671,
      "learning_rate": 7.56306767188268e-05,
      "loss": 0.4594,
      "step": 4995
    },
    {
      "epoch": 1.049654305468259,
      "grad_norm": 0.6989348530769348,
      "learning_rate": 7.548486927861582e-05,
      "loss": 0.3744,
      "step": 5010
    },
    {
      "epoch": 1.0527969830295412,
      "grad_norm": 0.8184515237808228,
      "learning_rate": 7.533876846394613e-05,
      "loss": 0.3364,
      "step": 5025
    },
    {
      "epoch": 1.0559396605908233,
      "grad_norm": 0.7965102195739746,
      "learning_rate": 7.519237595668811e-05,
      "loss": 0.3934,
      "step": 5040
    },
    {
      "epoch": 1.0590823381521055,
      "grad_norm": 0.731299638748169,
      "learning_rate": 7.504569344207007e-05,
      "loss": 0.4161,
      "step": 5055
    },
    {
      "epoch": 1.062225015713388,
      "grad_norm": 0.9074578881263733,
      "learning_rate": 7.489872260865877e-05,
      "loss": 0.4103,
      "step": 5070
    },
    {
      "epoch": 1.06536769327467,
| "grad_norm": 0.8735909461975098, | |
| "learning_rate": 7.475146514834001e-05, | |
| "loss": 0.3686, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 1.0685103708359522, | |
| "grad_norm": 0.7814076542854309, | |
| "learning_rate": 7.460392275629918e-05, | |
| "loss": 0.3943, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.0716530483972344, | |
| "grad_norm": 0.8307476043701172, | |
| "learning_rate": 7.445609713100171e-05, | |
| "loss": 0.3999, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 1.0747957259585166, | |
| "grad_norm": 0.7908287048339844, | |
| "learning_rate": 7.430798997417353e-05, | |
| "loss": 0.4104, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.077938403519799, | |
| "grad_norm": 0.8598707914352417, | |
| "learning_rate": 7.415960299078143e-05, | |
| "loss": 0.3976, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "grad_norm": 0.5163241028785706, | |
| "learning_rate": 7.40109378890136e-05, | |
| "loss": 0.3506, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.0842237586423633, | |
| "grad_norm": 0.8642787933349609, | |
| "learning_rate": 7.386199638025973e-05, | |
| "loss": 0.31, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 1.0873664362036455, | |
| "grad_norm": 0.7603743076324463, | |
| "learning_rate": 7.371278017909148e-05, | |
| "loss": 0.4695, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.0905091137649277, | |
| "grad_norm": 0.7949853539466858, | |
| "learning_rate": 7.356329100324273e-05, | |
| "loss": 0.4076, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 1.0936517913262098, | |
| "grad_norm": 0.8560110926628113, | |
| "learning_rate": 7.341353057358966e-05, | |
| "loss": 0.3833, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.0967944688874922, | |
| "grad_norm": 0.632763147354126, | |
| "learning_rate": 7.326350061413114e-05, | |
| "loss": 0.4128, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 1.0999371464487744, | |
| "grad_norm": 0.9416031837463379, | |
| "learning_rate": 7.311320285196875e-05, | |
| "loss": 0.3665, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.1030798240100566, | |
| "grad_norm": 0.6195524334907532, | |
| "learning_rate": 7.296263901728694e-05, | |
| "loss": 0.362, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 1.1062225015713387, | |
| "grad_norm": 0.8545498251914978, | |
| "learning_rate": 7.281181084333311e-05, | |
| "loss": 0.361, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.109365179132621, | |
| "grad_norm": 0.75226229429245, | |
| "learning_rate": 7.26607200663977e-05, | |
| "loss": 0.3948, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 1.1125078566939033, | |
| "grad_norm": 0.877756655216217, | |
| "learning_rate": 7.250936842579407e-05, | |
| "loss": 0.4061, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.1156505342551855, | |
| "grad_norm": 0.5953283309936523, | |
| "learning_rate": 7.235775766383862e-05, | |
| "loss": 0.3273, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 1.1187932118164676, | |
| "grad_norm": 0.8206706643104553, | |
| "learning_rate": 7.220588952583071e-05, | |
| "loss": 0.3757, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.1219358893777498, | |
| "grad_norm": 0.7466344237327576, | |
| "learning_rate": 7.205376576003247e-05, | |
| "loss": 0.3892, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 1.125078566939032, | |
| "grad_norm": 0.8034494519233704, | |
| "learning_rate": 7.190138811764882e-05, | |
| "loss": 0.4043, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.1282212445003144, | |
| "grad_norm": 0.9050668478012085, | |
| "learning_rate": 7.174875835280716e-05, | |
| "loss": 0.3812, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 1.1313639220615965, | |
| "grad_norm": 0.8540876507759094, | |
| "learning_rate": 7.159587822253733e-05, | |
| "loss": 0.3645, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.1345065996228787, | |
| "grad_norm": 0.7688354849815369, | |
| "learning_rate": 7.14427494867512e-05, | |
| "loss": 0.3683, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 1.1376492771841609, | |
| "grad_norm": 0.6950829029083252, | |
| "learning_rate": 7.128937390822261e-05, | |
| "loss": 0.3347, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.140791954745443, | |
| "grad_norm": 0.8212427496910095, | |
| "learning_rate": 7.113575325256694e-05, | |
| "loss": 0.3775, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 1.1439346323067254, | |
| "grad_norm": 0.8312988877296448, | |
| "learning_rate": 7.098188928822084e-05, | |
| "loss": 0.4325, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.1470773098680076, | |
| "grad_norm": 0.9646623134613037, | |
| "learning_rate": 7.082778378642184e-05, | |
| "loss": 0.3898, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 1.1502199874292898, | |
| "grad_norm": 0.8333424925804138, | |
| "learning_rate": 7.0673438521188e-05, | |
| "loss": 0.4068, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.153362664990572, | |
| "grad_norm": 0.918892502784729, | |
| "learning_rate": 7.051885526929747e-05, | |
| "loss": 0.3968, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 1.156505342551854, | |
| "grad_norm": 0.5460782647132874, | |
| "learning_rate": 7.0364035810268e-05, | |
| "loss": 0.3672, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.1596480201131363, | |
| "grad_norm": 0.876811683177948, | |
| "learning_rate": 7.020898192633655e-05, | |
| "loss": 0.408, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 1.1627906976744187, | |
| "grad_norm": 0.6740222573280334, | |
| "learning_rate": 7.005369540243864e-05, | |
| "loss": 0.2995, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.1659333752357008, | |
| "grad_norm": 0.8702965378761292, | |
| "learning_rate": 6.989817802618792e-05, | |
| "loss": 0.3307, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 1.169076052796983, | |
| "grad_norm": 0.8837511539459229, | |
| "learning_rate": 6.974243158785554e-05, | |
| "loss": 0.3864, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.1722187303582652, | |
| "grad_norm": 0.4050454795360565, | |
| "learning_rate": 6.958645788034952e-05, | |
| "loss": 0.3525, | |
| "step": 5595 | |
| }, | |
| { | |
| "epoch": 1.1753614079195476, | |
| "grad_norm": 0.8361005187034607, | |
| "learning_rate": 6.943025869919418e-05, | |
| "loss": 0.3747, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.1785040854808297, | |
| "grad_norm": 0.841556191444397, | |
| "learning_rate": 6.92738358425094e-05, | |
| "loss": 0.406, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 1.181646763042112, | |
| "grad_norm": 0.629443883895874, | |
| "learning_rate": 6.911719111098996e-05, | |
| "loss": 0.4175, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.184789440603394, | |
| "grad_norm": 0.7146449685096741, | |
| "learning_rate": 6.896032630788476e-05, | |
| "loss": 0.3511, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 1.1879321181646763, | |
| "grad_norm": 0.8358393311500549, | |
| "learning_rate": 6.880324323897617e-05, | |
| "loss": 0.3851, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.1910747957259584, | |
| "grad_norm": 0.742857813835144, | |
| "learning_rate": 6.864594371255913e-05, | |
| "loss": 0.3821, | |
| "step": 5685 | |
| }, | |
| { | |
| "epoch": 1.1942174732872408, | |
| "grad_norm": 0.7099196910858154, | |
| "learning_rate": 6.848842953942036e-05, | |
| "loss": 0.3789, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.197360150848523, | |
| "grad_norm": 0.754542350769043, | |
| "learning_rate": 6.83307025328176e-05, | |
| "loss": 0.3472, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 1.2005028284098052, | |
| "grad_norm": 0.7466986775398254, | |
| "learning_rate": 6.817276450845856e-05, | |
| "loss": 0.3393, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.2036455059710873, | |
| "grad_norm": 0.7026840448379517, | |
| "learning_rate": 6.801461728448022e-05, | |
| "loss": 0.3891, | |
| "step": 5745 | |
| }, | |
| { | |
| "epoch": 1.2067881835323695, | |
| "grad_norm": 1.1348669528961182, | |
| "learning_rate": 6.785626268142777e-05, | |
| "loss": 0.3802, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.2099308610936519, | |
| "grad_norm": 0.7511578798294067, | |
| "learning_rate": 6.769770252223369e-05, | |
| "loss": 0.4252, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 1.213073538654934, | |
| "grad_norm": 0.8412914276123047, | |
| "learning_rate": 6.753893863219675e-05, | |
| "loss": 0.3813, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "grad_norm": 0.8765383958816528, | |
| "learning_rate": 6.737997283896103e-05, | |
| "loss": 0.3712, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 1.2193588937774984, | |
| "grad_norm": 0.7843053340911865, | |
| "learning_rate": 6.722080697249487e-05, | |
| "loss": 0.3776, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.2225015713387806, | |
| "grad_norm": 1.0745536088943481, | |
| "learning_rate": 6.706144286506978e-05, | |
| "loss": 0.3499, | |
| "step": 5835 | |
| }, | |
| { | |
| "epoch": 1.2256442489000627, | |
| "grad_norm": 0.7722020745277405, | |
| "learning_rate": 6.690188235123934e-05, | |
| "loss": 0.4211, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.2287869264613451, | |
| "grad_norm": 0.9631087183952332, | |
| "learning_rate": 6.674212726781814e-05, | |
| "loss": 0.3772, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 1.2319296040226273, | |
| "grad_norm": 0.8981698751449585, | |
| "learning_rate": 6.65821794538606e-05, | |
| "loss": 0.4598, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.2350722815839095, | |
| "grad_norm": 0.778362512588501, | |
| "learning_rate": 6.642204075063974e-05, | |
| "loss": 0.4179, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 1.2382149591451916, | |
| "grad_norm": 0.8421118259429932, | |
| "learning_rate": 6.626171300162615e-05, | |
| "loss": 0.3583, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.241357636706474, | |
| "grad_norm": 1.0227240324020386, | |
| "learning_rate": 6.610119805246653e-05, | |
| "loss": 0.3919, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 1.2445003142677562, | |
| "grad_norm": 0.5748106837272644, | |
| "learning_rate": 6.594049775096268e-05, | |
| "loss": 0.3571, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.2476429918290384, | |
| "grad_norm": 0.6924661993980408, | |
| "learning_rate": 6.577961394705008e-05, | |
| "loss": 0.3812, | |
| "step": 5955 | |
| }, | |
| { | |
| "epoch": 1.2507856693903205, | |
| "grad_norm": 0.7702043056488037, | |
| "learning_rate": 6.561854849277664e-05, | |
| "loss": 0.331, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.2539283469516027, | |
| "grad_norm": 0.6666329503059387, | |
| "learning_rate": 6.545730324228136e-05, | |
| "loss": 0.3266, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 1.2570710245128849, | |
| "grad_norm": 0.9120034575462341, | |
| "learning_rate": 6.529588005177305e-05, | |
| "loss": 0.4188, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.260213702074167, | |
| "grad_norm": 0.7251651287078857, | |
| "learning_rate": 6.513428077950886e-05, | |
| "loss": 0.4067, | |
| "step": 6015 | |
| }, | |
| { | |
| "epoch": 1.2633563796354494, | |
| "grad_norm": 0.6845729947090149, | |
| "learning_rate": 6.497250728577296e-05, | |
| "loss": 0.4266, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.2664990571967316, | |
| "grad_norm": 0.7530787587165833, | |
| "learning_rate": 6.481056143285512e-05, | |
| "loss": 0.3302, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 1.2696417347580138, | |
| "grad_norm": 0.7474608421325684, | |
| "learning_rate": 6.464844508502927e-05, | |
| "loss": 0.4305, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.2727844123192962, | |
| "grad_norm": 0.8672669529914856, | |
| "learning_rate": 6.448616010853199e-05, | |
| "loss": 0.4267, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 1.2759270898805783, | |
| "grad_norm": 0.7703887224197388, | |
| "learning_rate": 6.432370837154109e-05, | |
| "loss": 0.3531, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.2790697674418605, | |
| "grad_norm": 0.7432886958122253, | |
| "learning_rate": 6.416109174415406e-05, | |
| "loss": 0.3189, | |
| "step": 6105 | |
| }, | |
| { | |
| "epoch": 1.2822124450031427, | |
| "grad_norm": 0.9600912928581238, | |
| "learning_rate": 6.399831209836659e-05, | |
| "loss": 0.4036, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.2853551225644249, | |
| "grad_norm": 0.7727882862091064, | |
| "learning_rate": 6.383537130805098e-05, | |
| "loss": 0.3857, | |
| "step": 6135 | |
| }, | |
| { | |
| "epoch": 1.288497800125707, | |
| "grad_norm": 0.7628008723258972, | |
| "learning_rate": 6.367227124893455e-05, | |
| "loss": 0.4229, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.2916404776869892, | |
| "grad_norm": 0.9682219624519348, | |
| "learning_rate": 6.350901379857814e-05, | |
| "loss": 0.3544, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 1.2947831552482716, | |
| "grad_norm": 0.7553837895393372, | |
| "learning_rate": 6.334560083635434e-05, | |
| "loss": 0.3968, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.2979258328095538, | |
| "grad_norm": 0.7951422333717346, | |
| "learning_rate": 6.318203424342605e-05, | |
| "loss": 0.2946, | |
| "step": 6195 | |
| }, | |
| { | |
| "epoch": 1.301068510370836, | |
| "grad_norm": 0.9351706504821777, | |
| "learning_rate": 6.301831590272465e-05, | |
| "loss": 0.4203, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.304211187932118, | |
| "grad_norm": 0.8283166289329529, | |
| "learning_rate": 6.28544476989284e-05, | |
| "loss": 0.4166, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 1.3073538654934005, | |
| "grad_norm": 0.7889246940612793, | |
| "learning_rate": 6.269043151844081e-05, | |
| "loss": 0.4084, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.3104965430546827, | |
| "grad_norm": 0.7893148064613342, | |
| "learning_rate": 6.252626924936876e-05, | |
| "loss": 0.3327, | |
| "step": 6255 | |
| }, | |
| { | |
| "epoch": 1.3136392206159648, | |
| "grad_norm": 0.9599968194961548, | |
| "learning_rate": 6.236196278150092e-05, | |
| "loss": 0.3987, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.316781898177247, | |
| "grad_norm": 0.7326962351799011, | |
| "learning_rate": 6.219751400628593e-05, | |
| "loss": 0.3872, | |
| "step": 6285 | |
| }, | |
| { | |
| "epoch": 1.3199245757385292, | |
| "grad_norm": 0.7666275501251221, | |
| "learning_rate": 6.203292481681061e-05, | |
| "loss": 0.2906, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.3230672532998113, | |
| "grad_norm": 0.7648006081581116, | |
| "learning_rate": 6.186819710777819e-05, | |
| "loss": 0.4077, | |
| "step": 6315 | |
| }, | |
| { | |
| "epoch": 1.3262099308610937, | |
| "grad_norm": 0.8993086218833923, | |
| "learning_rate": 6.170333277548653e-05, | |
| "loss": 0.3334, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.329352608422376, | |
| "grad_norm": 0.8966405987739563, | |
| "learning_rate": 6.153833371780622e-05, | |
| "loss": 0.3772, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 1.332495285983658, | |
| "grad_norm": 0.955697774887085, | |
| "learning_rate": 6.137320183415877e-05, | |
| "loss": 0.3652, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.3356379635449402, | |
| "grad_norm": 0.913931667804718, | |
| "learning_rate": 6.120793902549478e-05, | |
| "loss": 0.3943, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 1.3387806411062226, | |
| "grad_norm": 0.471160352230072, | |
| "learning_rate": 6.1042547194272e-05, | |
| "loss": 0.3656, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.3419233186675048, | |
| "grad_norm": 0.7883521914482117, | |
| "learning_rate": 6.0877028244433444e-05, | |
| "loss": 0.3494, | |
| "step": 6405 | |
| }, | |
| { | |
| "epoch": 1.345065996228787, | |
| "grad_norm": 0.8015203475952148, | |
| "learning_rate": 6.071138408138547e-05, | |
| "loss": 0.3498, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.3482086737900691, | |
| "grad_norm": 0.8431302905082703, | |
| "learning_rate": 6.0545616611975886e-05, | |
| "loss": 0.3726, | |
| "step": 6435 | |
| }, | |
| { | |
| "epoch": 1.3513513513513513, | |
| "grad_norm": 0.6410717964172363, | |
| "learning_rate": 6.0379727744471936e-05, | |
| "loss": 0.3793, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.3544940289126335, | |
| "grad_norm": 0.8410218358039856, | |
| "learning_rate": 6.021371938853839e-05, | |
| "loss": 0.4294, | |
| "step": 6465 | |
| }, | |
| { | |
| "epoch": 1.3576367064739157, | |
| "grad_norm": 0.622178852558136, | |
| "learning_rate": 6.004759345521552e-05, | |
| "loss": 0.3373, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.360779384035198, | |
| "grad_norm": 0.8277848362922668, | |
| "learning_rate": 5.988135185689712e-05, | |
| "loss": 0.3796, | |
| "step": 6495 | |
| }, | |
| { | |
| "epoch": 1.3639220615964802, | |
| "grad_norm": 0.799150824546814, | |
| "learning_rate": 5.9714996507308465e-05, | |
| "loss": 0.3361, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.3670647391577624, | |
| "grad_norm": 0.8518102765083313, | |
| "learning_rate": 5.954852932148433e-05, | |
| "loss": 0.3913, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 1.3702074167190446, | |
| "grad_norm": 0.7465687990188599, | |
| "learning_rate": 5.9381952215746905e-05, | |
| "loss": 0.3546, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.373350094280327, | |
| "grad_norm": 0.7342978119850159, | |
| "learning_rate": 5.921526710768376e-05, | |
| "loss": 0.3832, | |
| "step": 6555 | |
| }, | |
| { | |
| "epoch": 1.3764927718416091, | |
| "grad_norm": 0.6754856109619141, | |
| "learning_rate": 5.9048475916125723e-05, | |
| "loss": 0.4051, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.3796354494028913, | |
| "grad_norm": 0.6392863988876343, | |
| "learning_rate": 5.888158056112486e-05, | |
| "loss": 0.3828, | |
| "step": 6585 | |
| }, | |
| { | |
| "epoch": 1.3827781269641735, | |
| "grad_norm": 0.897132933139801, | |
| "learning_rate": 5.871458296393231e-05, | |
| "loss": 0.405, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.3859208045254556, | |
| "grad_norm": 0.7124328017234802, | |
| "learning_rate": 5.854748504697624e-05, | |
| "loss": 0.3712, | |
| "step": 6615 | |
| }, | |
| { | |
| "epoch": 1.3890634820867378, | |
| "grad_norm": 0.8436194062232971, | |
| "learning_rate": 5.8380288733839585e-05, | |
| "loss": 0.3773, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.3922061596480202, | |
| "grad_norm": 0.780944287776947, | |
| "learning_rate": 5.8212995949238083e-05, | |
| "loss": 0.3529, | |
| "step": 6645 | |
| }, | |
| { | |
| "epoch": 1.3953488372093024, | |
| "grad_norm": 1.0335406064987183, | |
| "learning_rate": 5.804560861899795e-05, | |
| "loss": 0.4262, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.3984915147705845, | |
| "grad_norm": 0.7593971490859985, | |
| "learning_rate": 5.7878128670033826e-05, | |
| "loss": 0.4079, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 1.4016341923318667, | |
| "grad_norm": 0.7240027189254761, | |
| "learning_rate": 5.7710558030326545e-05, | |
| "loss": 0.3835, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.404776869893149, | |
| "grad_norm": 1.530868411064148, | |
| "learning_rate": 5.754289862890093e-05, | |
| "loss": 0.4294, | |
| "step": 6705 | |
| }, | |
| { | |
| "epoch": 1.4079195474544313, | |
| "grad_norm": 0.6043078899383545, | |
| "learning_rate": 5.7375152395803624e-05, | |
| "loss": 0.3343, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.4110622250157134, | |
| "grad_norm": 0.8058659434318542, | |
| "learning_rate": 5.720732126208082e-05, | |
| "loss": 0.4533, | |
| "step": 6735 | |
| }, | |
| { | |
| "epoch": 1.4142049025769956, | |
| "grad_norm": 0.7185141444206238, | |
| "learning_rate": 5.7039407159756106e-05, | |
| "loss": 0.42, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.4173475801382778, | |
| "grad_norm": 1.0086369514465332, | |
| "learning_rate": 5.687141202180817e-05, | |
| "loss": 0.3701, | |
| "step": 6765 | |
| }, | |
| { | |
| "epoch": 1.42049025769956, | |
| "grad_norm": 1.0289742946624756, | |
| "learning_rate": 5.67033377821485e-05, | |
| "loss": 0.4565, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.4236329352608421, | |
| "grad_norm": 1.1389039754867554, | |
| "learning_rate": 5.6535186375599266e-05, | |
| "loss": 0.3555, | |
| "step": 6795 | |
| }, | |
| { | |
| "epoch": 1.4267756128221245, | |
| "grad_norm": 0.887610673904419, | |
| "learning_rate": 5.636695973787093e-05, | |
| "loss": 0.368, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.4299182903834067, | |
| "grad_norm": 0.9625629186630249, | |
| "learning_rate": 5.619865980553994e-05, | |
| "loss": 0.3962, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 1.4330609679446888, | |
| "grad_norm": 0.8793766498565674, | |
| "learning_rate": 5.6030288516026564e-05, | |
| "loss": 0.3979, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.436203645505971, | |
| "grad_norm": 0.7626388669013977, | |
| "learning_rate": 5.586184780757251e-05, | |
| "loss": 0.345, | |
| "step": 6855 | |
| }, | |
| { | |
| "epoch": 1.4393463230672534, | |
| "grad_norm": 1.109713077545166, | |
| "learning_rate": 5.5693339619218534e-05, | |
| "loss": 0.4446, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.4424890006285356, | |
| "grad_norm": 0.9758956432342529, | |
| "learning_rate": 5.552476589078231e-05, | |
| "loss": 0.401, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 1.4456316781898177, | |
| "grad_norm": 0.923329770565033, | |
| "learning_rate": 5.5356128562835904e-05, | |
| "loss": 0.385, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.4487743557511, | |
| "grad_norm": 0.7539265155792236, | |
| "learning_rate": 5.518742957668359e-05, | |
| "loss": 0.3274, | |
| "step": 6915 | |
| }, | |
| { | |
| "epoch": 1.451917033312382, | |
| "grad_norm": 0.8187793493270874, | |
| "learning_rate": 5.5018670874339386e-05, | |
| "loss": 0.3677, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.4550597108736643, | |
| "grad_norm": 0.9522603750228882, | |
| "learning_rate": 5.484985439850473e-05, | |
| "loss": 0.3319, | |
| "step": 6945 | |
| }, | |
| { | |
| "epoch": 1.4582023884349467, | |
| "grad_norm": 0.8808611631393433, | |
| "learning_rate": 5.468098209254622e-05, | |
| "loss": 0.4311, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.4613450659962288, | |
| "grad_norm": 0.6949836611747742, | |
| "learning_rate": 5.4512055900473035e-05, | |
| "loss": 0.3679, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 1.464487743557511, | |
| "grad_norm": 0.783545196056366, | |
| "learning_rate": 5.434307776691479e-05, | |
| "loss": 0.3552, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.4676304211187932, | |
| "grad_norm": 0.8342312574386597, | |
| "learning_rate": 5.417404963709894e-05, | |
| "loss": 0.3755, | |
| "step": 7005 | |
| }, | |
| { | |
| "epoch": 1.4707730986800756, | |
| "grad_norm": 0.7615540027618408, | |
| "learning_rate": 5.400497345682857e-05, | |
| "loss": 0.3605, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.4739157762413577, | |
| "grad_norm": 0.8944594860076904, | |
| "learning_rate": 5.3835851172459794e-05, | |
| "loss": 0.3948, | |
| "step": 7035 | |
| }, | |
| { | |
| "epoch": 1.47705845380264, | |
| "grad_norm": 0.8412215113639832, | |
| "learning_rate": 5.36666847308796e-05, | |
| "loss": 0.3658, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.480201131363922, | |
| "grad_norm": 0.8457724452018738, | |
| "learning_rate": 5.34974760794832e-05, | |
| "loss": 0.4327, | |
| "step": 7065 | |
| }, | |
| { | |
| "epoch": 1.4833438089252042, | |
| "grad_norm": 0.7231891751289368, | |
| "learning_rate": 5.332822716615172e-05, | |
| "loss": 0.3489, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.4864864864864864, | |
| "grad_norm": 0.8975026607513428, | |
| "learning_rate": 5.315893993922986e-05, | |
| "loss": 0.331, | |
| "step": 7095 | |
| }, | |
| { | |
| "epoch": 1.4896291640477686, | |
| "grad_norm": 0.871842086315155, | |
| "learning_rate": 5.2989616347503244e-05, | |
| "loss": 0.4056, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.492771841609051, | |
| "grad_norm": 0.5846161246299744, | |
| "learning_rate": 5.282025834017623e-05, | |
| "loss": 0.381, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 1.4959145191703331, | |
| "grad_norm": 0.6650387644767761, | |
| "learning_rate": 5.265086786684929e-05, | |
| "loss": 0.34, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.4990571967316153, | |
| "grad_norm": 0.862241804599762, | |
| "learning_rate": 5.2481446877496665e-05, | |
| "loss": 0.354, | |
| "step": 7155 | |
| }, | |
| { | |
| "epoch": 1.5021998742928977, | |
| "grad_norm": 0.8328828811645508, | |
| "learning_rate": 5.231199732244386e-05, | |
| "loss": 0.3772, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.5053425518541799, | |
| "grad_norm": 0.5438669323921204, | |
| "learning_rate": 5.214252115234527e-05, | |
| "loss": 0.3493, | |
| "step": 7185 | |
| }, | |
| { | |
| "epoch": 1.508485229415462, | |
| "grad_norm": 0.7722681760787964, | |
| "learning_rate": 5.197302031816165e-05, | |
| "loss": 0.3494, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.5116279069767442, | |
| "grad_norm": 0.9693325161933899, | |
| "learning_rate": 5.180349677113762e-05, | |
| "loss": 0.3512, | |
| "step": 7215 | |
| }, | |
| { | |
| "epoch": 1.5147705845380264, | |
| "grad_norm": 1.0208348035812378, | |
| "learning_rate": 5.163395246277938e-05, | |
| "loss": 0.2772, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.5179132620993085, | |
| "grad_norm": 0.8255509734153748, | |
| "learning_rate": 5.1464389344832024e-05, | |
| "loss": 0.3491, | |
| "step": 7245 | |
| }, | |
| { | |
| "epoch": 1.5210559396605907, | |
| "grad_norm": 0.723574697971344, | |
| "learning_rate": 5.1294809369257244e-05, | |
| "loss": 0.3894, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.5241986172218729, | |
| "grad_norm": 0.8955418467521667, | |
| "learning_rate": 5.112521448821076e-05, | |
| "loss": 0.3722, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 1.5273412947831553, | |
| "grad_norm": 0.9446234703063965, | |
| "learning_rate": 5.0955606654019895e-05, | |
| "loss": 0.3602, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.5304839723444374, | |
| "grad_norm": 0.7256786227226257, | |
| "learning_rate": 5.078598781916107e-05, | |
| "loss": 0.3488, | |
| "step": 7305 | |
| }, | |
| { | |
| "epoch": 1.5336266499057196, | |
| "grad_norm": 0.775834858417511, | |
| "learning_rate": 5.0616359936237355e-05, | |
| "loss": 0.3983, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.536769327467002, | |
| "grad_norm": 0.7684575915336609, | |
| "learning_rate": 5.044672495795598e-05, | |
| "loss": 0.3992, | |
| "step": 7335 | |
| }, | |
| { | |
| "epoch": 1.5399120050282842, | |
| "grad_norm": 0.7569010853767395, | |
| "learning_rate": 5.0277084837105826e-05, | |
| "loss": 0.352, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.5430546825895664, | |
| "grad_norm": 0.7330282926559448, | |
| "learning_rate": 5.010744152653501e-05, | |
| "loss": 0.3486, | |
| "step": 7365 | |
| }, | |
| { | |
| "epoch": 1.5461973601508485, | |
| "grad_norm": 0.8921106457710266, | |
| "learning_rate": 4.993779697912837e-05, | |
| "loss": 0.3107, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.5493400377121307, | |
| "grad_norm": 0.7190592288970947, | |
| "learning_rate": 4.976815314778493e-05, | |
| "loss": 0.3429, | |
| "step": 7395 | |
| }, | |
| { | |
| "epoch": 1.5524827152734129, | |
| "grad_norm": 0.8145999312400818, | |
| "learning_rate": 4.9598511985395535e-05, | |
| "loss": 0.3455, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.555625392834695, | |
| "grad_norm": 0.7628950476646423, | |
| "learning_rate": 4.942887544482029e-05, | |
| "loss": 0.3362, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 1.5587680703959774, | |
| "grad_norm": 0.5859194993972778, | |
| "learning_rate": 4.925924547886603e-05, | |
| "loss": 0.3723, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.5619107479572596, | |
| "grad_norm": 0.7906526327133179, | |
| "learning_rate": 4.9089624040264013e-05, | |
| "loss": 0.3511, | |
| "step": 7455 | |
| }, | |
| { | |
| "epoch": 1.5650534255185418, | |
| "grad_norm": 0.7591722011566162, | |
| "learning_rate": 4.892001308164727e-05, | |
| "loss": 0.4439, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.5681961030798242, | |
| "grad_norm": 0.9237760901451111, | |
| "learning_rate": 4.875041455552817e-05, | |
| "loss": 0.3638, | |
| "step": 7485 | |
| }, | |
| { | |
| "epoch": 1.5713387806411063, | |
| "grad_norm": 0.734752893447876, | |
| "learning_rate": 4.858083041427599e-05, | |
| "loss": 0.4047, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.5744814582023885, | |
| "grad_norm": 0.676703155040741, | |
| "learning_rate": 4.8411262610094445e-05, | |
| "loss": 0.3566, | |
| "step": 7515 | |
| }, | |
| { | |
| "epoch": 1.5776241357636707, | |
| "grad_norm": 0.8751126527786255, | |
| "learning_rate": 4.824171309499913e-05, | |
| "loss": 0.3743, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.5807668133249528, | |
| "grad_norm": 0.6884835958480835, | |
| "learning_rate": 4.807218382079511e-05, | |
| "loss": 0.3821, | |
| "step": 7545 | |
| }, | |
| { | |
| "epoch": 1.583909490886235, | |
| "grad_norm": 0.8230961561203003, | |
| "learning_rate": 4.790267673905447e-05, | |
| "loss": 0.3193, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.5870521684475172, | |
| "grad_norm": 0.8046270608901978, | |
| "learning_rate": 4.7733193801093803e-05, | |
| "loss": 0.3714, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 1.5901948460087993, | |
| "grad_norm": 0.895897626876831, | |
| "learning_rate": 4.756373695795177e-05, | |
| "loss": 0.386, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.5933375235700817, | |
| "grad_norm": 0.8858537077903748, | |
| "learning_rate": 4.7394308160366617e-05, | |
| "loss": 0.3755, | |
| "step": 7605 | |
| }, | |
| { | |
| "epoch": 1.596480201131364, | |
| "grad_norm": 0.6874979138374329, | |
| "learning_rate": 4.722490935875377e-05, | |
| "loss": 0.3547, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.5996228786926463, | |
| "grad_norm": 0.8027022480964661, | |
| "learning_rate": 4.705554250318335e-05, | |
| "loss": 0.3702, | |
| "step": 7635 | |
| }, | |
| { | |
| "epoch": 1.6027655562539285, | |
| "grad_norm": 0.9383290410041809, | |
| "learning_rate": 4.688620954335766e-05, | |
| "loss": 0.4038, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.6059082338152106, | |
| "grad_norm": 0.8475779294967651, | |
| "learning_rate": 4.671691242858891e-05, | |
| "loss": 0.3257, | |
| "step": 7665 | |
| }, | |
| { | |
| "epoch": 1.6090509113764928, | |
| "grad_norm": 0.702893853187561, | |
| "learning_rate": 4.654765310777659e-05, | |
| "loss": 0.3642, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.612193588937775, | |
| "grad_norm": 0.7762289047241211, | |
| "learning_rate": 4.6378433529385157e-05, | |
| "loss": 0.3859, | |
| "step": 7695 | |
| }, | |
| { | |
| "epoch": 1.6153362664990571, | |
| "grad_norm": 0.7309826016426086, | |
| "learning_rate": 4.620925564142151e-05, | |
| "loss": 0.3427, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.6184789440603393, | |
| "grad_norm": 0.655974805355072, | |
| "learning_rate": 4.60401213914127e-05, | |
| "loss": 0.3893, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 1.6216216216216215, | |
| "grad_norm": 0.7434260845184326, | |
| "learning_rate": 4.5871032726383386e-05, | |
| "loss": 0.3528, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.6247642991829039, | |
| "grad_norm": 0.981696605682373, | |
| "learning_rate": 4.570199159283345e-05, | |
| "loss": 0.3792, | |
| "step": 7755 | |
| }, | |
| { | |
| "epoch": 1.627906976744186, | |
| "grad_norm": 0.5884058475494385, | |
| "learning_rate": 4.553299993671567e-05, | |
| "loss": 0.3082, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.6310496543054682, | |
| "grad_norm": 0.9349349737167358, | |
| "learning_rate": 4.536405970341317e-05, | |
| "loss": 0.3736, | |
| "step": 7785 | |
| }, | |
| { | |
| "epoch": 1.6341923318667506, | |
| "grad_norm": 0.8422302603721619, | |
| "learning_rate": 4.519517283771717e-05, | |
| "loss": 0.3897, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.6373350094280328, | |
| "grad_norm": 0.7569222450256348, | |
| "learning_rate": 4.502634128380448e-05, | |
| "loss": 0.3581, | |
| "step": 7815 | |
| }, | |
| { | |
| "epoch": 1.640477686989315, | |
| "grad_norm": 0.8034069538116455, | |
| "learning_rate": 4.4857566985215276e-05, | |
| "loss": 0.3542, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.6436203645505971, | |
| "grad_norm": 0.5547857284545898, | |
| "learning_rate": 4.4688851884830516e-05, | |
| "loss": 0.3089, | |
| "step": 7845 | |
| }, | |
| { | |
| "epoch": 1.6467630421118793, | |
| "grad_norm": 0.8145669102668762, | |
| "learning_rate": 4.452019792484975e-05, | |
| "loss": 0.3391, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.6499057196731615, | |
| "grad_norm": 0.672332227230072, | |
| "learning_rate": 4.4351607046768704e-05, | |
| "loss": 0.3866, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 1.6530483972344436, | |
| "grad_norm": 0.7952318787574768, | |
| "learning_rate": 4.418308119135686e-05, | |
| "loss": 0.4221, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.6561910747957258, | |
| "grad_norm": 0.7489158511161804, | |
| "learning_rate": 4.401462229863526e-05, | |
| "loss": 0.3687, | |
| "step": 7905 | |
| }, | |
| { | |
| "epoch": 1.6593337523570082, | |
| "grad_norm": 0.8457122445106506, | |
| "learning_rate": 4.3846232307854e-05, | |
| "loss": 0.3888, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.6624764299182904, | |
| "grad_norm": 0.7040199637413025, | |
| "learning_rate": 4.36779131574701e-05, | |
| "loss": 0.3437, | |
| "step": 7935 | |
| }, | |
| { | |
| "epoch": 1.6656191074795728, | |
| "grad_norm": 1.0369516611099243, | |
| "learning_rate": 4.3509666785125005e-05, | |
| "loss": 0.3557, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.668761785040855, | |
| "grad_norm": 0.7418217062950134, | |
| "learning_rate": 4.334149512762238e-05, | |
| "loss": 0.351, | |
| "step": 7965 | |
| }, | |
| { | |
| "epoch": 1.671904462602137, | |
| "grad_norm": 0.6527841687202454, | |
| "learning_rate": 4.3173400120905824e-05, | |
| "loss": 0.3286, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.6750471401634193, | |
| "grad_norm": 0.9062017798423767, | |
| "learning_rate": 4.3005383700036525e-05, | |
| "loss": 0.3828, | |
| "step": 7995 | |
| }, | |
| { | |
| "epoch": 1.6781898177247014, | |
| "grad_norm": 0.6981047987937927, | |
| "learning_rate": 4.283744779917102e-05, | |
| "loss": 0.3689, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.6813324952859836, | |
| "grad_norm": 0.8865767121315002, | |
| "learning_rate": 4.26695943515389e-05, | |
| "loss": 0.3912, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 1.6844751728472658, | |
| "grad_norm": 0.5835604667663574, | |
| "learning_rate": 4.250182528942065e-05, | |
| "loss": 0.317, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.687617850408548, | |
| "grad_norm": 0.869529128074646, | |
| "learning_rate": 4.233414254412525e-05, | |
| "loss": 0.4031, | |
| "step": 8055 | |
| }, | |
| { | |
| "epoch": 1.6907605279698303, | |
| "grad_norm": 0.7666299939155579, | |
| "learning_rate": 4.216654804596808e-05, | |
| "loss": 0.3635, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.6939032055311125, | |
| "grad_norm": 0.6868289709091187, | |
| "learning_rate": 4.199904372424858e-05, | |
| "loss": 0.3554, | |
| "step": 8085 | |
| }, | |
| { | |
| "epoch": 1.6970458830923947, | |
| "grad_norm": 0.7406291961669922, | |
| "learning_rate": 4.183163150722822e-05, | |
| "loss": 0.3216, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.700188560653677, | |
| "grad_norm": 0.7962248921394348, | |
| "learning_rate": 4.166431332210807e-05, | |
| "loss": 0.3398, | |
| "step": 8115 | |
| }, | |
| { | |
| "epoch": 1.7033312382149592, | |
| "grad_norm": 1.02495276927948, | |
| "learning_rate": 4.149709109500678e-05, | |
| "loss": 0.3817, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.7064739157762414, | |
| "grad_norm": 0.7741113305091858, | |
| "learning_rate": 4.13299667509384e-05, | |
| "loss": 0.4072, | |
| "step": 8145 | |
| }, | |
| { | |
| "epoch": 1.7096165933375236, | |
| "grad_norm": 0.7952526807785034, | |
| "learning_rate": 4.1162942213790086e-05, | |
| "loss": 0.3441, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.7127592708988058, | |
| "grad_norm": 0.7849689722061157, | |
| "learning_rate": 4.0996019406300126e-05, | |
| "loss": 0.3417, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 1.715901948460088, | |
| "grad_norm": 0.7431788444519043, | |
| "learning_rate": 4.082920025003567e-05, | |
| "loss": 0.3995, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.71904462602137, | |
| "grad_norm": 0.7709872126579285, | |
| "learning_rate": 4.0662486665370734e-05, | |
| "loss": 0.4069, | |
| "step": 8205 | |
| }, | |
| { | |
| "epoch": 1.7221873035826523, | |
| "grad_norm": 0.6013693809509277, | |
| "learning_rate": 4.049588057146394e-05, | |
| "loss": 0.3877, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.7253299811439347, | |
| "grad_norm": 0.7985032796859741, | |
| "learning_rate": 4.032938388623657e-05, | |
| "loss": 0.3407, | |
| "step": 8235 | |
| }, | |
| { | |
| "epoch": 1.7284726587052168, | |
| "grad_norm": 0.6259362101554871, | |
| "learning_rate": 4.01629985263504e-05, | |
| "loss": 0.3167, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.7316153362664992, | |
| "grad_norm": 0.7632457613945007, | |
| "learning_rate": 3.999672640718567e-05, | |
| "loss": 0.365, | |
| "step": 8265 | |
| }, | |
| { | |
| "epoch": 1.7347580138277814, | |
| "grad_norm": 0.9532593488693237, | |
| "learning_rate": 3.983056944281901e-05, | |
| "loss": 0.427, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.7379006913890636, | |
| "grad_norm": 0.7168596386909485, | |
| "learning_rate": 3.966452954600142e-05, | |
| "loss": 0.3776, | |
| "step": 8295 | |
| }, | |
| { | |
| "epoch": 1.7410433689503457, | |
| "grad_norm": 0.753966748714447, | |
| "learning_rate": 3.94986086281363e-05, | |
| "loss": 0.3792, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.744186046511628, | |
| "grad_norm": 0.38063740730285645, | |
| "learning_rate": 3.933280859925734e-05, | |
| "loss": 0.3499, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 1.74732872407291, | |
| "grad_norm": 0.8001086711883545, | |
| "learning_rate": 3.916713136800659e-05, | |
| "loss": 0.3491, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.7504714016341922, | |
| "grad_norm": 0.7394033074378967, | |
| "learning_rate": 3.900157884161255e-05, | |
| "loss": 0.3383, | |
| "step": 8355 | |
| }, | |
| { | |
| "epoch": 1.7536140791954744, | |
| "grad_norm": 0.7337818741798401, | |
| "learning_rate": 3.8836152925868114e-05, | |
| "loss": 0.3705, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.7567567567567568, | |
| "grad_norm": 0.7671971917152405, | |
| "learning_rate": 3.867085552510864e-05, | |
| "loss": 0.3125, | |
| "step": 8385 | |
| }, | |
| { | |
| "epoch": 1.759899434318039, | |
| "grad_norm": 0.8018542528152466, | |
| "learning_rate": 3.850568854219011e-05, | |
| "loss": 0.3678, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.7630421118793211, | |
| "grad_norm": 0.8364083766937256, | |
| "learning_rate": 3.834065387846718e-05, | |
| "loss": 0.4179, | |
| "step": 8415 | |
| }, | |
| { | |
| "epoch": 1.7661847894406035, | |
| "grad_norm": 0.8526837825775146, | |
| "learning_rate": 3.817575343377122e-05, | |
| "loss": 0.3881, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.7693274670018857, | |
| "grad_norm": 0.6416676640510559, | |
| "learning_rate": 3.8010989106388554e-05, | |
| "loss": 0.3099, | |
| "step": 8445 | |
| }, | |
| { | |
| "epoch": 1.7724701445631679, | |
| "grad_norm": 0.7990739941596985, | |
| "learning_rate": 3.784636279303858e-05, | |
| "loss": 0.3598, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.77561282212445, | |
| "grad_norm": 0.8872657418251038, | |
| "learning_rate": 3.76818763888519e-05, | |
| "loss": 0.3882, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 1.7787554996857322, | |
| "grad_norm": 0.8712546229362488, | |
| "learning_rate": 3.7517531787348484e-05, | |
| "loss": 0.3773, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.7818981772470144, | |
| "grad_norm": 0.7423908710479736, | |
| "learning_rate": 3.735333088041596e-05, | |
| "loss": 0.3777, | |
| "step": 8505 | |
| }, | |
| { | |
| "epoch": 1.7850408548082966, | |
| "grad_norm": 0.9166727066040039, | |
| "learning_rate": 3.718927555828779e-05, | |
| "loss": 0.4059, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.7881835323695787, | |
| "grad_norm": 0.7207896113395691, | |
| "learning_rate": 3.702536770952148e-05, | |
| "loss": 0.3754, | |
| "step": 8535 | |
| }, | |
| { | |
| "epoch": 1.7913262099308611, | |
| "grad_norm": 0.844727635383606, | |
| "learning_rate": 3.6861609220976846e-05, | |
| "loss": 0.3328, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.7944688874921433, | |
| "grad_norm": 0.7674320340156555, | |
| "learning_rate": 3.6698001977794366e-05, | |
| "loss": 0.3806, | |
| "step": 8565 | |
| }, | |
| { | |
| "epoch": 1.7976115650534257, | |
| "grad_norm": 0.6307094693183899, | |
| "learning_rate": 3.6534547863373394e-05, | |
| "loss": 0.3694, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.8007542426147078, | |
| "grad_norm": 0.767432451248169, | |
| "learning_rate": 3.63712487593505e-05, | |
| "loss": 0.4028, | |
| "step": 8595 | |
| }, | |
| { | |
| "epoch": 1.80389692017599, | |
| "grad_norm": 0.8937990665435791, | |
| "learning_rate": 3.6208106545577824e-05, | |
| "loss": 0.3372, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.8070395977372722, | |
| "grad_norm": 0.590930163860321, | |
| "learning_rate": 3.604512310010146e-05, | |
| "loss": 0.3684, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 1.8101822752985544, | |
| "grad_norm": 0.8184636831283569, | |
| "learning_rate": 3.58823002991398e-05, | |
| "loss": 0.373, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.8133249528598365, | |
| "grad_norm": 0.9741955399513245, | |
| "learning_rate": 3.5719640017061885e-05, | |
| "loss": 0.3374, | |
| "step": 8655 | |
| }, | |
| { | |
| "epoch": 1.8164676304211187, | |
| "grad_norm": 1.0014973878860474, | |
| "learning_rate": 3.555714412636595e-05, | |
| "loss": 0.3848, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.8196103079824009, | |
| "grad_norm": 0.6335365772247314, | |
| "learning_rate": 3.53948144976578e-05, | |
| "loss": 0.3689, | |
| "step": 8685 | |
| }, | |
| { | |
| "epoch": 1.8227529855436833, | |
| "grad_norm": 0.5687909722328186, | |
| "learning_rate": 3.523265299962924e-05, | |
| "loss": 0.4178, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.8258956631049654, | |
| "grad_norm": 0.8622750043869019, | |
| "learning_rate": 3.507066149903662e-05, | |
| "loss": 0.3899, | |
| "step": 8715 | |
| }, | |
| { | |
| "epoch": 1.8290383406662476, | |
| "grad_norm": 0.7984293699264526, | |
| "learning_rate": 3.490884186067935e-05, | |
| "loss": 0.4353, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.83218101822753, | |
| "grad_norm": 0.7962972521781921, | |
| "learning_rate": 3.474719594737842e-05, | |
| "loss": 0.3324, | |
| "step": 8745 | |
| }, | |
| { | |
| "epoch": 1.8353236957888122, | |
| "grad_norm": 0.7194257974624634, | |
| "learning_rate": 3.4585725619954864e-05, | |
| "loss": 0.3765, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.8384663733500943, | |
| "grad_norm": 0.6931387782096863, | |
| "learning_rate": 3.442443273720853e-05, | |
| "loss": 0.3183, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 1.8416090509113765, | |
| "grad_norm": 0.7540430426597595, | |
| "learning_rate": 3.426331915589651e-05, | |
| "loss": 0.3975, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.8447517284726587, | |
| "grad_norm": 0.7310993671417236, | |
| "learning_rate": 3.410238673071185e-05, | |
| "loss": 0.3975, | |
| "step": 8805 | |
| }, | |
| { | |
| "epoch": 1.8478944060339408, | |
| "grad_norm": 0.7351768612861633, | |
| "learning_rate": 3.394163731426216e-05, | |
| "loss": 0.3558, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.851037083595223, | |
| "grad_norm": 0.7860934138298035, | |
| "learning_rate": 3.378107275704834e-05, | |
| "loss": 0.3601, | |
| "step": 8835 | |
| }, | |
| { | |
| "epoch": 1.8541797611565052, | |
| "grad_norm": 0.6049594283103943, | |
| "learning_rate": 3.362069490744322e-05, | |
| "loss": 0.3692, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.8573224387177876, | |
| "grad_norm": 0.9184178709983826, | |
| "learning_rate": 3.346050561167029e-05, | |
| "loss": 0.3518, | |
| "step": 8865 | |
| }, | |
| { | |
| "epoch": 1.8604651162790697, | |
| "grad_norm": 0.7558075189590454, | |
| "learning_rate": 3.3300506713782495e-05, | |
| "loss": 0.3587, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.8636077938403521, | |
| "grad_norm": 0.7545658349990845, | |
| "learning_rate": 3.314070005564097e-05, | |
| "loss": 0.3679, | |
| "step": 8895 | |
| }, | |
| { | |
| "epoch": 1.8667504714016343, | |
| "grad_norm": 0.9135695695877075, | |
| "learning_rate": 3.2981087476893853e-05, | |
| "loss": 0.3725, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.8698931489629165, | |
| "grad_norm": 0.9788998961448669, | |
| "learning_rate": 3.2821670814955026e-05, | |
| "loss": 0.3149, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 1.8730358265241986, | |
| "grad_norm": 0.7953155636787415, | |
| "learning_rate": 3.266245190498311e-05, | |
| "loss": 0.3461, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.8761785040854808, | |
| "grad_norm": 0.9166163802146912, | |
| "learning_rate": 3.250343257986027e-05, | |
| "loss": 0.3866, | |
| "step": 8955 | |
| }, | |
| { | |
| "epoch": 1.879321181646763, | |
| "grad_norm": 0.9379754066467285, | |
| "learning_rate": 3.2344614670171025e-05, | |
| "loss": 0.3928, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.8824638592080452, | |
| "grad_norm": 0.8782539963722229, | |
| "learning_rate": 3.2186000004181314e-05, | |
| "loss": 0.3959, | |
| "step": 8985 | |
| }, | |
| { | |
| "epoch": 1.8856065367693273, | |
| "grad_norm": 0.7237117886543274, | |
| "learning_rate": 3.2027590407817407e-05, | |
| "loss": 0.3458, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.8887492143306097, | |
| "grad_norm": 0.8787809014320374, | |
| "learning_rate": 3.186938770464486e-05, | |
| "loss": 0.4081, | |
| "step": 9015 | |
| }, | |
| { | |
| "epoch": 1.8918918918918919, | |
| "grad_norm": 0.7628602981567383, | |
| "learning_rate": 3.1711393715847476e-05, | |
| "loss": 0.3928, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.895034569453174, | |
| "grad_norm": 0.9172194600105286, | |
| "learning_rate": 3.15536102602065e-05, | |
| "loss": 0.3777, | |
| "step": 9045 | |
| }, | |
| { | |
| "epoch": 1.8981772470144564, | |
| "grad_norm": 0.8413445353507996, | |
| "learning_rate": 3.13960391540795e-05, | |
| "loss": 0.36, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.9013199245757386, | |
| "grad_norm": 0.9793257117271423, | |
| "learning_rate": 3.1238682211379586e-05, | |
| "loss": 0.3801, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 1.9044626021370208, | |
| "grad_norm": 0.7620652318000793, | |
| "learning_rate": 3.1081541243554427e-05, | |
| "loss": 0.3689, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.907605279698303, | |
| "grad_norm": 0.8353012800216675, | |
| "learning_rate": 3.092461805956551e-05, | |
| "loss": 0.3961, | |
| "step": 9105 | |
| }, | |
| { | |
| "epoch": 1.9107479572595851, | |
| "grad_norm": 0.8704758882522583, | |
| "learning_rate": 3.0767914465867246e-05, | |
| "loss": 0.3168, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.9138906348208673, | |
| "grad_norm": 0.6754759550094604, | |
| "learning_rate": 3.061143226638611e-05, | |
| "loss": 0.3407, | |
| "step": 9135 | |
| }, | |
| { | |
| "epoch": 1.9170333123821495, | |
| "grad_norm": 0.9682889580726624, | |
| "learning_rate": 3.0455173262500093e-05, | |
| "loss": 0.4251, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.9201759899434316, | |
| "grad_norm": 0.8114556670188904, | |
| "learning_rate": 3.0299139253017695e-05, | |
| "loss": 0.3397, | |
| "step": 9165 | |
| }, | |
| { | |
| "epoch": 1.923318667504714, | |
| "grad_norm": 0.8123522996902466, | |
| "learning_rate": 3.014333203415741e-05, | |
| "loss": 0.3372, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.9264613450659962, | |
| "grad_norm": 0.6080268025398254, | |
| "learning_rate": 2.9987753399526934e-05, | |
| "loss": 0.3506, | |
| "step": 9195 | |
| }, | |
| { | |
| "epoch": 1.9296040226272786, | |
| "grad_norm": 0.8804168701171875, | |
| "learning_rate": 2.9832405140102637e-05, | |
| "loss": 0.3689, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.9327467001885608, | |
| "grad_norm": 0.8579033613204956, | |
| "learning_rate": 2.9677289044208833e-05, | |
| "loss": 0.3875, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 1.935889377749843, | |
| "grad_norm": 0.9520317316055298, | |
| "learning_rate": 2.952240689749722e-05, | |
| "loss": 0.422, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.939032055311125, | |
| "grad_norm": 0.9517824053764343, | |
| "learning_rate": 2.9367760482926393e-05, | |
| "loss": 0.3917, | |
| "step": 9255 | |
| }, | |
| { | |
| "epoch": 1.9421747328724073, | |
| "grad_norm": 0.8813058733940125, | |
| "learning_rate": 2.921335158074122e-05, | |
| "loss": 0.3551, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.9453174104336894, | |
| "grad_norm": 0.8402652144432068, | |
| "learning_rate": 2.905918196845242e-05, | |
| "loss": 0.3468, | |
| "step": 9285 | |
| }, | |
| { | |
| "epoch": 1.9484600879949716, | |
| "grad_norm": 0.855032205581665, | |
| "learning_rate": 2.8905253420816035e-05, | |
| "loss": 0.3534, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.9516027655562538, | |
| "grad_norm": 0.7760915756225586, | |
| "learning_rate": 2.875156770981311e-05, | |
| "loss": 0.348, | |
| "step": 9315 | |
| }, | |
| { | |
| "epoch": 1.9547454431175362, | |
| "grad_norm": 0.946934163570404, | |
| "learning_rate": 2.8598126604629195e-05, | |
| "loss": 0.3556, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.9578881206788183, | |
| "grad_norm": 0.7589976191520691, | |
| "learning_rate": 2.844493187163395e-05, | |
| "loss": 0.3944, | |
| "step": 9345 | |
| }, | |
| { | |
| "epoch": 1.9610307982401005, | |
| "grad_norm": 0.8831868171691895, | |
| "learning_rate": 2.8291985274360983e-05, | |
| "loss": 0.3192, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.964173475801383, | |
| "grad_norm": 0.8260477781295776, | |
| "learning_rate": 2.8139288573487337e-05, | |
| "loss": 0.3476, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 1.967316153362665, | |
| "grad_norm": 0.9583712816238403, | |
| "learning_rate": 2.7986843526813343e-05, | |
| "loss": 0.3112, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.9704588309239472, | |
| "grad_norm": 0.8534590005874634, | |
| "learning_rate": 2.783465188924239e-05, | |
| "loss": 0.3738, | |
| "step": 9405 | |
| }, | |
| { | |
| "epoch": 1.9736015084852294, | |
| "grad_norm": 0.8562766909599304, | |
| "learning_rate": 2.7682715412760696e-05, | |
| "loss": 0.3831, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.9767441860465116, | |
| "grad_norm": 0.649868905544281, | |
| "learning_rate": 2.7531035846417107e-05, | |
| "loss": 0.379, | |
| "step": 9435 | |
| }, | |
| { | |
| "epoch": 1.9798868636077938, | |
| "grad_norm": 0.7702896595001221, | |
| "learning_rate": 2.7379614936302982e-05, | |
| "loss": 0.3617, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.983029541169076, | |
| "grad_norm": 0.9378584623336792, | |
| "learning_rate": 2.7228454425532157e-05, | |
| "loss": 0.3681, | |
| "step": 9465 | |
| }, | |
| { | |
| "epoch": 1.9861722187303583, | |
| "grad_norm": 1.0069222450256348, | |
| "learning_rate": 2.7077556054220804e-05, | |
| "loss": 0.3356, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.9893148962916405, | |
| "grad_norm": 0.9345496892929077, | |
| "learning_rate": 2.6926921559467412e-05, | |
| "loss": 0.3974, | |
| "step": 9495 | |
| }, | |
| { | |
| "epoch": 1.9924575738529227, | |
| "grad_norm": 0.8090453147888184, | |
| "learning_rate": 2.6776552675332768e-05, | |
| "loss": 0.3397, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.995600251414205, | |
| "grad_norm": 0.647416353225708, | |
| "learning_rate": 2.6626451132820085e-05, | |
| "loss": 0.3259, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 1.9987429289754872, | |
| "grad_norm": 0.7810280323028564, | |
| "learning_rate": 2.6476618659855023e-05, | |
| "loss": 0.3234, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 2.0018856065367694, | |
| "grad_norm": 0.7231355309486389, | |
| "learning_rate": 2.6327056981265708e-05, | |
| "loss": 0.3276, | |
| "step": 9555 | |
| }, | |
| { | |
| "epoch": 2.0050282840980516, | |
| "grad_norm": 0.7072864174842834, | |
| "learning_rate": 2.6177767818763062e-05, | |
| "loss": 0.2683, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 2.0081709616593337, | |
| "grad_norm": 0.8502817749977112, | |
| "learning_rate": 2.6028752890920783e-05, | |
| "loss": 0.2844, | |
| "step": 9585 | |
| }, | |
| { | |
| "epoch": 2.011313639220616, | |
| "grad_norm": 0.6001257300376892, | |
| "learning_rate": 2.5880013913155743e-05, | |
| "loss": 0.2582, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.014456316781898, | |
| "grad_norm": 1.037467360496521, | |
| "learning_rate": 2.5731552597708086e-05, | |
| "loss": 0.2666, | |
| "step": 9615 | |
| }, | |
| { | |
| "epoch": 2.0175989943431802, | |
| "grad_norm": 0.990047812461853, | |
| "learning_rate": 2.5583370653621652e-05, | |
| "loss": 0.3042, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 2.0207416719044624, | |
| "grad_norm": 1.0518317222595215, | |
| "learning_rate": 2.5435469786724204e-05, | |
| "loss": 0.2543, | |
| "step": 9645 | |
| }, | |
| { | |
| "epoch": 2.023884349465745, | |
| "grad_norm": 1.225774884223938, | |
| "learning_rate": 2.528785169960779e-05, | |
| "loss": 0.3183, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 2.027027027027027, | |
| "grad_norm": 0.9525572061538696, | |
| "learning_rate": 2.5140518091609256e-05, | |
| "loss": 0.3426, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 2.0301697045883094, | |
| "grad_norm": 1.0750566720962524, | |
| "learning_rate": 2.4993470658790573e-05, | |
| "loss": 0.3172, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 2.0333123821495915, | |
| "grad_norm": 0.8268773555755615, | |
| "learning_rate": 2.484671109391933e-05, | |
| "loss": 0.31, | |
| "step": 9705 | |
| }, | |
| { | |
| "epoch": 2.0364550597108737, | |
| "grad_norm": 0.679678201675415, | |
| "learning_rate": 2.470024108644925e-05, | |
| "loss": 0.2868, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 2.039597737272156, | |
| "grad_norm": 0.997440755367279, | |
| "learning_rate": 2.4554062322500797e-05, | |
| "loss": 0.3291, | |
| "step": 9735 | |
| }, | |
| { | |
| "epoch": 2.042740414833438, | |
| "grad_norm": 0.9968817830085754, | |
| "learning_rate": 2.4408176484841732e-05, | |
| "loss": 0.2664, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 2.04588309239472, | |
| "grad_norm": 1.0939124822616577, | |
| "learning_rate": 2.4262585252867686e-05, | |
| "loss": 0.2895, | |
| "step": 9765 | |
| }, | |
| { | |
| "epoch": 2.0490257699560024, | |
| "grad_norm": 1.0220900774002075, | |
| "learning_rate": 2.4117290302582872e-05, | |
| "loss": 0.3191, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 2.0521684475172846, | |
| "grad_norm": 0.635898768901825, | |
| "learning_rate": 2.397229330658084e-05, | |
| "loss": 0.307, | |
| "step": 9795 | |
| }, | |
| { | |
| "epoch": 2.0553111250785667, | |
| "grad_norm": 1.112257719039917, | |
| "learning_rate": 2.382759593402517e-05, | |
| "loss": 0.2748, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 2.0584538026398493, | |
| "grad_norm": 0.9440275430679321, | |
| "learning_rate": 2.3683199850630213e-05, | |
| "loss": 0.2893, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 2.0615964802011315, | |
| "grad_norm": 1.2118226289749146, | |
| "learning_rate": 2.3539106718642034e-05, | |
| "loss": 0.2791, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 2.0647391577624137, | |
| "grad_norm": 1.1374374628067017, | |
| "learning_rate": 2.339531819681914e-05, | |
| "loss": 0.2777, | |
| "step": 9855 | |
| }, | |
| { | |
| "epoch": 2.067881835323696, | |
| "grad_norm": 0.6932136416435242, | |
| "learning_rate": 2.3251835940413517e-05, | |
| "loss": 0.2828, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 2.071024512884978, | |
| "grad_norm": 1.0308489799499512, | |
| "learning_rate": 2.310866160115146e-05, | |
| "loss": 0.2947, | |
| "step": 9885 | |
| }, | |
| { | |
| "epoch": 2.07416719044626, | |
| "grad_norm": 1.063235878944397, | |
| "learning_rate": 2.2965796827214665e-05, | |
| "loss": 0.3204, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.0773098680075424, | |
| "grad_norm": 1.1612193584442139, | |
| "learning_rate": 2.282324326322115e-05, | |
| "loss": 0.2976, | |
| "step": 9915 | |
| }, | |
| { | |
| "epoch": 2.0804525455688245, | |
| "grad_norm": 0.8928938508033752, | |
| "learning_rate": 2.2681002550206355e-05, | |
| "loss": 0.2921, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 2.0835952231301067, | |
| "grad_norm": 1.066124677658081, | |
| "learning_rate": 2.253907632560439e-05, | |
| "loss": 0.298, | |
| "step": 9945 | |
| }, | |
| { | |
| "epoch": 2.086737900691389, | |
| "grad_norm": 0.8713576197624207, | |
| "learning_rate": 2.2397466223228947e-05, | |
| "loss": 0.275, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 2.0898805782526715, | |
| "grad_norm": 1.1056296825408936, | |
| "learning_rate": 2.2256173873254643e-05, | |
| "loss": 0.3266, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 2.0930232558139537, | |
| "grad_norm": 0.9172502160072327, | |
| "learning_rate": 2.211520090219821e-05, | |
| "loss": 0.2731, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 2.0951183741881416, | |
| "eval_accuracy": 0.009820309467613697, | |
| "eval_loss": 0.4190310835838318, | |
| "eval_runtime": 424.9528, | |
| "eval_samples_per_second": 11.26, | |
| "eval_steps_per_second": 2.817, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.096165933375236, | |
| "grad_norm": 0.9003602862358093, | |
| "learning_rate": 2.1974548932899814e-05, | |
| "loss": 0.2534, | |
| "step": 10005 | |
| }, | |
| { | |
| "epoch": 2.099308610936518, | |
| "grad_norm": 1.0138850212097168, | |
| "learning_rate": 2.1834219584504345e-05, | |
| "loss": 0.2847, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 2.1024512884978, | |
| "grad_norm": 0.8467048406600952, | |
| "learning_rate": 2.169421447244272e-05, | |
| "loss": 0.3011, | |
| "step": 10035 | |
| }, | |
| { | |
| "epoch": 2.1055939660590823, | |
| "grad_norm": 1.1273193359375, | |
| "learning_rate": 2.1554535208413406e-05, | |
| "loss": 0.3181, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 2.1087366436203645, | |
| "grad_norm": 1.1201776266098022, | |
| "learning_rate": 2.1415183400363748e-05, | |
| "loss": 0.3122, | |
| "step": 10065 | |
| }, | |
| { | |
| "epoch": 2.1118793211816467, | |
| "grad_norm": 1.0749905109405518, | |
| "learning_rate": 2.1276160652471555e-05, | |
| "loss": 0.3357, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 2.115021998742929, | |
| "grad_norm": 0.874462366104126, | |
| "learning_rate": 2.1137468565126543e-05, | |
| "loss": 0.3014, | |
| "step": 10095 | |
| }, | |
| { | |
| "epoch": 2.118164676304211, | |
| "grad_norm": 1.0569285154342651, | |
| "learning_rate": 2.099910873491202e-05, | |
| "loss": 0.2945, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 2.121307353865493, | |
| "grad_norm": 0.9067788124084473, | |
| "learning_rate": 2.0861082754586382e-05, | |
| "loss": 0.3218, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 2.124450031426776, | |
| "grad_norm": 1.2187013626098633, | |
| "learning_rate": 2.0723392213064884e-05, | |
| "loss": 0.3065, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 2.127592708988058, | |
| "grad_norm": 1.0931589603424072, | |
| "learning_rate": 2.0586038695401317e-05, | |
| "loss": 0.2792, | |
| "step": 10155 | |
| }, | |
| { | |
| "epoch": 2.13073538654934, | |
| "grad_norm": 1.2825082540512085, | |
| "learning_rate": 2.0449023782769706e-05, | |
| "loss": 0.3138, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 2.1338780641106223, | |
| "grad_norm": 1.0086079835891724, | |
| "learning_rate": 2.031234905244618e-05, | |
| "loss": 0.3079, | |
| "step": 10185 | |
| }, | |
| { | |
| "epoch": 2.1370207416719045, | |
| "grad_norm": 0.7740280032157898, | |
| "learning_rate": 2.017601607779074e-05, | |
| "loss": 0.2704, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.1401634192331866, | |
| "grad_norm": 0.7861264944076538, | |
| "learning_rate": 2.0040026428229313e-05, | |
| "loss": 0.296, | |
| "step": 10215 | |
| }, | |
| { | |
| "epoch": 2.143306096794469, | |
| "grad_norm": 0.8179210424423218, | |
| "learning_rate": 1.9904381669235456e-05, | |
| "loss": 0.296, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 2.146448774355751, | |
| "grad_norm": 1.410079002380371, | |
| "learning_rate": 1.976908336231245e-05, | |
| "loss": 0.2836, | |
| "step": 10245 | |
| }, | |
| { | |
| "epoch": 2.149591451917033, | |
| "grad_norm": 1.082899570465088, | |
| "learning_rate": 1.9634133064975402e-05, | |
| "loss": 0.2848, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 2.1527341294783153, | |
| "grad_norm": 0.9219628572463989, | |
| "learning_rate": 1.9499532330733135e-05, | |
| "loss": 0.3255, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 2.155876807039598, | |
| "grad_norm": 0.9849101901054382, | |
| "learning_rate": 1.9365282709070487e-05, | |
| "loss": 0.3336, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 2.15901948460088, | |
| "grad_norm": 0.8761511445045471, | |
| "learning_rate": 1.9231385745430308e-05, | |
| "loss": 0.3128, | |
| "step": 10305 | |
| }, | |
| { | |
| "epoch": 2.1621621621621623, | |
| "grad_norm": 1.1564205884933472, | |
| "learning_rate": 1.9097842981195834e-05, | |
| "loss": 0.291, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 2.1653048397234445, | |
| "grad_norm": 0.6984158158302307, | |
| "learning_rate": 1.8964655953672784e-05, | |
| "loss": 0.2761, | |
| "step": 10335 | |
| }, | |
| { | |
| "epoch": 2.1684475172847266, | |
| "grad_norm": 0.7349433898925781, | |
| "learning_rate": 1.883182619607179e-05, | |
| "loss": 0.3066, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 2.171590194846009, | |
| "grad_norm": 0.9663205742835999, | |
| "learning_rate": 1.8699355237490694e-05, | |
| "loss": 0.2644, | |
| "step": 10365 | |
| }, | |
| { | |
| "epoch": 2.174732872407291, | |
| "grad_norm": 1.194226861000061, | |
| "learning_rate": 1.856724460289692e-05, | |
| "loss": 0.3112, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 2.177875549968573, | |
| "grad_norm": 1.0187724828720093, | |
| "learning_rate": 1.8435495813109938e-05, | |
| "loss": 0.2779, | |
| "step": 10395 | |
| }, | |
| { | |
| "epoch": 2.1810182275298553, | |
| "grad_norm": 0.7448340654373169, | |
| "learning_rate": 1.8304110384783806e-05, | |
| "loss": 0.2723, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 2.1841609050911375, | |
| "grad_norm": 1.0969903469085693, | |
| "learning_rate": 1.8173089830389662e-05, | |
| "loss": 0.2824, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 2.1873035826524196, | |
| "grad_norm": 1.0222073793411255, | |
| "learning_rate": 1.8042435658198286e-05, | |
| "loss": 0.303, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 2.1904462602137023, | |
| "grad_norm": 0.9316915273666382, | |
| "learning_rate": 1.7912149372262793e-05, | |
| "loss": 0.2562, | |
| "step": 10455 | |
| }, | |
| { | |
| "epoch": 2.1935889377749844, | |
| "grad_norm": 0.6998715996742249, | |
| "learning_rate": 1.77822324724013e-05, | |
| "loss": 0.298, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 2.1967316153362666, | |
| "grad_norm": 0.9719591736793518, | |
| "learning_rate": 1.7652686454179686e-05, | |
| "loss": 0.2887, | |
| "step": 10485 | |
| }, | |
| { | |
| "epoch": 2.1998742928975488, | |
| "grad_norm": 0.8645143508911133, | |
| "learning_rate": 1.7523512808894288e-05, | |
| "loss": 0.2532, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.203016970458831, | |
| "grad_norm": 1.1070195436477661, | |
| "learning_rate": 1.739471302355482e-05, | |
| "loss": 0.2999, | |
| "step": 10515 | |
| }, | |
| { | |
| "epoch": 2.206159648020113, | |
| "grad_norm": 0.8601672053337097, | |
| "learning_rate": 1.7266288580867258e-05, | |
| "loss": 0.3209, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 2.2093023255813953, | |
| "grad_norm": 1.0818884372711182, | |
| "learning_rate": 1.713824095921668e-05, | |
| "loss": 0.3079, | |
| "step": 10545 | |
| }, | |
| { | |
| "epoch": 2.2124450031426774, | |
| "grad_norm": 0.7250615954399109, | |
| "learning_rate": 1.701057163265038e-05, | |
| "loss": 0.3364, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 2.2155876807039596, | |
| "grad_norm": 0.9716282486915588, | |
| "learning_rate": 1.6883282070860763e-05, | |
| "loss": 0.2898, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 2.218730358265242, | |
| "grad_norm": 1.0294605493545532, | |
| "learning_rate": 1.675637373916855e-05, | |
| "loss": 0.3075, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 2.2218730358265244, | |
| "grad_norm": 1.0724180936813354, | |
| "learning_rate": 1.662984809850579e-05, | |
| "loss": 0.3068, | |
| "step": 10605 | |
| }, | |
| { | |
| "epoch": 2.2250157133878066, | |
| "grad_norm": 0.9719418883323669, | |
| "learning_rate": 1.6503706605399156e-05, | |
| "loss": 0.3153, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 2.2281583909490887, | |
| "grad_norm": 0.8698229193687439, | |
| "learning_rate": 1.6377950711953115e-05, | |
| "loss": 0.2597, | |
| "step": 10635 | |
| }, | |
| { | |
| "epoch": 2.231301068510371, | |
| "grad_norm": 0.9012719988822937, | |
| "learning_rate": 1.6252581865833198e-05, | |
| "loss": 0.3284, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 2.234443746071653, | |
| "grad_norm": 0.8515365123748779, | |
| "learning_rate": 1.612760151024936e-05, | |
| "loss": 0.3147, | |
| "step": 10665 | |
| }, | |
| { | |
| "epoch": 2.2375864236329353, | |
| "grad_norm": 1.1416083574295044, | |
| "learning_rate": 1.6003011083939396e-05, | |
| "loss": 0.2958, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 2.2407291011942174, | |
| "grad_norm": 0.9006314873695374, | |
| "learning_rate": 1.5878812021152334e-05, | |
| "loss": 0.2757, | |
| "step": 10695 | |
| }, | |
| { | |
| "epoch": 2.2438717787554996, | |
| "grad_norm": 1.1663639545440674, | |
| "learning_rate": 1.5755005751631922e-05, | |
| "loss": 0.3064, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 2.2470144563167818, | |
| "grad_norm": 1.0664478540420532, | |
| "learning_rate": 1.563159370060019e-05, | |
| "loss": 0.2878, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 2.250157133878064, | |
| "grad_norm": 0.7780718207359314, | |
| "learning_rate": 1.5508577288741056e-05, | |
| "loss": 0.3065, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 2.253299811439346, | |
| "grad_norm": 1.1266307830810547, | |
| "learning_rate": 1.5385957932183954e-05, | |
| "loss": 0.3004, | |
| "step": 10755 | |
| }, | |
| { | |
| "epoch": 2.2564424890006287, | |
| "grad_norm": 0.7767760157585144, | |
| "learning_rate": 1.5263737042487514e-05, | |
| "loss": 0.291, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 2.259585166561911, | |
| "grad_norm": 0.6928930878639221, | |
| "learning_rate": 1.514191602662332e-05, | |
| "loss": 0.2945, | |
| "step": 10785 | |
| }, | |
| { | |
| "epoch": 2.262727844123193, | |
| "grad_norm": 1.177262544631958, | |
| "learning_rate": 1.5020496286959752e-05, | |
| "loss": 0.3168, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.2658705216844752, | |
| "grad_norm": 1.1784379482269287, | |
| "learning_rate": 1.4899479221245827e-05, | |
| "loss": 0.342, | |
| "step": 10815 | |
| }, | |
| { | |
| "epoch": 2.2690131992457574, | |
| "grad_norm": 1.4985358715057373, | |
| "learning_rate": 1.477886622259504e-05, | |
| "loss": 0.3073, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 2.2721558768070396, | |
| "grad_norm": 1.0009207725524902, | |
| "learning_rate": 1.4658658679469445e-05, | |
| "loss": 0.2888, | |
| "step": 10845 | |
| }, | |
| { | |
| "epoch": 2.2752985543683217, | |
| "grad_norm": 1.0263885259628296, | |
| "learning_rate": 1.4538857975663567e-05, | |
| "loss": 0.3153, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 2.278441231929604, | |
| "grad_norm": 0.8072161078453064, | |
| "learning_rate": 1.4419465490288508e-05, | |
| "loss": 0.2481, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 2.281583909490886, | |
| "grad_norm": 0.8211586475372314, | |
| "learning_rate": 1.430048259775611e-05, | |
| "loss": 0.2738, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 2.2847265870521687, | |
| "grad_norm": 1.0490375757217407, | |
| "learning_rate": 1.418191066776311e-05, | |
| "loss": 0.3005, | |
| "step": 10905 | |
| }, | |
| { | |
| "epoch": 2.287869264613451, | |
| "grad_norm": 0.9059322476387024, | |
| "learning_rate": 1.4063751065275315e-05, | |
| "loss": 0.2578, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 2.291011942174733, | |
| "grad_norm": 0.9448453187942505, | |
| "learning_rate": 1.3946005150511948e-05, | |
| "loss": 0.3033, | |
| "step": 10935 | |
| }, | |
| { | |
| "epoch": 2.294154619736015, | |
| "grad_norm": 0.9595757126808167, | |
| "learning_rate": 1.3828674278930009e-05, | |
| "loss": 0.3092, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 2.2972972972972974, | |
| "grad_norm": 0.6836899518966675, | |
| "learning_rate": 1.371175980120864e-05, | |
| "loss": 0.2354, | |
| "step": 10965 | |
| }, | |
| { | |
| "epoch": 2.3004399748585795, | |
| "grad_norm": 1.1870014667510986, | |
| "learning_rate": 1.3595263063233538e-05, | |
| "loss": 0.339, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 2.3035826524198617, | |
| "grad_norm": 0.9335547685623169, | |
| "learning_rate": 1.3479185406081519e-05, | |
| "loss": 0.2667, | |
| "step": 10995 | |
| }, | |
| { | |
| "epoch": 2.306725329981144, | |
| "grad_norm": 1.0864135026931763, | |
| "learning_rate": 1.3363528166005068e-05, | |
| "loss": 0.2993, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 2.309868007542426, | |
| "grad_norm": 1.3026399612426758, | |
| "learning_rate": 1.3248292674416968e-05, | |
| "loss": 0.2838, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 2.313010685103708, | |
| "grad_norm": 0.7582332491874695, | |
| "learning_rate": 1.3133480257874902e-05, | |
| "loss": 0.2746, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 2.3161533626649904, | |
| "grad_norm": 1.0766429901123047, | |
| "learning_rate": 1.3019092238066304e-05, | |
| "loss": 0.2915, | |
| "step": 11055 | |
| }, | |
| { | |
| "epoch": 2.3192960402262726, | |
| "grad_norm": 0.7966647148132324, | |
| "learning_rate": 1.2905129931793009e-05, | |
| "loss": 0.2586, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 2.322438717787555, | |
| "grad_norm": 1.0455411672592163, | |
| "learning_rate": 1.2791594650956212e-05, | |
| "loss": 0.2867, | |
| "step": 11085 | |
| }, | |
| { | |
| "epoch": 2.3255813953488373, | |
| "grad_norm": 0.9847836494445801, | |
| "learning_rate": 1.267848770254127e-05, | |
| "loss": 0.3219, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.3287240729101195, | |
| "grad_norm": 0.9694182276725769, | |
| "learning_rate": 1.256581038860275e-05, | |
| "loss": 0.2558, | |
| "step": 11115 | |
| }, | |
| { | |
| "epoch": 2.3318667504714017, | |
| "grad_norm": 1.4064688682556152, | |
| "learning_rate": 1.2453564006249352e-05, | |
| "loss": 0.2609, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 2.335009428032684, | |
| "grad_norm": 0.8352707028388977, | |
| "learning_rate": 1.2341749847628997e-05, | |
| "loss": 0.2985, | |
| "step": 11145 | |
| }, | |
| { | |
| "epoch": 2.338152105593966, | |
| "grad_norm": 1.016571044921875, | |
| "learning_rate": 1.2230369199914066e-05, | |
| "loss": 0.2673, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 2.341294783155248, | |
| "grad_norm": 0.9296002984046936, | |
| "learning_rate": 1.211942334528639e-05, | |
| "loss": 0.2685, | |
| "step": 11175 | |
| }, | |
| { | |
| "epoch": 2.3444374607165304, | |
| "grad_norm": 1.4591748714447021, | |
| "learning_rate": 1.200891356092263e-05, | |
| "loss": 0.2773, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 2.3475801382778125, | |
| "grad_norm": 0.9775596261024475, | |
| "learning_rate": 1.1898841118979504e-05, | |
| "loss": 0.2976, | |
| "step": 11205 | |
| }, | |
| { | |
| "epoch": 2.350722815839095, | |
| "grad_norm": 1.2126258611679077, | |
| "learning_rate": 1.1789207286579201e-05, | |
| "loss": 0.3298, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 2.3538654934003773, | |
| "grad_norm": 1.3125213384628296, | |
| "learning_rate": 1.1680013325794776e-05, | |
| "loss": 0.2639, | |
| "step": 11235 | |
| }, | |
| { | |
| "epoch": 2.3570081709616595, | |
| "grad_norm": 1.0396140813827515, | |
| "learning_rate": 1.1571260493635561e-05, | |
| "loss": 0.292, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 2.3601508485229417, | |
| "grad_norm": 0.9269897937774658, | |
| "learning_rate": 1.1462950042032767e-05, | |
| "loss": 0.3426, | |
| "step": 11265 | |
| }, | |
| { | |
| "epoch": 2.363293526084224, | |
| "grad_norm": 1.1665176153182983, | |
| "learning_rate": 1.1355083217825052e-05, | |
| "loss": 0.2794, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 2.366436203645506, | |
| "grad_norm": 1.0097540616989136, | |
| "learning_rate": 1.1247661262744175e-05, | |
| "loss": 0.2986, | |
| "step": 11295 | |
| }, | |
| { | |
| "epoch": 2.369578881206788, | |
| "grad_norm": 1.1132863759994507, | |
| "learning_rate": 1.1140685413400648e-05, | |
| "loss": 0.3229, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 2.3727215587680703, | |
| "grad_norm": 1.2184104919433594, | |
| "learning_rate": 1.1034156901269598e-05, | |
| "loss": 0.2708, | |
| "step": 11325 | |
| }, | |
| { | |
| "epoch": 2.3758642363293525, | |
| "grad_norm": 1.0664645433425903, | |
| "learning_rate": 1.0928076952676474e-05, | |
| "loss": 0.2728, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 2.3790069138906347, | |
| "grad_norm": 1.2971463203430176, | |
| "learning_rate": 1.0822446788783058e-05, | |
| "loss": 0.3048, | |
| "step": 11355 | |
| }, | |
| { | |
| "epoch": 2.382149591451917, | |
| "grad_norm": 0.9727672338485718, | |
| "learning_rate": 1.0717267625573279e-05, | |
| "loss": 0.2918, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 2.385292269013199, | |
| "grad_norm": 1.0206960439682007, | |
| "learning_rate": 1.0612540673839322e-05, | |
| "loss": 0.2885, | |
| "step": 11385 | |
| }, | |
| { | |
| "epoch": 2.3884349465744816, | |
| "grad_norm": 1.1079341173171997, | |
| "learning_rate": 1.0508267139167615e-05, | |
| "loss": 0.309, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.391577624135764, | |
| "grad_norm": 1.1144444942474365, | |
| "learning_rate": 1.0404448221924961e-05, | |
| "loss": 0.2268, | |
| "step": 11415 | |
| }, | |
| { | |
| "epoch": 2.394720301697046, | |
| "grad_norm": 1.1846858263015747, | |
| "learning_rate": 1.030108511724483e-05, | |
| "loss": 0.2822, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 2.397862979258328, | |
| "grad_norm": 1.063310146331787, | |
| "learning_rate": 1.019817901501341e-05, | |
| "loss": 0.2883, | |
| "step": 11445 | |
| }, | |
| { | |
| "epoch": 2.4010056568196103, | |
| "grad_norm": 1.1355246305465698, | |
| "learning_rate": 1.0095731099856049e-05, | |
| "loss": 0.2975, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 2.4041483343808925, | |
| "grad_norm": 1.017663836479187, | |
| "learning_rate": 9.993742551123558e-06, | |
| "loss": 0.2883, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 2.4072910119421747, | |
| "grad_norm": 1.3695423603057861, | |
| "learning_rate": 9.892214542878686e-06, | |
| "loss": 0.343, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 2.410433689503457, | |
| "grad_norm": 1.0663484334945679, | |
| "learning_rate": 9.79114824388257e-06, | |
| "loss": 0.26, | |
| "step": 11505 | |
| }, | |
| { | |
| "epoch": 2.413576367064739, | |
| "grad_norm": 1.0500160455703735, | |
| "learning_rate": 9.690544817581243e-06, | |
| "loss": 0.2877, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 2.4167190446260216, | |
| "grad_norm": 1.0720367431640625, | |
| "learning_rate": 9.590405422092336e-06, | |
| "loss": 0.2561, | |
| "step": 11535 | |
| }, | |
| { | |
| "epoch": 2.4198617221873038, | |
| "grad_norm": 0.9935043454170227, | |
| "learning_rate": 9.49073121019164e-06, | |
| "loss": 0.2764, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 2.423004399748586, | |
| "grad_norm": 1.2285892963409424, | |
| "learning_rate": 9.391523329299928e-06, | |
| "loss": 0.303, | |
| "step": 11565 | |
| }, | |
| { | |
| "epoch": 2.426147077309868, | |
| "grad_norm": 1.2495083808898926, | |
| "learning_rate": 9.292782921469673e-06, | |
| "loss": 0.3252, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 2.4292897548711503, | |
| "grad_norm": 1.0354247093200684, | |
| "learning_rate": 9.194511123371963e-06, | |
| "loss": 0.2692, | |
| "step": 11595 | |
| }, | |
| { | |
| "epoch": 2.4324324324324325, | |
| "grad_norm": 1.0744938850402832, | |
| "learning_rate": 9.096709066283354e-06, | |
| "loss": 0.2793, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 2.4355751099937146, | |
| "grad_norm": 1.145193338394165, | |
| "learning_rate": 8.9993778760729e-06, | |
| "loss": 0.3108, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 2.438717787554997, | |
| "grad_norm": 0.7168245911598206, | |
| "learning_rate": 8.902518673189192e-06, | |
| "loss": 0.3088, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 2.441860465116279, | |
| "grad_norm": 0.9759941697120667, | |
| "learning_rate": 8.806132572647386e-06, | |
| "loss": 0.2771, | |
| "step": 11655 | |
| }, | |
| { | |
| "epoch": 2.445003142677561, | |
| "grad_norm": 0.9443902373313904, | |
| "learning_rate": 8.710220684016462e-06, | |
| "loss": 0.2593, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 2.4481458202388433, | |
| "grad_norm": 0.9628651142120361, | |
| "learning_rate": 8.614784111406365e-06, | |
| "loss": 0.267, | |
| "step": 11685 | |
| }, | |
| { | |
| "epoch": 2.4512884978001255, | |
| "grad_norm": 1.0149531364440918, | |
| "learning_rate": 8.519823953455424e-06, | |
| "loss": 0.2929, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.454431175361408, | |
| "grad_norm": 0.9107941389083862, | |
| "learning_rate": 8.425341303317536e-06, | |
| "loss": 0.2911, | |
| "step": 11715 | |
| }, | |
| { | |
| "epoch": 2.4575738529226903, | |
| "grad_norm": 1.1681251525878906, | |
| "learning_rate": 8.33133724864969e-06, | |
| "loss": 0.2939, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 2.4607165304839724, | |
| "grad_norm": 0.8774799704551697, | |
| "learning_rate": 8.237812871599448e-06, | |
| "loss": 0.2612, | |
| "step": 11745 | |
| }, | |
| { | |
| "epoch": 2.4638592080452546, | |
| "grad_norm": 0.8654860854148865, | |
| "learning_rate": 8.144769248792417e-06, | |
| "loss": 0.2924, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 2.4670018856065368, | |
| "grad_norm": 1.062782645225525, | |
| "learning_rate": 8.052207451319954e-06, | |
| "loss": 0.2466, | |
| "step": 11775 | |
| }, | |
| { | |
| "epoch": 2.470144563167819, | |
| "grad_norm": 0.8732921481132507, | |
| "learning_rate": 7.960128544726724e-06, | |
| "loss": 0.2318, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 2.473287240729101, | |
| "grad_norm": 1.191798210144043, | |
| "learning_rate": 7.86853358899855e-06, | |
| "loss": 0.3097, | |
| "step": 11805 | |
| }, | |
| { | |
| "epoch": 2.4764299182903833, | |
| "grad_norm": 0.9445894360542297, | |
| "learning_rate": 7.777423638550096e-06, | |
| "loss": 0.2935, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 2.4795725958516655, | |
| "grad_norm": 0.9677672386169434, | |
| "learning_rate": 7.68679974221282e-06, | |
| "loss": 0.2949, | |
| "step": 11835 | |
| }, | |
| { | |
| "epoch": 2.482715273412948, | |
| "grad_norm": 0.756100058555603, | |
| "learning_rate": 7.596662943222877e-06, | |
| "loss": 0.2685, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 2.4858579509742302, | |
| "grad_norm": 1.2218337059020996, | |
| "learning_rate": 7.507014279209057e-06, | |
| "loss": 0.3395, | |
| "step": 11865 | |
| }, | |
| { | |
| "epoch": 2.4890006285355124, | |
| "grad_norm": 1.1206847429275513, | |
| "learning_rate": 7.417854782180894e-06, | |
| "loss": 0.2641, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 2.4921433060967946, | |
| "grad_norm": 1.095615029335022, | |
| "learning_rate": 7.329185478516798e-06, | |
| "loss": 0.3021, | |
| "step": 11895 | |
| }, | |
| { | |
| "epoch": 2.4952859836580767, | |
| "grad_norm": 0.9641756415367126, | |
| "learning_rate": 7.241007388952209e-06, | |
| "loss": 0.2847, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 2.498428661219359, | |
| "grad_norm": 0.9637003540992737, | |
| "learning_rate": 7.153321528567819e-06, | |
| "loss": 0.2775, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 2.501571338780641, | |
| "grad_norm": 0.8976852297782898, | |
| "learning_rate": 7.066128906777941e-06, | |
| "loss": 0.2636, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 2.5047140163419233, | |
| "grad_norm": 1.006549596786499, | |
| "learning_rate": 6.97943052731887e-06, | |
| "loss": 0.2616, | |
| "step": 11955 | |
| }, | |
| { | |
| "epoch": 2.5078566939032054, | |
| "grad_norm": 1.004257321357727, | |
| "learning_rate": 6.893227388237345e-06, | |
| "loss": 0.2579, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 2.5109993714644876, | |
| "grad_norm": 0.8972447514533997, | |
| "learning_rate": 6.807520481879004e-06, | |
| "loss": 0.2469, | |
| "step": 11985 | |
| }, | |
| { | |
| "epoch": 2.5141420490257698, | |
| "grad_norm": 0.8245068192481995, | |
| "learning_rate": 6.722310794877002e-06, | |
| "loss": 0.3258, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.517284726587052, | |
| "grad_norm": 1.2819231748580933, | |
| "learning_rate": 6.637599308140685e-06, | |
| "loss": 0.2503, | |
| "step": 12015 | |
| }, | |
| { | |
| "epoch": 2.520427404148334, | |
| "grad_norm": 0.9961299896240234, | |
| "learning_rate": 6.553386996844208e-06, | |
| "loss": 0.2766, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 2.5235700817096167, | |
| "grad_norm": 0.7203584909439087, | |
| "learning_rate": 6.469674830415412e-06, | |
| "loss": 0.3168, | |
| "step": 12045 | |
| }, | |
| { | |
| "epoch": 2.526712759270899, | |
| "grad_norm": 0.8977159261703491, | |
| "learning_rate": 6.386463772524576e-06, | |
| "loss": 0.2573, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 2.529855436832181, | |
| "grad_norm": 1.2124725580215454, | |
| "learning_rate": 6.303754781073395e-06, | |
| "loss": 0.3008, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 2.5329981143934632, | |
| "grad_norm": 0.7577414512634277, | |
| "learning_rate": 6.2215488081838854e-06, | |
| "loss": 0.2492, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 2.5361407919547454, | |
| "grad_norm": 1.308779001235962, | |
| "learning_rate": 6.139846800187493e-06, | |
| "loss": 0.3002, | |
| "step": 12105 | |
| }, | |
| { | |
| "epoch": 2.5392834695160276, | |
| "grad_norm": 1.0538486242294312, | |
| "learning_rate": 6.058649697614149e-06, | |
| "loss": 0.3068, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 2.5424261470773097, | |
| "grad_norm": 1.1852937936782837, | |
| "learning_rate": 5.9779584351814636e-06, | |
| "loss": 0.308, | |
| "step": 12135 | |
| }, | |
| { | |
| "epoch": 2.5455688246385924, | |
| "grad_norm": 0.9339080452919006, | |
| "learning_rate": 5.897773941783935e-06, | |
| "loss": 0.297, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.5487115021998745, | |
| "grad_norm": 0.8344528079032898, | |
| "learning_rate": 5.8180971404823205e-06, | |
| "loss": 0.2789, | |
| "step": 12165 | |
| }, | |
| { | |
| "epoch": 2.5518541797611567, | |
| "grad_norm": 1.3588929176330566, | |
| "learning_rate": 5.738928948492966e-06, | |
| "loss": 0.296, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 2.554996857322439, | |
| "grad_norm": 1.0490657091140747, | |
| "learning_rate": 5.660270277177243e-06, | |
| "loss": 0.2864, | |
| "step": 12195 | |
| }, | |
| { | |
| "epoch": 2.558139534883721, | |
| "grad_norm": 1.2904434204101562, | |
| "learning_rate": 5.582122032031051e-06, | |
| "loss": 0.2966, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 2.561282212445003, | |
| "grad_norm": 0.7123144268989563, | |
| "learning_rate": 5.5044851126744404e-06, | |
| "loss": 0.2733, | |
| "step": 12225 | |
| }, | |
| { | |
| "epoch": 2.5644248900062854, | |
| "grad_norm": 1.2593188285827637, | |
| "learning_rate": 5.4273604128412315e-06, | |
| "loss": 0.2873, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 2.5675675675675675, | |
| "grad_norm": 0.9681785106658936, | |
| "learning_rate": 5.35074882036869e-06, | |
| "loss": 0.2596, | |
| "step": 12255 | |
| }, | |
| { | |
| "epoch": 2.5707102451288497, | |
| "grad_norm": 0.944814145565033, | |
| "learning_rate": 5.2746512171873485e-06, | |
| "loss": 0.2871, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 2.573852922690132, | |
| "grad_norm": 1.0654292106628418, | |
| "learning_rate": 5.199068479310865e-06, | |
| "loss": 0.2856, | |
| "step": 12285 | |
| }, | |
| { | |
| "epoch": 2.576995600251414, | |
| "grad_norm": 1.4697771072387695, | |
| "learning_rate": 5.12400147682589e-06, | |
| "loss": 0.3125, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.5801382778126962, | |
| "grad_norm": 1.1471614837646484, | |
| "learning_rate": 5.0494510738820836e-06, | |
| "loss": 0.2712, | |
| "step": 12315 | |
| }, | |
| { | |
| "epoch": 2.5832809553739784, | |
| "grad_norm": 1.2926499843597412, | |
| "learning_rate": 4.9754181286821855e-06, | |
| "loss": 0.2721, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 2.586423632935261, | |
| "grad_norm": 1.1065871715545654, | |
| "learning_rate": 4.901903493472071e-06, | |
| "loss": 0.3443, | |
| "step": 12345 | |
| }, | |
| { | |
| "epoch": 2.589566310496543, | |
| "grad_norm": 1.0714068412780762, | |
| "learning_rate": 4.8289080145309974e-06, | |
| "loss": 0.2963, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 2.5927089880578253, | |
| "grad_norm": 0.8245282769203186, | |
| "learning_rate": 4.756432532161858e-06, | |
| "loss": 0.2564, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 2.5958516656191075, | |
| "grad_norm": 1.266921043395996, | |
| "learning_rate": 4.684477880681492e-06, | |
| "loss": 0.2712, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 2.5989943431803897, | |
| "grad_norm": 1.2646595239639282, | |
| "learning_rate": 4.613044888411067e-06, | |
| "loss": 0.2845, | |
| "step": 12405 | |
| }, | |
| { | |
| "epoch": 2.602137020741672, | |
| "grad_norm": 1.0433062314987183, | |
| "learning_rate": 4.542134377666562e-06, | |
| "loss": 0.309, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 2.605279698302954, | |
| "grad_norm": 0.9236804246902466, | |
| "learning_rate": 4.471747164749318e-06, | |
| "loss": 0.2576, | |
| "step": 12435 | |
| }, | |
| { | |
| "epoch": 2.608422375864236, | |
| "grad_norm": 0.8656274676322937, | |
| "learning_rate": 4.401884059936618e-06, | |
| "loss": 0.2695, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.611565053425519, | |
| "grad_norm": 1.226678729057312, | |
| "learning_rate": 4.332545867472354e-06, | |
| "loss": 0.2993, | |
| "step": 12465 | |
| }, | |
| { | |
| "epoch": 2.614707730986801, | |
| "grad_norm": 1.1997127532958984, | |
| "learning_rate": 4.263733385557767e-06, | |
| "loss": 0.2832, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 2.617850408548083, | |
| "grad_norm": 1.113054871559143, | |
| "learning_rate": 4.195447406342301e-06, | |
| "loss": 0.2429, | |
| "step": 12495 | |
| }, | |
| { | |
| "epoch": 2.6209930861093653, | |
| "grad_norm": 1.1524410247802734, | |
| "learning_rate": 4.127688715914446e-06, | |
| "loss": 0.3216, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 2.6241357636706475, | |
| "grad_norm": 1.1508104801177979, | |
| "learning_rate": 4.060458094292663e-06, | |
| "loss": 0.2685, | |
| "step": 12525 | |
| }, | |
| { | |
| "epoch": 2.6272784412319297, | |
| "grad_norm": 1.1233001947402954, | |
| "learning_rate": 3.993756315416486e-06, | |
| "loss": 0.2525, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 2.630421118793212, | |
| "grad_norm": 1.041908621788025, | |
| "learning_rate": 3.927584147137514e-06, | |
| "loss": 0.2833, | |
| "step": 12555 | |
| }, | |
| { | |
| "epoch": 2.633563796354494, | |
| "grad_norm": 1.2598505020141602, | |
| "learning_rate": 3.8619423512106734e-06, | |
| "loss": 0.2895, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 2.636706473915776, | |
| "grad_norm": 1.137080430984497, | |
| "learning_rate": 3.7968316832853456e-06, | |
| "loss": 0.29, | |
| "step": 12585 | |
| }, | |
| { | |
| "epoch": 2.6398491514770583, | |
| "grad_norm": 1.0239893198013306, | |
| "learning_rate": 3.7322528928967703e-06, | |
| "loss": 0.2548, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.6429918290383405, | |
| "grad_norm": 0.9820106625556946, | |
| "learning_rate": 3.668206723457329e-06, | |
| "loss": 0.3135, | |
| "step": 12615 | |
| }, | |
| { | |
| "epoch": 2.6461345065996227, | |
| "grad_norm": 0.8583505153656006, | |
| "learning_rate": 3.604693912248025e-06, | |
| "loss": 0.2581, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 2.649277184160905, | |
| "grad_norm": 1.1391513347625732, | |
| "learning_rate": 3.541715190410022e-06, | |
| "loss": 0.2878, | |
| "step": 12645 | |
| }, | |
| { | |
| "epoch": 2.6524198617221875, | |
| "grad_norm": 1.0786199569702148, | |
| "learning_rate": 3.4792712829361917e-06, | |
| "loss": 0.2667, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 2.6555625392834696, | |
| "grad_norm": 0.9973167777061462, | |
| "learning_rate": 3.4173629086627633e-06, | |
| "loss": 0.2455, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 2.658705216844752, | |
| "grad_norm": 0.8622914552688599, | |
| "learning_rate": 3.355990780261059e-06, | |
| "loss": 0.2264, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 2.661847894406034, | |
| "grad_norm": 0.9155644774436951, | |
| "learning_rate": 3.295155604229322e-06, | |
| "loss": 0.3147, | |
| "step": 12705 | |
| }, | |
| { | |
| "epoch": 2.664990571967316, | |
| "grad_norm": 1.313897728919983, | |
| "learning_rate": 3.234858080884545e-06, | |
| "loss": 0.2793, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 2.6681332495285983, | |
| "grad_norm": 1.0417330265045166, | |
| "learning_rate": 3.1750989043543843e-06, | |
| "loss": 0.3048, | |
| "step": 12735 | |
| }, | |
| { | |
| "epoch": 2.6712759270898805, | |
| "grad_norm": 1.175787091255188, | |
| "learning_rate": 3.1158787625692632e-06, | |
| "loss": 0.2897, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 2.6744186046511627, | |
| "grad_norm": 1.1047790050506592, | |
| "learning_rate": 3.05719833725433e-06, | |
| "loss": 0.3, | |
| "step": 12765 | |
| }, | |
| { | |
| "epoch": 2.6775612822124453, | |
| "grad_norm": 0.8376184701919556, | |
| "learning_rate": 2.9990583039217203e-06, | |
| "loss": 0.2654, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 2.6807039597737274, | |
| "grad_norm": 0.6929535269737244, | |
| "learning_rate": 2.941459331862706e-06, | |
| "loss": 0.3012, | |
| "step": 12795 | |
| }, | |
| { | |
| "epoch": 2.6838466373350096, | |
| "grad_norm": 0.832949161529541, | |
| "learning_rate": 2.8844020841400364e-06, | |
| "loss": 0.2765, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 2.686989314896292, | |
| "grad_norm": 0.9470664858818054, | |
| "learning_rate": 2.827887217580266e-06, | |
| "loss": 0.2729, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 2.690131992457574, | |
| "grad_norm": 0.7952046394348145, | |
| "learning_rate": 2.771915382766238e-06, | |
| "loss": 0.2464, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 2.693274670018856, | |
| "grad_norm": 1.0609912872314453, | |
| "learning_rate": 2.7164872240295458e-06, | |
| "loss": 0.3087, | |
| "step": 12855 | |
| }, | |
| { | |
| "epoch": 2.6964173475801383, | |
| "grad_norm": 0.9275609850883484, | |
| "learning_rate": 2.6616033794431614e-06, | |
| "loss": 0.2575, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 2.6995600251414205, | |
| "grad_norm": 1.464107871055603, | |
| "learning_rate": 2.607264480814059e-06, | |
| "loss": 0.2919, | |
| "step": 12885 | |
| }, | |
| { | |
| "epoch": 2.7027027027027026, | |
| "grad_norm": 1.1258777379989624, | |
| "learning_rate": 2.5534711536759404e-06, | |
| "loss": 0.265, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.705845380263985, | |
| "grad_norm": 1.169700264930725, | |
| "learning_rate": 2.5002240172820823e-06, | |
| "loss": 0.2849, | |
| "step": 12915 | |
| }, | |
| { | |
| "epoch": 2.708988057825267, | |
| "grad_norm": 1.3186782598495483, | |
| "learning_rate": 2.4475236845981465e-06, | |
| "loss": 0.2806, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 2.712130735386549, | |
| "grad_norm": 1.4104660749435425, | |
| "learning_rate": 2.395370762295135e-06, | |
| "loss": 0.3004, | |
| "step": 12945 | |
| }, | |
| { | |
| "epoch": 2.7152734129478313, | |
| "grad_norm": 1.2798209190368652, | |
| "learning_rate": 2.343765850742441e-06, | |
| "loss": 0.2887, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 2.718416090509114, | |
| "grad_norm": 1.0648716688156128, | |
| "learning_rate": 2.2927095440009093e-06, | |
| "loss": 0.2842, | |
| "step": 12975 | |
| }, | |
| { | |
| "epoch": 2.721558768070396, | |
| "grad_norm": 1.0158684253692627, | |
| "learning_rate": 2.2422024298160147e-06, | |
| "loss": 0.2977, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 2.7247014456316783, | |
| "grad_norm": 0.6185563802719116, | |
| "learning_rate": 2.1922450896110614e-06, | |
| "loss": 0.2967, | |
| "step": 13005 | |
| }, | |
| { | |
| "epoch": 2.7278441231929604, | |
| "grad_norm": 1.0942654609680176, | |
| "learning_rate": 2.142838098480543e-06, | |
| "loss": 0.277, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 2.7309868007542426, | |
| "grad_norm": 1.0424152612686157, | |
| "learning_rate": 2.0939820251834717e-06, | |
| "loss": 0.2908, | |
| "step": 13035 | |
| }, | |
| { | |
| "epoch": 2.7341294783155248, | |
| "grad_norm": 1.048524022102356, | |
| "learning_rate": 2.0456774321368666e-06, | |
| "loss": 0.3442, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 2.737272155876807, | |
| "grad_norm": 0.8081900477409363, | |
| "learning_rate": 1.9979248754092517e-06, | |
| "loss": 0.2707, | |
| "step": 13065 | |
| }, | |
| { | |
| "epoch": 2.740414833438089, | |
| "grad_norm": 1.3440662622451782, | |
| "learning_rate": 1.950724904714285e-06, | |
| "loss": 0.3337, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 2.7435575109993717, | |
| "grad_norm": 0.9911431670188904, | |
| "learning_rate": 1.904078063404391e-06, | |
| "loss": 0.2852, | |
| "step": 13095 | |
| }, | |
| { | |
| "epoch": 2.746700188560654, | |
| "grad_norm": 1.150423526763916, | |
| "learning_rate": 1.8579848884645534e-06, | |
| "loss": 0.2571, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 2.749842866121936, | |
| "grad_norm": 1.1156803369522095, | |
| "learning_rate": 1.8124459105060942e-06, | |
| "loss": 0.2896, | |
| "step": 13125 | |
| }, | |
| { | |
| "epoch": 2.7529855436832182, | |
| "grad_norm": 1.040390133857727, | |
| "learning_rate": 1.767461653760588e-06, | |
| "loss": 0.278, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 2.7561282212445004, | |
| "grad_norm": 1.0304458141326904, | |
| "learning_rate": 1.723032636073807e-06, | |
| "loss": 0.2613, | |
| "step": 13155 | |
| }, | |
| { | |
| "epoch": 2.7592708988057826, | |
| "grad_norm": 1.1717437505722046, | |
| "learning_rate": 1.679159368899763e-06, | |
| "loss": 0.3064, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 2.7624135763670647, | |
| "grad_norm": 0.9141078591346741, | |
| "learning_rate": 1.63584235729487e-06, | |
| "loss": 0.2837, | |
| "step": 13185 | |
| }, | |
| { | |
| "epoch": 2.765556253928347, | |
| "grad_norm": 1.1188409328460693, | |
| "learning_rate": 1.593082099912052e-06, | |
| "loss": 0.2932, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.768698931489629, | |
| "grad_norm": 1.0684481859207153, | |
| "learning_rate": 1.5508790889950441e-06, | |
| "loss": 0.267, | |
| "step": 13215 | |
| }, | |
| { | |
| "epoch": 2.7718416090509113, | |
| "grad_norm": 0.976677417755127, | |
| "learning_rate": 1.5092338103727344e-06, | |
| "loss": 0.2897, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 2.7749842866121934, | |
| "grad_norm": 1.081978678703308, | |
| "learning_rate": 1.4681467434535356e-06, | |
| "loss": 0.2592, | |
| "step": 13245 | |
| }, | |
| { | |
| "epoch": 2.7781269641734756, | |
| "grad_norm": 1.090117335319519, | |
| "learning_rate": 1.4276183612199178e-06, | |
| "loss": 0.2923, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 2.7812696417347578, | |
| "grad_norm": 1.1117249727249146, | |
| "learning_rate": 1.3876491302229011e-06, | |
| "loss": 0.2701, | |
| "step": 13275 | |
| }, | |
| { | |
| "epoch": 2.7844123192960404, | |
| "grad_norm": 1.4228675365447998, | |
| "learning_rate": 1.3482395105767543e-06, | |
| "loss": 0.3066, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 2.7875549968573226, | |
| "grad_norm": 0.9276790618896484, | |
| "learning_rate": 1.3093899559536272e-06, | |
| "loss": 0.2437, | |
| "step": 13305 | |
| }, | |
| { | |
| "epoch": 2.7906976744186047, | |
| "grad_norm": 1.1724159717559814, | |
| "learning_rate": 1.2711009135783825e-06, | |
| "loss": 0.3051, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 2.793840351979887, | |
| "grad_norm": 0.9188593029975891, | |
| "learning_rate": 1.2333728242234333e-06, | |
| "loss": 0.3214, | |
| "step": 13335 | |
| }, | |
| { | |
| "epoch": 2.796983029541169, | |
| "grad_norm": 1.084934949874878, | |
| "learning_rate": 1.196206122203647e-06, | |
| "loss": 0.2653, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 2.8001257071024512, | |
| "grad_norm": 1.041142225265503, | |
| "learning_rate": 1.1596012353713604e-06, | |
| "loss": 0.2879, | |
| "step": 13365 | |
| }, | |
| { | |
| "epoch": 2.8032683846637334, | |
| "grad_norm": 1.026824951171875, | |
| "learning_rate": 1.1235585851114726e-06, | |
| "loss": 0.3006, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 2.8064110622250156, | |
| "grad_norm": 1.143835425376892, | |
| "learning_rate": 1.0880785863365718e-06, | |
| "loss": 0.305, | |
| "step": 13395 | |
| }, | |
| { | |
| "epoch": 2.809553739786298, | |
| "grad_norm": 0.5169873833656311, | |
| "learning_rate": 1.0531616474821649e-06, | |
| "loss": 0.2878, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 2.8126964173475804, | |
| "grad_norm": 1.1536767482757568, | |
| "learning_rate": 1.0188081705019558e-06, | |
| "loss": 0.2877, | |
| "step": 13425 | |
| }, | |
| { | |
| "epoch": 2.8158390949088625, | |
| "grad_norm": 0.9985389113426208, | |
| "learning_rate": 9.850185508632704e-07, | |
| "loss": 0.3113, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 2.8189817724701447, | |
| "grad_norm": 0.9148264527320862, | |
| "learning_rate": 9.517931775424593e-07, | |
| "loss": 0.3117, | |
| "step": 13455 | |
| }, | |
| { | |
| "epoch": 2.822124450031427, | |
| "grad_norm": 1.1424579620361328, | |
| "learning_rate": 9.191324330204199e-07, | |
| "loss": 0.2721, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 2.825267127592709, | |
| "grad_norm": 1.054230809211731, | |
| "learning_rate": 8.870366932782093e-07, | |
| "loss": 0.303, | |
| "step": 13485 | |
| }, | |
| { | |
| "epoch": 2.828409805153991, | |
| "grad_norm": 1.211416482925415, | |
| "learning_rate": 8.555063277927378e-07, | |
| "loss": 0.2932, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.8315524827152734, | |
| "grad_norm": 1.4953478574752808, | |
| "learning_rate": 8.24541699532455e-07, | |
| "loss": 0.3246, | |
| "step": 13515 | |
| }, | |
| { | |
| "epoch": 2.8346951602765555, | |
| "grad_norm": 0.773501455783844, | |
| "learning_rate": 7.94143164953226e-07, | |
| "loss": 0.2777, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 2.8378378378378377, | |
| "grad_norm": 0.6173717379570007, | |
| "learning_rate": 7.643110739942172e-07, | |
| "loss": 0.3181, | |
| "step": 13545 | |
| }, | |
| { | |
| "epoch": 2.84098051539912, | |
| "grad_norm": 1.1255333423614502, | |
| "learning_rate": 7.350457700738389e-07, | |
| "loss": 0.2954, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 2.844123192960402, | |
| "grad_norm": 1.1932814121246338, | |
| "learning_rate": 7.063475900858263e-07, | |
| "loss": 0.314, | |
| "step": 13575 | |
| }, | |
| { | |
| "epoch": 2.8472658705216842, | |
| "grad_norm": 1.5271681547164917, | |
| "learning_rate": 6.782168643953312e-07, | |
| "loss": 0.3197, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 2.850408548082967, | |
| "grad_norm": 0.9488076567649841, | |
| "learning_rate": 6.506539168351699e-07, | |
| "loss": 0.2993, | |
| "step": 13605 | |
| }, | |
| { | |
| "epoch": 2.853551225644249, | |
| "grad_norm": 1.015404462814331, | |
| "learning_rate": 6.236590647020202e-07, | |
| "loss": 0.2831, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 2.856693903205531, | |
| "grad_norm": 0.6510112881660461, | |
| "learning_rate": 5.972326187528299e-07, | |
| "loss": 0.2806, | |
| "step": 13635 | |
| }, | |
| { | |
| "epoch": 2.8598365807668134, | |
| "grad_norm": 1.1119881868362427, | |
| "learning_rate": 5.7137488320122e-07, | |
| "loss": 0.2625, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 2.8629792583280955, | |
| "grad_norm": 1.0891669988632202, | |
| "learning_rate": 5.460861557139818e-07, | |
| "loss": 0.2913, | |
| "step": 13665 | |
| }, | |
| { | |
| "epoch": 2.8661219358893777, | |
| "grad_norm": 1.3575654029846191, | |
| "learning_rate": 5.213667274076461e-07, | |
| "loss": 0.3209, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 2.86926461345066, | |
| "grad_norm": 0.7372342944145203, | |
| "learning_rate": 4.972168828451251e-07, | |
| "loss": 0.2798, | |
| "step": 13695 | |
| }, | |
| { | |
| "epoch": 2.872407291011942, | |
| "grad_norm": 1.258745551109314, | |
| "learning_rate": 4.736369000324703e-07, | |
| "loss": 0.3125, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 2.8755499685732246, | |
| "grad_norm": 0.7658424973487854, | |
| "learning_rate": 4.506270504156307e-07, | |
| "loss": 0.2501, | |
| "step": 13725 | |
| }, | |
| { | |
| "epoch": 2.878692646134507, | |
| "grad_norm": 1.225644826889038, | |
| "learning_rate": 4.281875988773554e-07, | |
| "loss": 0.2975, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 2.881835323695789, | |
| "grad_norm": 1.0335606336593628, | |
| "learning_rate": 4.063188037341348e-07, | |
| "loss": 0.2852, | |
| "step": 13755 | |
| }, | |
| { | |
| "epoch": 2.884978001257071, | |
| "grad_norm": 0.8567134737968445, | |
| "learning_rate": 3.8502091673322526e-07, | |
| "loss": 0.2584, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 2.8881206788183533, | |
| "grad_norm": 0.8661710023880005, | |
| "learning_rate": 3.642941830497515e-07, | |
| "loss": 0.3128, | |
| "step": 13785 | |
| }, | |
| { | |
| "epoch": 2.8912633563796355, | |
| "grad_norm": 1.1629458665847778, | |
| "learning_rate": 3.441388412838864e-07, | |
| "loss": 0.2919, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.8944060339409177, | |
| "grad_norm": 0.9116327166557312, | |
| "learning_rate": 3.2455512345811457e-07, | |
| "loss": 0.2464, | |
| "step": 13815 | |
| }, | |
| { | |
| "epoch": 2.8975487115022, | |
| "grad_norm": 0.8351930975914001, | |
| "learning_rate": 3.055432550145398e-07, | |
| "loss": 0.3138, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 2.900691389063482, | |
| "grad_norm": 0.8611274361610413, | |
| "learning_rate": 2.871034548122986e-07, | |
| "loss": 0.2675, | |
| "step": 13845 | |
| }, | |
| { | |
| "epoch": 2.903834066624764, | |
| "grad_norm": 1.021216630935669, | |
| "learning_rate": 2.692359351250506e-07, | |
| "loss": 0.2545, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 2.9069767441860463, | |
| "grad_norm": 0.9064350128173828, | |
| "learning_rate": 2.5194090163853103e-07, | |
| "loss": 0.2813, | |
| "step": 13875 | |
| }, | |
| { | |
| "epoch": 2.9101194217473285, | |
| "grad_norm": 0.7603162527084351, | |
| "learning_rate": 2.3521855344816323e-07, | |
| "loss": 0.2837, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 2.9132620993086107, | |
| "grad_norm": 1.0929245948791504, | |
| "learning_rate": 2.1906908305679986e-07, | |
| "loss": 0.3017, | |
| "step": 13905 | |
| }, | |
| { | |
| "epoch": 2.9164047768698933, | |
| "grad_norm": 1.078133225440979, | |
| "learning_rate": 2.0349267637247982e-07, | |
| "loss": 0.2812, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 2.9195474544311755, | |
| "grad_norm": 0.6622474789619446, | |
| "learning_rate": 1.8848951270630244e-07, | |
| "loss": 0.2775, | |
| "step": 13935 | |
| }, | |
| { | |
| "epoch": 2.9226901319924576, | |
| "grad_norm": 0.8766260147094727, | |
| "learning_rate": 1.7405976477035124e-07, | |
| "loss": 0.2694, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 2.92583280955374, | |
| "grad_norm": 1.1658825874328613, | |
| "learning_rate": 1.6020359867572333e-07, | |
| "loss": 0.2946, | |
| "step": 13965 | |
| }, | |
| { | |
| "epoch": 2.928975487115022, | |
| "grad_norm": 1.0801419019699097, | |
| "learning_rate": 1.469211739306031e-07, | |
| "loss": 0.3458, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 2.932118164676304, | |
| "grad_norm": 1.0484652519226074, | |
| "learning_rate": 1.3421264343843054e-07, | |
| "loss": 0.3075, | |
| "step": 13995 | |
| }, | |
| { | |
| "epoch": 2.9352608422375863, | |
| "grad_norm": 1.168779730796814, | |
| "learning_rate": 1.2207815349614128e-07, | |
| "loss": 0.2848, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 2.9384035197988685, | |
| "grad_norm": 0.684332013130188, | |
| "learning_rate": 1.105178437924792e-07, | |
| "loss": 0.2766, | |
| "step": 14025 | |
| }, | |
| { | |
| "epoch": 2.941546197360151, | |
| "grad_norm": 0.8341169953346252, | |
| "learning_rate": 9.953184740639222e-08, | |
| "loss": 0.2733, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 2.9446888749214333, | |
| "grad_norm": 0.9254804849624634, | |
| "learning_rate": 8.91202908055e-08, | |
| "loss": 0.2501, | |
| "step": 14055 | |
| }, | |
| { | |
| "epoch": 2.9478315524827154, | |
| "grad_norm": 1.1126680374145508, | |
| "learning_rate": 7.928329384463418e-08, | |
| "loss": 0.3106, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 2.9509742300439976, | |
| "grad_norm": 0.7974324226379395, | |
| "learning_rate": 7.002096976446715e-08, | |
| "loss": 0.2767, | |
| "step": 14085 | |
| }, | |
| { | |
| "epoch": 2.95411690760528, | |
| "grad_norm": 1.1926546096801758, | |
| "learning_rate": 6.133342519020202e-08, | |
| "loss": 0.3035, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.957259585166562, | |
| "grad_norm": 1.0937379598617554, | |
| "learning_rate": 5.322076013034027e-08, | |
| "loss": 0.2464, | |
| "step": 14115 | |
| }, | |
| { | |
| "epoch": 2.960402262727844, | |
| "grad_norm": 0.7003195285797119, | |
| "learning_rate": 4.568306797554378e-08, | |
| "loss": 0.2879, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 2.9635449402891263, | |
| "grad_norm": 1.247180461883545, | |
| "learning_rate": 3.872043549754678e-08, | |
| "loss": 0.2745, | |
| "step": 14145 | |
| }, | |
| { | |
| "epoch": 2.9666876178504085, | |
| "grad_norm": 1.021088719367981, | |
| "learning_rate": 3.233294284816224e-08, | |
| "loss": 0.2661, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 2.9698302954116906, | |
| "grad_norm": 1.149630069732666, | |
| "learning_rate": 2.652066355836591e-08, | |
| "loss": 0.3032, | |
| "step": 14175 | |
| }, | |
| { | |
| "epoch": 2.972972972972973, | |
| "grad_norm": 1.1792501211166382, | |
| "learning_rate": 2.128366453743591e-08, | |
| "loss": 0.2652, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 2.976115650534255, | |
| "grad_norm": 1.1864453554153442, | |
| "learning_rate": 1.662200607219777e-08, | |
| "loss": 0.2712, | |
| "step": 14205 | |
| }, | |
| { | |
| "epoch": 2.979258328095537, | |
| "grad_norm": 1.6070250272750854, | |
| "learning_rate": 1.2535741826313897e-08, | |
| "loss": 0.2848, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 2.9824010056568198, | |
| "grad_norm": 0.9383937120437622, | |
| "learning_rate": 9.024918839678486e-09, | |
| "loss": 0.2689, | |
| "step": 14235 | |
| }, | |
| { | |
| "epoch": 2.985543683218102, | |
| "grad_norm": 0.9039358496665955, | |
| "learning_rate": 6.089577527873535e-09, | |
| "loss": 0.2109, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 2.988686360779384, | |
| "grad_norm": 0.8809177279472351, | |
| "learning_rate": 3.729751681702531e-09, | |
| "loss": 0.2992, | |
| "step": 14265 | |
| }, | |
| { | |
| "epoch": 2.9918290383406663, | |
| "grad_norm": 1.0034148693084717, | |
| "learning_rate": 1.94546846679633e-09, | |
| "loss": 0.2721, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 2.9949717159019484, | |
| "grad_norm": 0.8567355871200562, | |
| "learning_rate": 7.367484233133937e-10, | |
| "loss": 0.2449, | |
| "step": 14295 | |
| }, | |
| { | |
| "epoch": 2.9981143934632306, | |
| "grad_norm": 1.462417721748352, | |
| "learning_rate": 1.0360546568444207e-10, | |
| "loss": 0.2634, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 14319, | |
| "total_flos": 2.7323814530514944e+18, | |
| "train_loss": 0.39260489724686265, | |
| "train_runtime": 12699.4551, | |
| "train_samples_per_second": 4.51, | |
| "train_steps_per_second": 1.128 | |
| } | |
| ], | |
| "logging_steps": 15, | |
| "max_steps": 14319, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.7323814530514944e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
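
The JSON above matches the layout of a Hugging Face Trainer state file (`log_history` entries with `step`, `loss`, `learning_rate`, `grad_norm`, plus periodic `eval_*` records and final aggregate metrics). As a minimal sketch only, and not part of the original file, the snippet below shows one common way such a state file is consumed: it assumes the JSON is saved locally as `trainer_state.json` (a hypothetical path) and uses only `json` and `matplotlib` to plot the training-loss curve over optimizer steps.

```python
# Minimal sketch (not part of the original trainer_state.json): load the state
# file and plot training loss vs. step. The input and output paths below are
# hypothetical placeholders.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the periodic training logs (entries carrying a "loss" key);
# evaluation records carry "eval_loss" and the final summary carries
# "train_loss" instead, so both are skipped here.
train_logs = [entry for entry in state["log_history"] if "loss" in entry]
steps = [entry["step"] for entry in train_logs]
losses = [entry["loss"] for entry in train_logs]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("Loss curve from trainer_state.json")
plt.savefig("loss_curve.png")
```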