| { | |
| "best_metric": 0.8853943711763073, | |
| "best_model_checkpoint": "/workspace/previous_works/M3D/LaMed/output/LaMed-Llama3-8B-finetune-0000/checkpoint-12888", | |
| "epoch": 3.0, | |
| "eval_steps": 4296, | |
| "global_step": 14319, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0031426775612822125, | |
| "grad_norm": 6.532504558563232, | |
| "learning_rate": 1.744186046511628e-06, | |
| "loss": 1.9456, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.006285355122564425, | |
| "grad_norm": 4.389227867126465, | |
| "learning_rate": 3.488372093023256e-06, | |
| "loss": 1.8427, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.009428032683846637, | |
| "grad_norm": 3.4557132720947266, | |
| "learning_rate": 5.232558139534884e-06, | |
| "loss": 1.6337, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01257071024512885, | |
| "grad_norm": 3.462625503540039, | |
| "learning_rate": 6.976744186046512e-06, | |
| "loss": 1.3449, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01571338780641106, | |
| "grad_norm": 3.7610018253326416, | |
| "learning_rate": 8.72093023255814e-06, | |
| "loss": 1.1347, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.018856065367693273, | |
| "grad_norm": 3.2558743953704834, | |
| "learning_rate": 1.0465116279069768e-05, | |
| "loss": 0.9932, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02199874292897549, | |
| "grad_norm": 4.160295486450195, | |
| "learning_rate": 1.2209302325581395e-05, | |
| "loss": 0.9954, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.0251414204902577, | |
| "grad_norm": 3.3803467750549316, | |
| "learning_rate": 1.3953488372093024e-05, | |
| "loss": 0.8322, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.028284098051539912, | |
| "grad_norm": 3.2412078380584717, | |
| "learning_rate": 1.569767441860465e-05, | |
| "loss": 0.8286, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.03142677561282212, | |
| "grad_norm": 3.4582881927490234, | |
| "learning_rate": 1.744186046511628e-05, | |
| "loss": 0.7777, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.034569453174104335, | |
| "grad_norm": 3.038137435913086, | |
| "learning_rate": 1.918604651162791e-05, | |
| "loss": 0.7253, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.03771213073538655, | |
| "grad_norm": 3.4821434020996094, | |
| "learning_rate": 2.0930232558139536e-05, | |
| "loss": 0.7581, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04085480829666876, | |
| "grad_norm": 4.621170520782471, | |
| "learning_rate": 2.2674418604651163e-05, | |
| "loss": 0.7054, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.04399748585795098, | |
| "grad_norm": 2.803231716156006, | |
| "learning_rate": 2.441860465116279e-05, | |
| "loss": 0.7732, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.04714016341923319, | |
| "grad_norm": 3.1358466148376465, | |
| "learning_rate": 2.616279069767442e-05, | |
| "loss": 0.6582, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.0502828409805154, | |
| "grad_norm": 2.628765106201172, | |
| "learning_rate": 2.7906976744186048e-05, | |
| "loss": 0.6487, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.05342551854179761, | |
| "grad_norm": 3.6059532165527344, | |
| "learning_rate": 2.9651162790697678e-05, | |
| "loss": 0.589, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.056568196103079824, | |
| "grad_norm": 2.951493263244629, | |
| "learning_rate": 3.13953488372093e-05, | |
| "loss": 0.6081, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.059710873664362035, | |
| "grad_norm": 2.9226279258728027, | |
| "learning_rate": 3.313953488372093e-05, | |
| "loss": 0.6117, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.06285355122564425, | |
| "grad_norm": 3.403846263885498, | |
| "learning_rate": 3.488372093023256e-05, | |
| "loss": 0.6731, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.06599622878692646, | |
| "grad_norm": 2.577772617340088, | |
| "learning_rate": 3.662790697674418e-05, | |
| "loss": 0.6461, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.06913890634820867, | |
| "grad_norm": 3.0141305923461914, | |
| "learning_rate": 3.837209302325582e-05, | |
| "loss": 0.6386, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.07228158390949088, | |
| "grad_norm": 2.3152832984924316, | |
| "learning_rate": 4.0116279069767444e-05, | |
| "loss": 0.5524, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.0754242614707731, | |
| "grad_norm": 2.8160572052001953, | |
| "learning_rate": 4.186046511627907e-05, | |
| "loss": 0.6205, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.0785669390320553, | |
| "grad_norm": 2.3307974338531494, | |
| "learning_rate": 4.36046511627907e-05, | |
| "loss": 0.6004, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.08170961659333752, | |
| "grad_norm": 2.2888669967651367, | |
| "learning_rate": 4.5348837209302326e-05, | |
| "loss": 0.5461, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.08485229415461974, | |
| "grad_norm": 2.36181378364563, | |
| "learning_rate": 4.709302325581396e-05, | |
| "loss": 0.5971, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.08799497171590195, | |
| "grad_norm": 2.1626923084259033, | |
| "learning_rate": 4.883720930232558e-05, | |
| "loss": 0.5446, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.09113764927718417, | |
| "grad_norm": 2.3800854682922363, | |
| "learning_rate": 4.999998401149839e-05, | |
| "loss": 0.6413, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.09428032683846638, | |
| "grad_norm": 2.2933521270751953, | |
| "learning_rate": 4.999974418438328e-05, | |
| "loss": 0.5955, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.09742300439974859, | |
| "grad_norm": 2.338463306427002, | |
| "learning_rate": 4.999921656742949e-05, | |
| "loss": 0.5819, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.1005656819610308, | |
| "grad_norm": 2.9759883880615234, | |
| "learning_rate": 4.9998401166710804e-05, | |
| "loss": 0.5898, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.10370835952231301, | |
| "grad_norm": 2.243450880050659, | |
| "learning_rate": 4.999729799161389e-05, | |
| "loss": 0.623, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.10685103708359522, | |
| "grad_norm": 2.647433280944824, | |
| "learning_rate": 4.9995907054838166e-05, | |
| "loss": 0.5426, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.10999371464487744, | |
| "grad_norm": 2.0400497913360596, | |
| "learning_rate": 4.99942283723957e-05, | |
| "loss": 0.6028, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.11313639220615965, | |
| "grad_norm": 2.918405771255493, | |
| "learning_rate": 4.999226196361099e-05, | |
| "loss": 0.5556, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.11627906976744186, | |
| "grad_norm": 2.571192741394043, | |
| "learning_rate": 4.999000785112079e-05, | |
| "loss": 0.5625, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.11942174732872407, | |
| "grad_norm": 2.483920097351074, | |
| "learning_rate": 4.998746606087377e-05, | |
| "loss": 0.6185, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.12256442489000628, | |
| "grad_norm": 2.963257312774658, | |
| "learning_rate": 4.9984636622130285e-05, | |
| "loss": 0.5841, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.1257071024512885, | |
| "grad_norm": 2.1929099559783936, | |
| "learning_rate": 4.998151956746204e-05, | |
| "loss": 0.5831, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.12884978001257072, | |
| "grad_norm": 1.990614891052246, | |
| "learning_rate": 4.997811493275165e-05, | |
| "loss": 0.5116, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.13199245757385292, | |
| "grad_norm": 2.227179527282715, | |
| "learning_rate": 4.997442275719229e-05, | |
| "loss": 0.59, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.13513513513513514, | |
| "grad_norm": 1.7978647947311401, | |
| "learning_rate": 4.997044308328722e-05, | |
| "loss": 0.4995, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.13827781269641734, | |
| "grad_norm": 2.2707254886627197, | |
| "learning_rate": 4.9966175956849306e-05, | |
| "loss": 0.5299, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.14142049025769957, | |
| "grad_norm": 2.358933687210083, | |
| "learning_rate": 4.996162142700045e-05, | |
| "loss": 0.597, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.14456316781898176, | |
| "grad_norm": 2.036271333694458, | |
| "learning_rate": 4.995677954617112e-05, | |
| "loss": 0.5392, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.147705845380264, | |
| "grad_norm": 2.3753066062927246, | |
| "learning_rate": 4.995165037009962e-05, | |
| "loss": 0.5778, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.1508485229415462, | |
| "grad_norm": 1.849295973777771, | |
| "learning_rate": 4.994623395783157e-05, | |
| "loss": 0.6238, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.1539912005028284, | |
| "grad_norm": 2.010460376739502, | |
| "learning_rate": 4.994053037171912e-05, | |
| "loss": 0.4691, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.1571338780641106, | |
| "grad_norm": 2.023106575012207, | |
| "learning_rate": 4.993453967742032e-05, | |
| "loss": 0.5377, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.16027655562539284, | |
| "grad_norm": 2.195887804031372, | |
| "learning_rate": 4.9928261943898315e-05, | |
| "loss": 0.5639, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.16341923318667503, | |
| "grad_norm": 1.9283181428909302, | |
| "learning_rate": 4.9921697243420564e-05, | |
| "loss": 0.5141, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.16656191074795726, | |
| "grad_norm": 1.7017083168029785, | |
| "learning_rate": 4.9914845651557985e-05, | |
| "loss": 0.5132, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.16970458830923948, | |
| "grad_norm": 2.1977009773254395, | |
| "learning_rate": 4.990770724718415e-05, | |
| "loss": 0.5415, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.17284726587052168, | |
| "grad_norm": 1.9427462816238403, | |
| "learning_rate": 4.99002821124743e-05, | |
| "loss": 0.5381, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.1759899434318039, | |
| "grad_norm": 2.5321216583251953, | |
| "learning_rate": 4.989257033290443e-05, | |
| "loss": 0.5512, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.1791326209930861, | |
| "grad_norm": 1.7843250036239624, | |
| "learning_rate": 4.988457199725034e-05, | |
| "loss": 0.5028, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.18227529855436833, | |
| "grad_norm": 2.1043522357940674, | |
| "learning_rate": 4.987628719758655e-05, | |
| "loss": 0.5928, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.18541797611565053, | |
| "grad_norm": 2.0235021114349365, | |
| "learning_rate": 4.9867716029285284e-05, | |
| "loss": 0.5651, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.18856065367693275, | |
| "grad_norm": 1.885472059249878, | |
| "learning_rate": 4.985885859101536e-05, | |
| "loss": 0.4879, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.19170333123821495, | |
| "grad_norm": 1.9070786237716675, | |
| "learning_rate": 4.9849714984741046e-05, | |
| "loss": 0.4901, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.19484600879949718, | |
| "grad_norm": 2.001380681991577, | |
| "learning_rate": 4.984028531572091e-05, | |
| "loss": 0.574, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.19798868636077938, | |
| "grad_norm": 1.9602166414260864, | |
| "learning_rate": 4.9830569692506564e-05, | |
| "loss": 0.5307, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.2011313639220616, | |
| "grad_norm": 2.094599485397339, | |
| "learning_rate": 4.9820568226941466e-05, | |
| "loss": 0.5821, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.2042740414833438, | |
| "grad_norm": 2.0091841220855713, | |
| "learning_rate": 4.98102810341596e-05, | |
| "loss": 0.5969, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.20741671904462602, | |
| "grad_norm": 2.306108236312866, | |
| "learning_rate": 4.979970823258415e-05, | |
| "loss": 0.5745, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.21055939660590822, | |
| "grad_norm": 1.636775255203247, | |
| "learning_rate": 4.978884994392618e-05, | |
| "loss": 0.6422, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.21370207416719045, | |
| "grad_norm": 2.4798927307128906, | |
| "learning_rate": 4.9777706293183154e-05, | |
| "loss": 0.5046, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.21684475172847265, | |
| "grad_norm": 1.804826259613037, | |
| "learning_rate": 4.976627740863756e-05, | |
| "loss": 0.5399, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.21998742928975487, | |
| "grad_norm": 2.0178399085998535, | |
| "learning_rate": 4.975456342185544e-05, | |
| "loss": 0.5123, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.2231301068510371, | |
| "grad_norm": 2.50925350189209, | |
| "learning_rate": 4.9742564467684805e-05, | |
| "loss": 0.4928, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.2262727844123193, | |
| "grad_norm": 1.973009705543518, | |
| "learning_rate": 4.9730280684254166e-05, | |
| "loss": 0.5736, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.22941546197360152, | |
| "grad_norm": 1.8204375505447388, | |
| "learning_rate": 4.971771221297088e-05, | |
| "loss": 0.4693, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.23255813953488372, | |
| "grad_norm": 2.157780647277832, | |
| "learning_rate": 4.970485919851958e-05, | |
| "loss": 0.5993, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.23570081709616594, | |
| "grad_norm": 2.113952398300171, | |
| "learning_rate": 4.9691721788860433e-05, | |
| "loss": 0.5987, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.23884349465744814, | |
| "grad_norm": 2.577479124069214, | |
| "learning_rate": 4.967830013522753e-05, | |
| "loss": 0.5443, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.24198617221873037, | |
| "grad_norm": 1.7032134532928467, | |
| "learning_rate": 4.966459439212706e-05, | |
| "loss": 0.5301, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.24512884978001256, | |
| "grad_norm": 1.8560705184936523, | |
| "learning_rate": 4.965060471733559e-05, | |
| "loss": 0.5027, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.2482715273412948, | |
| "grad_norm": 1.7248977422714233, | |
| "learning_rate": 4.963633127189821e-05, | |
| "loss": 0.5522, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.251414204902577, | |
| "grad_norm": 1.6348320245742798, | |
| "learning_rate": 4.9621774220126694e-05, | |
| "loss": 0.48, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.2545568824638592, | |
| "grad_norm": 1.7352231740951538, | |
| "learning_rate": 4.960693372959764e-05, | |
| "loss": 0.5886, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.25769956002514144, | |
| "grad_norm": 2.1465370655059814, | |
| "learning_rate": 4.959180997115049e-05, | |
| "loss": 0.5238, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.2608422375864236, | |
| "grad_norm": 1.7073941230773926, | |
| "learning_rate": 4.957640311888557e-05, | |
| "loss": 0.487, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.26398491514770583, | |
| "grad_norm": 1.8688887357711792, | |
| "learning_rate": 4.9560713350162137e-05, | |
| "loss": 0.5792, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.26712759270898806, | |
| "grad_norm": 2.24149227142334, | |
| "learning_rate": 4.9544740845596254e-05, | |
| "loss": 0.4613, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 1.6652510166168213, | |
| "learning_rate": 4.9528485789058805e-05, | |
| "loss": 0.4311, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.27341294783155246, | |
| "grad_norm": 1.6432390213012695, | |
| "learning_rate": 4.951194836767329e-05, | |
| "loss": 0.5199, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.2765556253928347, | |
| "grad_norm": 1.566832184791565, | |
| "learning_rate": 4.9495128771813755e-05, | |
| "loss": 0.4897, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.2796983029541169, | |
| "grad_norm": 1.6974416971206665, | |
| "learning_rate": 4.94780271951025e-05, | |
| "loss": 0.5192, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.28284098051539913, | |
| "grad_norm": 1.9494693279266357, | |
| "learning_rate": 4.946064383440798e-05, | |
| "loss": 0.4957, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.28598365807668136, | |
| "grad_norm": 2.093959331512451, | |
| "learning_rate": 4.944297888984239e-05, | |
| "loss": 0.5164, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.2891263356379635, | |
| "grad_norm": 1.9262990951538086, | |
| "learning_rate": 4.9425032564759485e-05, | |
| "loss": 0.504, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.29226901319924575, | |
| "grad_norm": 1.8158432245254517, | |
| "learning_rate": 4.940680506575218e-05, | |
| "loss": 0.4649, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.295411690760528, | |
| "grad_norm": 1.7862390279769897, | |
| "learning_rate": 4.9388296602650185e-05, | |
| "loss": 0.5356, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.2985543683218102, | |
| "grad_norm": 2.2066242694854736, | |
| "learning_rate": 4.936950738851758e-05, | |
| "loss": 0.5076, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.3016970458830924, | |
| "grad_norm": 2.2866694927215576, | |
| "learning_rate": 4.935043763965038e-05, | |
| "loss": 0.4621, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.3048397234443746, | |
| "grad_norm": 1.6391174793243408, | |
| "learning_rate": 4.933108757557402e-05, | |
| "loss": 0.4651, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.3079824010056568, | |
| "grad_norm": 2.0994527339935303, | |
| "learning_rate": 4.9311457419040866e-05, | |
| "loss": 0.5533, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.31112507856693905, | |
| "grad_norm": 1.7273298501968384, | |
| "learning_rate": 4.9291547396027594e-05, | |
| "loss": 0.5621, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.3142677561282212, | |
| "grad_norm": 2.017411470413208, | |
| "learning_rate": 4.9271357735732655e-05, | |
| "loss": 0.4768, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.31741043368950345, | |
| "grad_norm": 1.7073991298675537, | |
| "learning_rate": 4.925088867057359e-05, | |
| "loss": 0.4989, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.32055311125078567, | |
| "grad_norm": 2.071885585784912, | |
| "learning_rate": 4.9230140436184364e-05, | |
| "loss": 0.4984, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.3236957888120679, | |
| "grad_norm": 2.1429100036621094, | |
| "learning_rate": 4.9209113271412665e-05, | |
| "loss": 0.5494, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.32683846637335007, | |
| "grad_norm": 1.709663987159729, | |
| "learning_rate": 4.9187807418317144e-05, | |
| "loss": 0.5701, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.3299811439346323, | |
| "grad_norm": 1.9613614082336426, | |
| "learning_rate": 4.9166223122164635e-05, | |
| "loss": 0.4878, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.3331238214959145, | |
| "grad_norm": 1.7875553369522095, | |
| "learning_rate": 4.9144360631427325e-05, | |
| "loss": 0.4705, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.33626649905719674, | |
| "grad_norm": 1.9654724597930908, | |
| "learning_rate": 4.9122220197779886e-05, | |
| "loss": 0.4385, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.33940917661847897, | |
| "grad_norm": 1.4906249046325684, | |
| "learning_rate": 4.90998020760966e-05, | |
| "loss": 0.4427, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.34255185417976114, | |
| "grad_norm": 1.86861252784729, | |
| "learning_rate": 4.907710652444843e-05, | |
| "loss": 0.4817, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.34569453174104336, | |
| "grad_norm": 1.9250684976577759, | |
| "learning_rate": 4.90541338041e-05, | |
| "loss": 0.5351, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.3488372093023256, | |
| "grad_norm": 1.8099184036254883, | |
| "learning_rate": 4.903088417950664e-05, | |
| "loss": 0.5238, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.3519798868636078, | |
| "grad_norm": 1.4055452346801758, | |
| "learning_rate": 4.9007357918311315e-05, | |
| "loss": 0.5157, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.35512256442489, | |
| "grad_norm": 1.7121083736419678, | |
| "learning_rate": 4.898355529134156e-05, | |
| "loss": 0.5087, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.3582652419861722, | |
| "grad_norm": 1.7254718542099, | |
| "learning_rate": 4.895947657260633e-05, | |
| "loss": 0.482, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.36140791954745444, | |
| "grad_norm": 1.7115743160247803, | |
| "learning_rate": 4.893512203929291e-05, | |
| "loss": 0.5415, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.36455059710873666, | |
| "grad_norm": 1.5224454402923584, | |
| "learning_rate": 4.8910491971763625e-05, | |
| "loss": 0.5531, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.36769327467001883, | |
| "grad_norm": 1.4693105220794678, | |
| "learning_rate": 4.888558665355273e-05, | |
| "loss": 0.5007, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.37083595223130106, | |
| "grad_norm": 1.823201298713684, | |
| "learning_rate": 4.8860406371363056e-05, | |
| "loss": 0.4568, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.3739786297925833, | |
| "grad_norm": 1.6682394742965698, | |
| "learning_rate": 4.883495141506272e-05, | |
| "loss": 0.5111, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.3771213073538655, | |
| "grad_norm": 1.9045063257217407, | |
| "learning_rate": 4.880922207768186e-05, | |
| "loss": 0.5081, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.3802639849151477, | |
| "grad_norm": 1.9026966094970703, | |
| "learning_rate": 4.8783218655409165e-05, | |
| "loss": 0.5094, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.3834066624764299, | |
| "grad_norm": 2.230048418045044, | |
| "learning_rate": 4.875694144758852e-05, | |
| "loss": 0.4501, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.38654934003771213, | |
| "grad_norm": 1.8619111776351929, | |
| "learning_rate": 4.873039075671558e-05, | |
| "loss": 0.5595, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.38969201759899436, | |
| "grad_norm": 1.0510592460632324, | |
| "learning_rate": 4.8703566888434216e-05, | |
| "loss": 0.4494, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.3928346951602766, | |
| "grad_norm": 1.61916983127594, | |
| "learning_rate": 4.8676470151533054e-05, | |
| "loss": 0.5619, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.39597737272155875, | |
| "grad_norm": 2.1640028953552246, | |
| "learning_rate": 4.864910085794192e-05, | |
| "loss": 0.4624, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.399120050282841, | |
| "grad_norm": 1.8915683031082153, | |
| "learning_rate": 4.8621459322728216e-05, | |
| "loss": 0.4953, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.4022627278441232, | |
| "grad_norm": 1.5854873657226562, | |
| "learning_rate": 4.859354586409331e-05, | |
| "loss": 0.4952, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.40540540540540543, | |
| "grad_norm": 1.8864436149597168, | |
| "learning_rate": 4.8565360803368885e-05, | |
| "loss": 0.4643, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.4085480829666876, | |
| "grad_norm": 1.7292683124542236, | |
| "learning_rate": 4.853690446501323e-05, | |
| "loss": 0.4995, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.4116907605279698, | |
| "grad_norm": 1.1200498342514038, | |
| "learning_rate": 4.85081771766075e-05, | |
| "loss": 0.4397, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.41483343808925205, | |
| "grad_norm": 1.6311380863189697, | |
| "learning_rate": 4.8479179268851934e-05, | |
| "loss": 0.5041, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.4179761156505343, | |
| "grad_norm": 1.5585182905197144, | |
| "learning_rate": 4.844991107556208e-05, | |
| "loss": 0.4968, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.42111879321181644, | |
| "grad_norm": 1.9798181056976318, | |
| "learning_rate": 4.8420372933664934e-05, | |
| "loss": 0.5101, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.42426147077309867, | |
| "grad_norm": 1.5805935859680176, | |
| "learning_rate": 4.839056518319507e-05, | |
| "loss": 0.5093, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.4274041483343809, | |
| "grad_norm": 1.8099379539489746, | |
| "learning_rate": 4.836048816729068e-05, | |
| "loss": 0.4841, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.4305468258956631, | |
| "grad_norm": 1.294607400894165, | |
| "learning_rate": 4.833014223218971e-05, | |
| "loss": 0.5417, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.4336895034569453, | |
| "grad_norm": 1.446961760520935, | |
| "learning_rate": 4.8299527727225796e-05, | |
| "loss": 0.4639, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.4368321810182275, | |
| "grad_norm": 1.460518479347229, | |
| "learning_rate": 4.826864500482428e-05, | |
| "loss": 0.4648, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.43997485857950974, | |
| "grad_norm": 1.3880281448364258, | |
| "learning_rate": 4.823749442049817e-05, | |
| "loss": 0.4185, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.44311753614079197, | |
| "grad_norm": 1.6404091119766235, | |
| "learning_rate": 4.820607633284397e-05, | |
| "loss": 0.4007, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.4462602137020742, | |
| "grad_norm": 1.201521873474121, | |
| "learning_rate": 4.8174391103537655e-05, | |
| "loss": 0.4781, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.44940289126335636, | |
| "grad_norm": 1.4873559474945068, | |
| "learning_rate": 4.814243909733043e-05, | |
| "loss": 0.4317, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.4525455688246386, | |
| "grad_norm": 1.9189249277114868, | |
| "learning_rate": 4.811022068204457e-05, | |
| "loss": 0.5085, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.4556882463859208, | |
| "grad_norm": 1.4758615493774414, | |
| "learning_rate": 4.807773622856918e-05, | |
| "loss": 0.4815, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.45883092394720304, | |
| "grad_norm": 1.6353334188461304, | |
| "learning_rate": 4.804498611085589e-05, | |
| "loss": 0.4794, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.4619736015084852, | |
| "grad_norm": 1.4237501621246338, | |
| "learning_rate": 4.8011970705914634e-05, | |
| "loss": 0.4593, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 1.6772956848144531, | |
| "learning_rate": 4.7978690393809186e-05, | |
| "loss": 0.486, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.46825895663104966, | |
| "grad_norm": 1.553051233291626, | |
| "learning_rate": 4.794514555765293e-05, | |
| "loss": 0.4658, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.4714016341923319, | |
| "grad_norm": 1.8338069915771484, | |
| "learning_rate": 4.7911336583604306e-05, | |
| "loss": 0.4953, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.47454431175361406, | |
| "grad_norm": 1.431541919708252, | |
| "learning_rate": 4.7877263860862477e-05, | |
| "loss": 0.4442, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.4776869893148963, | |
| "grad_norm": 1.120583415031433, | |
| "learning_rate": 4.7842927781662796e-05, | |
| "loss": 0.4537, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.4808296668761785, | |
| "grad_norm": 1.380642056465149, | |
| "learning_rate": 4.780832874127228e-05, | |
| "loss": 0.4621, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.48397234443746073, | |
| "grad_norm": 1.1469544172286987, | |
| "learning_rate": 4.777346713798512e-05, | |
| "loss": 0.5226, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.4871150219987429, | |
| "grad_norm": 1.483512043952942, | |
| "learning_rate": 4.7738343373118e-05, | |
| "loss": 0.5479, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.49025769956002513, | |
| "grad_norm": 1.610948920249939, | |
| "learning_rate": 4.770295785100558e-05, | |
| "loss": 0.5046, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.49340037712130735, | |
| "grad_norm": 1.3163951635360718, | |
| "learning_rate": 4.7667310978995785e-05, | |
| "loss": 0.4603, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.4965430546825896, | |
| "grad_norm": 1.4908734560012817, | |
| "learning_rate": 4.763140316744509e-05, | |
| "loss": 0.4806, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.4996857322438718, | |
| "grad_norm": 1.3357776403427124, | |
| "learning_rate": 4.759523482971388e-05, | |
| "loss": 0.471, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.502828409805154, | |
| "grad_norm": 1.4438153505325317, | |
| "learning_rate": 4.755880638216161e-05, | |
| "loss": 0.443, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.5059710873664363, | |
| "grad_norm": 1.4169646501541138, | |
| "learning_rate": 4.752211824414205e-05, | |
| "loss": 0.4842, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.5091137649277184, | |
| "grad_norm": 1.4930610656738281, | |
| "learning_rate": 4.7485170837998455e-05, | |
| "loss": 0.4815, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.5122564424890006, | |
| "grad_norm": 1.5918561220169067, | |
| "learning_rate": 4.74479645890587e-05, | |
| "loss": 0.4372, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.5153991200502829, | |
| "grad_norm": 1.6254751682281494, | |
| "learning_rate": 4.7410499925630395e-05, | |
| "loss": 0.4187, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.518541797611565, | |
| "grad_norm": 1.5545734167099, | |
| "learning_rate": 4.737277727899591e-05, | |
| "loss": 0.4743, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.5216844751728472, | |
| "grad_norm": 1.727158546447754, | |
| "learning_rate": 4.7334797083407475e-05, | |
| "loss": 0.4294, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.5248271527341295, | |
| "grad_norm": 1.7546805143356323, | |
| "learning_rate": 4.729655977608214e-05, | |
| "loss": 0.5043, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.5279698302954117, | |
| "grad_norm": 1.4232885837554932, | |
| "learning_rate": 4.7258065797196746e-05, | |
| "loss": 0.4729, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.531112507856694, | |
| "grad_norm": 1.391065239906311, | |
| "learning_rate": 4.721931558988286e-05, | |
| "loss": 0.4915, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.5342551854179761, | |
| "grad_norm": 1.7134276628494263, | |
| "learning_rate": 4.7180309600221706e-05, | |
| "loss": 0.5102, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.5373978629792583, | |
| "grad_norm": 1.5847156047821045, | |
| "learning_rate": 4.714104827723895e-05, | |
| "loss": 0.4785, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 1.3267030715942383, | |
| "learning_rate": 4.7101532072899623e-05, | |
| "loss": 0.5135, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.5436832181018227, | |
| "grad_norm": 1.5763999223709106, | |
| "learning_rate": 4.706176144210286e-05, | |
| "loss": 0.4916, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.5468258956631049, | |
| "grad_norm": 1.4937148094177246, | |
| "learning_rate": 4.7021736842676687e-05, | |
| "loss": 0.4561, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.5499685732243872, | |
| "grad_norm": 1.6091326475143433, | |
| "learning_rate": 4.698145873537274e-05, | |
| "loss": 0.482, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.5531112507856694, | |
| "grad_norm": 1.5875076055526733, | |
| "learning_rate": 4.694092758386095e-05, | |
| "loss": 0.4104, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.5562539283469516, | |
| "grad_norm": 1.3293397426605225, | |
| "learning_rate": 4.690014385472424e-05, | |
| "loss": 0.4143, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.5593966059082338, | |
| "grad_norm": 1.1707426309585571, | |
| "learning_rate": 4.6859108017453136e-05, | |
| "loss": 0.4726, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.562539283469516, | |
| "grad_norm": 1.3706302642822266, | |
| "learning_rate": 4.6817820544440346e-05, | |
| "loss": 0.461, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.5656819610307983, | |
| "grad_norm": 1.7703521251678467, | |
| "learning_rate": 4.677628191097534e-05, | |
| "loss": 0.5042, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.5688246385920804, | |
| "grad_norm": 1.5359523296356201, | |
| "learning_rate": 4.6734492595238874e-05, | |
| "loss": 0.4192, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.5719673161533627, | |
| "grad_norm": 1.700126051902771, | |
| "learning_rate": 4.6692453078297495e-05, | |
| "loss": 0.5095, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.5751099937146449, | |
| "grad_norm": 1.4070463180541992, | |
| "learning_rate": 4.665016384409798e-05, | |
| "loss": 0.4779, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.578252671275927, | |
| "grad_norm": 1.2797980308532715, | |
| "learning_rate": 4.660762537946178e-05, | |
| "loss": 0.4351, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.5813953488372093, | |
| "grad_norm": 1.4518544673919678, | |
| "learning_rate": 4.656483817407944e-05, | |
| "loss": 0.448, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.5845380263984915, | |
| "grad_norm": 1.300370216369629, | |
| "learning_rate": 4.652180272050491e-05, | |
| "loss": 0.44, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.5876807039597737, | |
| "grad_norm": 1.4460704326629639, | |
| "learning_rate": 4.64785195141499e-05, | |
| "loss": 0.4565, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.590823381521056, | |
| "grad_norm": 1.5882294178009033, | |
| "learning_rate": 4.643498905327819e-05, | |
| "loss": 0.5078, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.5939660590823381, | |
| "grad_norm": 1.3055689334869385, | |
| "learning_rate": 4.639121183899989e-05, | |
| "loss": 0.5, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.5971087366436204, | |
| "grad_norm": 1.4545074701309204, | |
| "learning_rate": 4.6347188375265645e-05, | |
| "loss": 0.4767, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.6002514142049026, | |
| "grad_norm": 1.0975799560546875, | |
| "learning_rate": 4.630291916886086e-05, | |
| "loss": 0.4384, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.6033940917661847, | |
| "grad_norm": 1.6817741394042969, | |
| "learning_rate": 4.625840472939987e-05, | |
| "loss": 0.5, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.606536769327467, | |
| "grad_norm": 1.0438511371612549, | |
| "learning_rate": 4.621364556932005e-05, | |
| "loss": 0.4671, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.6096794468887492, | |
| "grad_norm": 1.1330349445343018, | |
| "learning_rate": 4.616864220387592e-05, | |
| "loss": 0.4275, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.6128221244500315, | |
| "grad_norm": 1.6542346477508545, | |
| "learning_rate": 4.612339515113324e-05, | |
| "loss": 0.4801, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.6159648020113137, | |
| "grad_norm": 1.1006687879562378, | |
| "learning_rate": 4.6077904931963036e-05, | |
| "loss": 0.4756, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.6191074795725958, | |
| "grad_norm": 1.3067682981491089, | |
| "learning_rate": 4.603217207003555e-05, | |
| "loss": 0.4416, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.6222501571338781, | |
| "grad_norm": 1.2261842489242554, | |
| "learning_rate": 4.598619709181431e-05, | |
| "loss": 0.4276, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.6253928346951603, | |
| "grad_norm": 1.4903597831726074, | |
| "learning_rate": 4.593998052654998e-05, | |
| "loss": 0.4972, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.6285355122564424, | |
| "grad_norm": 1.4376386404037476, | |
| "learning_rate": 4.589352290627433e-05, | |
| "loss": 0.4568, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.6316781898177247, | |
| "grad_norm": 1.351223111152649, | |
| "learning_rate": 4.584682476579406e-05, | |
| "loss": 0.4858, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.6348208673790069, | |
| "grad_norm": 1.364617943763733, | |
| "learning_rate": 4.57998866426847e-05, | |
| "loss": 0.4876, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.6379635449402892, | |
| "grad_norm": 1.459356665611267, | |
| "learning_rate": 4.575270907728437e-05, | |
| "loss": 0.478, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.6411062225015713, | |
| "grad_norm": 1.6396265029907227, | |
| "learning_rate": 4.5705292612687576e-05, | |
| "loss": 0.529, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.6442489000628535, | |
| "grad_norm": 0.960100531578064, | |
| "learning_rate": 4.565763779473898e-05, | |
| "loss": 0.4391, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.6473915776241358, | |
| "grad_norm": 1.315019130706787, | |
| "learning_rate": 4.560974517202709e-05, | |
| "loss": 0.4917, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.650534255185418, | |
| "grad_norm": 1.5295921564102173, | |
| "learning_rate": 4.556161529587794e-05, | |
| "loss": 0.4924, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.6536769327467001, | |
| "grad_norm": 1.1837646961212158, | |
| "learning_rate": 4.551324872034879e-05, | |
| "loss": 0.4493, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.6568196103079824, | |
| "grad_norm": 1.4307267665863037, | |
| "learning_rate": 4.5464646002221684e-05, | |
| "loss": 0.468, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.6599622878692646, | |
| "grad_norm": 1.155652403831482, | |
| "learning_rate": 4.541580770099709e-05, | |
| "loss": 0.4243, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.6631049654305469, | |
| "grad_norm": 1.3834953308105469, | |
| "learning_rate": 4.536673437888743e-05, | |
| "loss": 0.5501, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.666247642991829, | |
| "grad_norm": 1.0636712312698364, | |
| "learning_rate": 4.531742660081063e-05, | |
| "loss": 0.4274, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.6693903205531112, | |
| "grad_norm": 0.8389808535575867, | |
| "learning_rate": 4.526788493438359e-05, | |
| "loss": 0.4489, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 0.6725329981143935, | |
| "grad_norm": 1.242849349975586, | |
| "learning_rate": 4.5218109949915674e-05, | |
| "loss": 0.5231, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "grad_norm": 1.4097121953964233, | |
| "learning_rate": 4.516810222040214e-05, | |
| "loss": 0.4373, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.6788183532369579, | |
| "grad_norm": 1.4146395921707153, | |
| "learning_rate": 4.511786232151753e-05, | |
| "loss": 0.4185, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.6819610307982401, | |
| "grad_norm": 1.1632105112075806, | |
| "learning_rate": 4.506739083160906e-05, | |
| "loss": 0.4387, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.6851037083595223, | |
| "grad_norm": 1.1534103155136108, | |
| "learning_rate": 4.501668833168995e-05, | |
| "loss": 0.4387, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.6882463859208046, | |
| "grad_norm": 1.355643391609192, | |
| "learning_rate": 4.496575540543275e-05, | |
| "loss": 0.4568, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 0.6913890634820867, | |
| "grad_norm": 1.2842720746994019, | |
| "learning_rate": 4.49145926391626e-05, | |
| "loss": 0.4486, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.6945317410433689, | |
| "grad_norm": 0.981799840927124, | |
| "learning_rate": 4.48632006218505e-05, | |
| "loss": 0.4268, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 0.6976744186046512, | |
| "grad_norm": 1.5337742567062378, | |
| "learning_rate": 4.481157994510652e-05, | |
| "loss": 0.5001, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.7008170961659334, | |
| "grad_norm": 1.4315093755722046, | |
| "learning_rate": 4.475973120317298e-05, | |
| "loss": 0.4779, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 0.7039597737272156, | |
| "grad_norm": 1.181176781654358, | |
| "learning_rate": 4.4707654992917635e-05, | |
| "loss": 0.4312, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.7071024512884978, | |
| "grad_norm": 1.5547527074813843, | |
| "learning_rate": 4.465535191382679e-05, | |
| "loss": 0.5246, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.71024512884978, | |
| "grad_norm": 1.2100272178649902, | |
| "learning_rate": 4.460282256799839e-05, | |
| "loss": 0.4601, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.7133878064110623, | |
| "grad_norm": 1.2901486158370972, | |
| "learning_rate": 4.455006756013511e-05, | |
| "loss": 0.4294, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 0.7165304839723444, | |
| "grad_norm": 1.2931948900222778, | |
| "learning_rate": 4.449708749753736e-05, | |
| "loss": 0.4618, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.7196731615336267, | |
| "grad_norm": 1.1794995069503784, | |
| "learning_rate": 4.444388299009633e-05, | |
| "loss": 0.4513, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 0.7228158390949089, | |
| "grad_norm": 0.9884097576141357, | |
| "learning_rate": 4.439045465028695e-05, | |
| "loss": 0.4033, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.725958516656191, | |
| "grad_norm": 1.3767797946929932, | |
| "learning_rate": 4.433680309316086e-05, | |
| "loss": 0.5132, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.7291011942174733, | |
| "grad_norm": 1.2242072820663452, | |
| "learning_rate": 4.428292893633928e-05, | |
| "loss": 0.4564, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.7322438717787555, | |
| "grad_norm": 1.416617512702942, | |
| "learning_rate": 4.422883280000596e-05, | |
| "loss": 0.4765, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 0.7353865493400377, | |
| "grad_norm": 1.5963226556777954, | |
| "learning_rate": 4.417451530690001e-05, | |
| "loss": 0.4593, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.73852922690132, | |
| "grad_norm": 1.3153035640716553, | |
| "learning_rate": 4.411997708230872e-05, | |
| "loss": 0.4175, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.7416719044626021, | |
| "grad_norm": 1.202329158782959, | |
| "learning_rate": 4.40652187540604e-05, | |
| "loss": 0.4668, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.7448145820238844, | |
| "grad_norm": 1.2087334394454956, | |
| "learning_rate": 4.4010240952517115e-05, | |
| "loss": 0.469, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 0.7479572595851666, | |
| "grad_norm": 1.1056499481201172, | |
| "learning_rate": 4.395504431056745e-05, | |
| "loss": 0.4764, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.7510999371464487, | |
| "grad_norm": 1.2779186964035034, | |
| "learning_rate": 4.389962946361921e-05, | |
| "loss": 0.3649, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 0.754242614707731, | |
| "grad_norm": 1.545474886894226, | |
| "learning_rate": 4.384399704959211e-05, | |
| "loss": 0.4498, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.7573852922690132, | |
| "grad_norm": 1.0024960041046143, | |
| "learning_rate": 4.378814770891045e-05, | |
| "loss": 0.4717, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 0.7605279698302954, | |
| "grad_norm": 1.3661173582077026, | |
| "learning_rate": 4.373208208449572e-05, | |
| "loss": 0.4662, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.7636706473915776, | |
| "grad_norm": 1.1410945653915405, | |
| "learning_rate": 4.3675800821759205e-05, | |
| "loss": 0.5376, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 0.7668133249528598, | |
| "grad_norm": 1.1424890756607056, | |
| "learning_rate": 4.361930456859455e-05, | |
| "loss": 0.4682, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.7699560025141421, | |
| "grad_norm": 1.373201847076416, | |
| "learning_rate": 4.3562593975370314e-05, | |
| "loss": 0.4454, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.7730986800754243, | |
| "grad_norm": 1.1460034847259521, | |
| "learning_rate": 4.350566969492248e-05, | |
| "loss": 0.4749, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.7762413576367064, | |
| "grad_norm": 1.2430229187011719, | |
| "learning_rate": 4.344853238254692e-05, | |
| "loss": 0.4535, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 0.7793840351979887, | |
| "grad_norm": 1.3757741451263428, | |
| "learning_rate": 4.339118269599191e-05, | |
| "loss": 0.41, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.7825267127592709, | |
| "grad_norm": 0.9454161524772644, | |
| "learning_rate": 4.333362129545046e-05, | |
| "loss": 0.4454, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 0.7856693903205532, | |
| "grad_norm": 0.9156450033187866, | |
| "learning_rate": 4.327584884355281e-05, | |
| "loss": 0.4719, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.7888120678818353, | |
| "grad_norm": 1.2694880962371826, | |
| "learning_rate": 4.321786600535874e-05, | |
| "loss": 0.4304, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 0.7919547454431175, | |
| "grad_norm": 1.2514046430587769, | |
| "learning_rate": 4.315967344834996e-05, | |
| "loss": 0.409, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.7950974230043998, | |
| "grad_norm": 1.184391736984253, | |
| "learning_rate": 4.310127184242237e-05, | |
| "loss": 0.4198, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 0.798240100565682, | |
| "grad_norm": 1.2372093200683594, | |
| "learning_rate": 4.304266185987842e-05, | |
| "loss": 0.5023, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.8013827781269641, | |
| "grad_norm": 1.340918779373169, | |
| "learning_rate": 4.29838441754193e-05, | |
| "loss": 0.4776, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.8045254556882464, | |
| "grad_norm": 1.2824565172195435, | |
| "learning_rate": 4.292481946613721e-05, | |
| "loss": 0.4951, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.8076681332495286, | |
| "grad_norm": 1.2031137943267822, | |
| "learning_rate": 4.286558841150757e-05, | |
| "loss": 0.5001, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 1.3976994752883911, | |
| "learning_rate": 4.2806151693381194e-05, | |
| "loss": 0.459, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.813953488372093, | |
| "grad_norm": 1.8632055521011353, | |
| "learning_rate": 4.274650999597641e-05, | |
| "loss": 0.4622, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 0.8170961659333752, | |
| "grad_norm": 1.4277501106262207, | |
| "learning_rate": 4.2686664005871226e-05, | |
| "loss": 0.4629, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.8202388434946575, | |
| "grad_norm": 1.189048409461975, | |
| "learning_rate": 4.262661441199541e-05, | |
| "loss": 0.4408, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 0.8233815210559396, | |
| "grad_norm": 1.2833003997802734, | |
| "learning_rate": 4.2566361905622555e-05, | |
| "loss": 0.4064, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.8265241986172219, | |
| "grad_norm": 1.1060303449630737, | |
| "learning_rate": 4.250590718036211e-05, | |
| "loss": 0.3962, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 0.8296668761785041, | |
| "grad_norm": 1.0350922346115112, | |
| "learning_rate": 4.2445250932151425e-05, | |
| "loss": 0.4252, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.8328095537397863, | |
| "grad_norm": 1.3250532150268555, | |
| "learning_rate": 4.2384393859247726e-05, | |
| "loss": 0.4291, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.8359522313010685, | |
| "grad_norm": 1.2099930047988892, | |
| "learning_rate": 4.232333666222006e-05, | |
| "loss": 0.4341, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.8390949088623507, | |
| "grad_norm": 1.3332287073135376, | |
| "learning_rate": 4.226208004394127e-05, | |
| "loss": 0.466, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 0.8422375864236329, | |
| "grad_norm": 1.3363186120986938, | |
| "learning_rate": 4.220062470957986e-05, | |
| "loss": 0.4196, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.8453802639849152, | |
| "grad_norm": 0.9614083170890808, | |
| "learning_rate": 4.213897136659189e-05, | |
| "loss": 0.4183, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 0.8485229415461973, | |
| "grad_norm": 1.7605079412460327, | |
| "learning_rate": 4.2077120724712844e-05, | |
| "loss": 0.4756, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.8516656191074796, | |
| "grad_norm": 1.3952196836471558, | |
| "learning_rate": 4.201507349594946e-05, | |
| "loss": 0.433, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 0.8548082966687618, | |
| "grad_norm": 1.1092714071273804, | |
| "learning_rate": 4.195283039457155e-05, | |
| "loss": 0.4721, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.857950974230044, | |
| "grad_norm": 0.9377354979515076, | |
| "learning_rate": 4.189039213710369e-05, | |
| "loss": 0.4666, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 0.8610936517913262, | |
| "grad_norm": 1.2234201431274414, | |
| "learning_rate": 4.1827759442317116e-05, | |
| "loss": 0.4582, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.8642363293526084, | |
| "grad_norm": 1.2329143285751343, | |
| "learning_rate": 4.176493303122131e-05, | |
| "loss": 0.4581, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.8673790069138906, | |
| "grad_norm": 1.2294172048568726, | |
| "learning_rate": 4.170191362705578e-05, | |
| "loss": 0.4688, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.8705216844751729, | |
| "grad_norm": 0.8059648871421814, | |
| "learning_rate": 4.163870195528171e-05, | |
| "loss": 0.3847, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 0.873664362036455, | |
| "grad_norm": 1.3568918704986572, | |
| "learning_rate": 4.157529874357364e-05, | |
| "loss": 0.4839, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.8768070395977373, | |
| "grad_norm": 1.33687424659729, | |
| "learning_rate": 4.151170472181103e-05, | |
| "loss": 0.469, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 0.8799497171590195, | |
| "grad_norm": 1.1635092496871948, | |
| "learning_rate": 4.144792062206989e-05, | |
| "loss": 0.4117, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.8830923947203017, | |
| "grad_norm": 0.4810682237148285, | |
| "learning_rate": 4.138394717861438e-05, | |
| "loss": 0.3328, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 0.8862350722815839, | |
| "grad_norm": 1.170903205871582, | |
| "learning_rate": 4.131978512788832e-05, | |
| "loss": 0.5026, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.8893777498428661, | |
| "grad_norm": 0.9785465598106384, | |
| "learning_rate": 4.1255435208506695e-05, | |
| "loss": 0.4031, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 0.8925204274041484, | |
| "grad_norm": 1.0040161609649658, | |
| "learning_rate": 4.1190898161247216e-05, | |
| "loss": 0.3992, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.8956631049654306, | |
| "grad_norm": 1.2257813215255737, | |
| "learning_rate": 4.112617472904175e-05, | |
| "loss": 0.4431, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 0.8988057825267127, | |
| "grad_norm": 0.9779378771781921, | |
| "learning_rate": 4.106126565696774e-05, | |
| "loss": 0.4387, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.9000628535512256, | |
| "eval_accuracy": 0.8749659063444953, | |
| "eval_loss": 0.4478217661380768, | |
| "eval_runtime": 801.5583, | |
| "eval_samples_per_second": 5.97, | |
| "eval_steps_per_second": 1.493, | |
| "step": 4296 | |
| }, | |
| { | |
| "epoch": 0.901948460087995, | |
| "grad_norm": 1.0927642583847046, | |
| "learning_rate": 4.099617169223971e-05, | |
| "loss": 0.4717, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 0.9050911376492772, | |
| "grad_norm": 1.3863451480865479, | |
| "learning_rate": 4.093089358420059e-05, | |
| "loss": 0.4482, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.9082338152105593, | |
| "grad_norm": 0.8744410276412964, | |
| "learning_rate": 4.08654320843131e-05, | |
| "loss": 0.4739, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 0.9113764927718416, | |
| "grad_norm": 1.1781022548675537, | |
| "learning_rate": 4.079978794615115e-05, | |
| "loss": 0.408, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.9145191703331238, | |
| "grad_norm": 1.225847840309143, | |
| "learning_rate": 4.07339619253911e-05, | |
| "loss": 0.4624, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 0.9176618478944061, | |
| "grad_norm": 1.2807953357696533, | |
| "learning_rate": 4.0667954779803094e-05, | |
| "loss": 0.4506, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.9208045254556882, | |
| "grad_norm": 1.3124723434448242, | |
| "learning_rate": 4.0601767269242356e-05, | |
| "loss": 0.4253, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 0.9239472030169704, | |
| "grad_norm": 1.10555899143219, | |
| "learning_rate": 4.053540015564039e-05, | |
| "loss": 0.4078, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.9270898805782527, | |
| "grad_norm": 1.0445165634155273, | |
| "learning_rate": 4.046885420299625e-05, | |
| "loss": 0.4157, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 1.0756609439849854, | |
| "learning_rate": 4.040213017736774e-05, | |
| "loss": 0.4494, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.933375235700817, | |
| "grad_norm": 1.2414379119873047, | |
| "learning_rate": 4.0335228846862575e-05, | |
| "loss": 0.4544, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 0.9365179132620993, | |
| "grad_norm": 1.2390245199203491, | |
| "learning_rate": 4.026815098162957e-05, | |
| "loss": 0.4086, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.9396605908233815, | |
| "grad_norm": 1.250126600265503, | |
| "learning_rate": 4.020089735384973e-05, | |
| "loss": 0.4206, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 0.9428032683846638, | |
| "grad_norm": 1.0727368593215942, | |
| "learning_rate": 4.013346873772743e-05, | |
| "loss": 0.4265, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.9459459459459459, | |
| "grad_norm": 1.2256518602371216, | |
| "learning_rate": 4.0065865909481417e-05, | |
| "loss": 0.4437, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 0.9490886235072281, | |
| "grad_norm": 1.4009459018707275, | |
| "learning_rate": 3.9998089647335933e-05, | |
| "loss": 0.4203, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.9522313010685104, | |
| "grad_norm": 1.1759395599365234, | |
| "learning_rate": 3.993014073151175e-05, | |
| "loss": 0.4978, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 0.9553739786297926, | |
| "grad_norm": 1.0505579710006714, | |
| "learning_rate": 3.9862019944217175e-05, | |
| "loss": 0.4191, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.9585166561910748, | |
| "grad_norm": 1.3067837953567505, | |
| "learning_rate": 3.9793728069639046e-05, | |
| "loss": 0.4671, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 0.961659333752357, | |
| "grad_norm": 1.2706676721572876, | |
| "learning_rate": 3.972526589393372e-05, | |
| "loss": 0.4288, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.9648020113136392, | |
| "grad_norm": 1.1527299880981445, | |
| "learning_rate": 3.965663420521798e-05, | |
| "loss": 0.4697, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 0.9679446888749215, | |
| "grad_norm": 0.8752300143241882, | |
| "learning_rate": 3.9587833793560026e-05, | |
| "loss": 0.4522, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.9710873664362036, | |
| "grad_norm": 1.0137310028076172, | |
| "learning_rate": 3.9518865450970346e-05, | |
| "loss": 0.4606, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 0.9742300439974858, | |
| "grad_norm": 1.1071418523788452, | |
| "learning_rate": 3.944972997139257e-05, | |
| "loss": 0.4403, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.9773727215587681, | |
| "grad_norm": 1.193814754486084, | |
| "learning_rate": 3.93804281506944e-05, | |
| "loss": 0.4046, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 0.9805153991200503, | |
| "grad_norm": 1.1703835725784302, | |
| "learning_rate": 3.93109607866584e-05, | |
| "loss": 0.3727, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.9836580766813325, | |
| "grad_norm": 1.2460951805114746, | |
| "learning_rate": 3.924132867897279e-05, | |
| "loss": 0.4457, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 0.9868007542426147, | |
| "grad_norm": 1.162644624710083, | |
| "learning_rate": 3.9171532629222304e-05, | |
| "loss": 0.4532, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.9899434318038969, | |
| "grad_norm": 1.1026623249053955, | |
| "learning_rate": 3.910157344087892e-05, | |
| "loss": 0.4886, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.9930861093651792, | |
| "grad_norm": 1.3245232105255127, | |
| "learning_rate": 3.9031451919292616e-05, | |
| "loss": 0.474, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.9962287869264613, | |
| "grad_norm": 1.5628905296325684, | |
| "learning_rate": 3.8961168871682116e-05, | |
| "loss": 0.5021, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 0.9993714644877436, | |
| "grad_norm": 1.0988940000534058, | |
| "learning_rate": 3.889072510712557e-05, | |
| "loss": 0.4488, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.0025141420490258, | |
| "grad_norm": 1.1718677282333374, | |
| "learning_rate": 3.882012143655126e-05, | |
| "loss": 0.4284, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 1.005656819610308, | |
| "grad_norm": 1.3951458930969238, | |
| "learning_rate": 3.874935867272826e-05, | |
| "loss": 0.4057, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.0087994971715901, | |
| "grad_norm": 1.1581798791885376, | |
| "learning_rate": 3.867843763025709e-05, | |
| "loss": 0.4073, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 1.0119421747328725, | |
| "grad_norm": 1.4225468635559082, | |
| "learning_rate": 3.860735912556031e-05, | |
| "loss": 0.4437, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.0150848522941547, | |
| "grad_norm": 0.9562087059020996, | |
| "learning_rate": 3.853612397687315e-05, | |
| "loss": 0.4008, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 1.0182275298554369, | |
| "grad_norm": 1.3174970149993896, | |
| "learning_rate": 3.846473300423409e-05, | |
| "loss": 0.4135, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.021370207416719, | |
| "grad_norm": 1.4198646545410156, | |
| "learning_rate": 3.839318702947538e-05, | |
| "loss": 0.434, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 1.0245128849780012, | |
| "grad_norm": 1.2705206871032715, | |
| "learning_rate": 3.832148687621365e-05, | |
| "loss": 0.4136, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.0276555625392834, | |
| "grad_norm": 1.254346489906311, | |
| "learning_rate": 3.8249633369840346e-05, | |
| "loss": 0.3875, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 1.0307982401005658, | |
| "grad_norm": 1.2936162948608398, | |
| "learning_rate": 3.817762733751231e-05, | |
| "loss": 0.3966, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.033940917661848, | |
| "grad_norm": 1.0256013870239258, | |
| "learning_rate": 3.81054696081422e-05, | |
| "loss": 0.4171, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 1.03708359522313, | |
| "grad_norm": 1.2666840553283691, | |
| "learning_rate": 3.803316101238895e-05, | |
| "loss": 0.4003, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.0402262727844123, | |
| "grad_norm": 1.2721953392028809, | |
| "learning_rate": 3.796070238264826e-05, | |
| "loss": 0.4034, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 1.0433689503456944, | |
| "grad_norm": 1.24618661403656, | |
| "learning_rate": 3.7888094553042954e-05, | |
| "loss": 0.4406, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.0465116279069768, | |
| "grad_norm": 0.923187255859375, | |
| "learning_rate": 3.78153383594134e-05, | |
| "loss": 0.4689, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 1.049654305468259, | |
| "grad_norm": 1.0710513591766357, | |
| "learning_rate": 3.774243463930791e-05, | |
| "loss": 0.3844, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.0527969830295412, | |
| "grad_norm": 1.2138617038726807, | |
| "learning_rate": 3.766938423197306e-05, | |
| "loss": 0.3412, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 1.0559396605908233, | |
| "grad_norm": 1.3552145957946777, | |
| "learning_rate": 3.7596187978344056e-05, | |
| "loss": 0.4033, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.0590823381521055, | |
| "grad_norm": 1.2156639099121094, | |
| "learning_rate": 3.752284672103503e-05, | |
| "loss": 0.4309, | |
| "step": 5055 | |
| }, | |
| { | |
| "epoch": 1.062225015713388, | |
| "grad_norm": 1.4516615867614746, | |
| "learning_rate": 3.7449361304329384e-05, | |
| "loss": 0.42, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.06536769327467, | |
| "grad_norm": 1.2875463962554932, | |
| "learning_rate": 3.737573257417001e-05, | |
| "loss": 0.3772, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 1.0685103708359522, | |
| "grad_norm": 1.2341505289077759, | |
| "learning_rate": 3.730196137814959e-05, | |
| "loss": 0.4058, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.0716530483972344, | |
| "grad_norm": 1.193441390991211, | |
| "learning_rate": 3.7228048565500854e-05, | |
| "loss": 0.4121, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 1.0747957259585166, | |
| "grad_norm": 1.274909496307373, | |
| "learning_rate": 3.715399498708676e-05, | |
| "loss": 0.4187, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.077938403519799, | |
| "grad_norm": 1.2880769968032837, | |
| "learning_rate": 3.7079801495390715e-05, | |
| "loss": 0.4071, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "grad_norm": 0.7923028469085693, | |
| "learning_rate": 3.70054689445068e-05, | |
| "loss": 0.3541, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.0842237586423633, | |
| "grad_norm": 1.3296815156936646, | |
| "learning_rate": 3.6930998190129864e-05, | |
| "loss": 0.3166, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 1.0873664362036455, | |
| "grad_norm": 1.1654574871063232, | |
| "learning_rate": 3.685639008954574e-05, | |
| "loss": 0.484, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.0905091137649277, | |
| "grad_norm": 1.2645684480667114, | |
| "learning_rate": 3.6781645501621365e-05, | |
| "loss": 0.416, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 1.0936517913262098, | |
| "grad_norm": 1.2940104007720947, | |
| "learning_rate": 3.670676528679483e-05, | |
| "loss": 0.3892, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.0967944688874922, | |
| "grad_norm": 1.003873586654663, | |
| "learning_rate": 3.663175030706557e-05, | |
| "loss": 0.4249, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 1.0999371464487744, | |
| "grad_norm": 1.3847322463989258, | |
| "learning_rate": 3.655660142598437e-05, | |
| "loss": 0.3728, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.1030798240100566, | |
| "grad_norm": 0.9578964710235596, | |
| "learning_rate": 3.648131950864347e-05, | |
| "loss": 0.3692, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 1.1062225015713387, | |
| "grad_norm": 1.3054499626159668, | |
| "learning_rate": 3.640590542166656e-05, | |
| "loss": 0.3691, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.109365179132621, | |
| "grad_norm": 1.1627558469772339, | |
| "learning_rate": 3.633036003319885e-05, | |
| "loss": 0.4018, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 1.1125078566939033, | |
| "grad_norm": 1.445669174194336, | |
| "learning_rate": 3.6254684212897035e-05, | |
| "loss": 0.4158, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.1156505342551855, | |
| "grad_norm": 0.9246712327003479, | |
| "learning_rate": 3.617887883191931e-05, | |
| "loss": 0.3393, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 1.1187932118164676, | |
| "grad_norm": 1.249263882637024, | |
| "learning_rate": 3.6102944762915355e-05, | |
| "loss": 0.3863, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.1219358893777498, | |
| "grad_norm": 1.1501426696777344, | |
| "learning_rate": 3.602688288001624e-05, | |
| "loss": 0.403, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 1.125078566939032, | |
| "grad_norm": 1.2710976600646973, | |
| "learning_rate": 3.595069405882441e-05, | |
| "loss": 0.4146, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.1282212445003144, | |
| "grad_norm": 1.4132471084594727, | |
| "learning_rate": 3.587437917640358e-05, | |
| "loss": 0.3891, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 1.1313639220615965, | |
| "grad_norm": 1.3578236103057861, | |
| "learning_rate": 3.5797939111268665e-05, | |
| "loss": 0.378, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.1345065996228787, | |
| "grad_norm": 1.1907520294189453, | |
| "learning_rate": 3.57213747433756e-05, | |
| "loss": 0.379, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 1.1376492771841609, | |
| "grad_norm": 1.0988811254501343, | |
| "learning_rate": 3.5644686954111305e-05, | |
| "loss": 0.3431, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.140791954745443, | |
| "grad_norm": 1.3456612825393677, | |
| "learning_rate": 3.556787662628347e-05, | |
| "loss": 0.3863, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 1.1439346323067254, | |
| "grad_norm": 1.257224678993225, | |
| "learning_rate": 3.549094464411042e-05, | |
| "loss": 0.4368, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.1470773098680076, | |
| "grad_norm": 1.4249401092529297, | |
| "learning_rate": 3.541389189321092e-05, | |
| "loss": 0.4006, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 1.1502199874292898, | |
| "grad_norm": 1.2512503862380981, | |
| "learning_rate": 3.5336719260594e-05, | |
| "loss": 0.4137, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.153362664990572, | |
| "grad_norm": 1.3531768321990967, | |
| "learning_rate": 3.5259427634648737e-05, | |
| "loss": 0.4046, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 1.156505342551854, | |
| "grad_norm": 0.8420467972755432, | |
| "learning_rate": 3.5182017905134e-05, | |
| "loss": 0.3743, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.1596480201131363, | |
| "grad_norm": 1.3925787210464478, | |
| "learning_rate": 3.5104490963168274e-05, | |
| "loss": 0.4171, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 1.1627906976744187, | |
| "grad_norm": 1.1061654090881348, | |
| "learning_rate": 3.502684770121932e-05, | |
| "loss": 0.3032, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.1659333752357008, | |
| "grad_norm": 1.4722493886947632, | |
| "learning_rate": 3.494908901309396e-05, | |
| "loss": 0.3401, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 1.169076052796983, | |
| "grad_norm": 1.3742226362228394, | |
| "learning_rate": 3.487121579392777e-05, | |
| "loss": 0.394, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.1722187303582652, | |
| "grad_norm": 0.6497241258621216, | |
| "learning_rate": 3.479322894017476e-05, | |
| "loss": 0.362, | |
| "step": 5595 | |
| }, | |
| { | |
| "epoch": 1.1753614079195476, | |
| "grad_norm": 1.2617154121398926, | |
| "learning_rate": 3.471512934959709e-05, | |
| "loss": 0.3857, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.1785040854808297, | |
| "grad_norm": 1.2584044933319092, | |
| "learning_rate": 3.46369179212547e-05, | |
| "loss": 0.4159, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 1.181646763042112, | |
| "grad_norm": 0.9578741788864136, | |
| "learning_rate": 3.455859555549498e-05, | |
| "loss": 0.4259, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.184789440603394, | |
| "grad_norm": 1.0911635160446167, | |
| "learning_rate": 3.448016315394238e-05, | |
| "loss": 0.3585, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 1.1879321181646763, | |
| "grad_norm": 1.2654902935028076, | |
| "learning_rate": 3.440162161948809e-05, | |
| "loss": 0.3954, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.1910747957259584, | |
| "grad_norm": 1.2683358192443848, | |
| "learning_rate": 3.432297185627956e-05, | |
| "loss": 0.3946, | |
| "step": 5685 | |
| }, | |
| { | |
| "epoch": 1.1942174732872408, | |
| "grad_norm": 1.0978072881698608, | |
| "learning_rate": 3.424421476971018e-05, | |
| "loss": 0.3866, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.197360150848523, | |
| "grad_norm": 1.1124176979064941, | |
| "learning_rate": 3.41653512664088e-05, | |
| "loss": 0.3547, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 1.2005028284098052, | |
| "grad_norm": 1.274763584136963, | |
| "learning_rate": 3.408638225422928e-05, | |
| "loss": 0.3512, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.2036455059710873, | |
| "grad_norm": 1.1088907718658447, | |
| "learning_rate": 3.400730864224011e-05, | |
| "loss": 0.3982, | |
| "step": 5745 | |
| }, | |
| { | |
| "epoch": 1.2067881835323695, | |
| "grad_norm": 1.464532494544983, | |
| "learning_rate": 3.392813134071388e-05, | |
| "loss": 0.3889, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.2099308610936519, | |
| "grad_norm": 1.2237341403961182, | |
| "learning_rate": 3.3848851261116845e-05, | |
| "loss": 0.433, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 1.213073538654934, | |
| "grad_norm": 1.3050017356872559, | |
| "learning_rate": 3.3769469316098375e-05, | |
| "loss": 0.3904, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "grad_norm": 1.3422915935516357, | |
| "learning_rate": 3.368998641948052e-05, | |
| "loss": 0.3807, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 1.2193588937774984, | |
| "grad_norm": 1.2591235637664795, | |
| "learning_rate": 3.3610403486247436e-05, | |
| "loss": 0.3875, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.2225015713387806, | |
| "grad_norm": 1.665328860282898, | |
| "learning_rate": 3.353072143253489e-05, | |
| "loss": 0.3621, | |
| "step": 5835 | |
| }, | |
| { | |
| "epoch": 1.2256442489000627, | |
| "grad_norm": 1.1227225065231323, | |
| "learning_rate": 3.345094117561967e-05, | |
| "loss": 0.4314, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.2287869264613451, | |
| "grad_norm": 1.421695351600647, | |
| "learning_rate": 3.337106363390907e-05, | |
| "loss": 0.3899, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 1.2319296040226273, | |
| "grad_norm": 1.3472914695739746, | |
| "learning_rate": 3.32910897269303e-05, | |
| "loss": 0.4728, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.2350722815839095, | |
| "grad_norm": 1.234174132347107, | |
| "learning_rate": 3.321102037531987e-05, | |
| "loss": 0.4298, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 1.2382149591451916, | |
| "grad_norm": 1.3448835611343384, | |
| "learning_rate": 3.313085650081307e-05, | |
| "loss": 0.3667, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.241357636706474, | |
| "grad_norm": 1.5955106019973755, | |
| "learning_rate": 3.305059902623326e-05, | |
| "loss": 0.3968, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 1.2445003142677562, | |
| "grad_norm": 0.8962088823318481, | |
| "learning_rate": 3.297024887548134e-05, | |
| "loss": 0.3656, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.2476429918290384, | |
| "grad_norm": 1.0347754955291748, | |
| "learning_rate": 3.288980697352504e-05, | |
| "loss": 0.3872, | |
| "step": 5955 | |
| }, | |
| { | |
| "epoch": 1.2507856693903205, | |
| "grad_norm": 1.20237135887146, | |
| "learning_rate": 3.280927424638832e-05, | |
| "loss": 0.338, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.2539283469516027, | |
| "grad_norm": 1.0156171321868896, | |
| "learning_rate": 3.272865162114068e-05, | |
| "loss": 0.3318, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 1.2570710245128849, | |
| "grad_norm": 1.4129784107208252, | |
| "learning_rate": 3.2647940025886525e-05, | |
| "loss": 0.4283, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.260213702074167, | |
| "grad_norm": 1.121748924255371, | |
| "learning_rate": 3.256714038975443e-05, | |
| "loss": 0.4193, | |
| "step": 6015 | |
| }, | |
| { | |
| "epoch": 1.2633563796354494, | |
| "grad_norm": 1.0323454141616821, | |
| "learning_rate": 3.248625364288648e-05, | |
| "loss": 0.4382, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.2664990571967316, | |
| "grad_norm": 1.118606686592102, | |
| "learning_rate": 3.240528071642756e-05, | |
| "loss": 0.3337, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 1.2696417347580138, | |
| "grad_norm": 1.1677335500717163, | |
| "learning_rate": 3.232422254251463e-05, | |
| "loss": 0.4412, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.2727844123192962, | |
| "grad_norm": 1.3037948608398438, | |
| "learning_rate": 3.2243080054265994e-05, | |
| "loss": 0.4399, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 1.2759270898805783, | |
| "grad_norm": 1.1724669933319092, | |
| "learning_rate": 3.216185418577054e-05, | |
| "loss": 0.3618, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.2790697674418605, | |
| "grad_norm": 1.173636794090271, | |
| "learning_rate": 3.208054587207703e-05, | |
| "loss": 0.3273, | |
| "step": 6105 | |
| }, | |
| { | |
| "epoch": 1.2822124450031427, | |
| "grad_norm": 1.416745901107788, | |
| "learning_rate": 3.1999156049183297e-05, | |
| "loss": 0.4196, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.2853551225644249, | |
| "grad_norm": 1.1313838958740234, | |
| "learning_rate": 3.191768565402549e-05, | |
| "loss": 0.3977, | |
| "step": 6135 | |
| }, | |
| { | |
| "epoch": 1.288497800125707, | |
| "grad_norm": 1.193344235420227, | |
| "learning_rate": 3.1836135624467276e-05, | |
| "loss": 0.4304, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.2916404776869892, | |
| "grad_norm": 1.3981118202209473, | |
| "learning_rate": 3.175450689928907e-05, | |
| "loss": 0.3614, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 1.2947831552482716, | |
| "grad_norm": 1.1428194046020508, | |
| "learning_rate": 3.167280041817717e-05, | |
| "loss": 0.4059, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.2979258328095538, | |
| "grad_norm": 1.2573941946029663, | |
| "learning_rate": 3.1591017121713027e-05, | |
| "loss": 0.3004, | |
| "step": 6195 | |
| }, | |
| { | |
| "epoch": 1.301068510370836, | |
| "grad_norm": 1.4468852281570435, | |
| "learning_rate": 3.150915795136232e-05, | |
| "loss": 0.43, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.304211187932118, | |
| "grad_norm": 1.2576549053192139, | |
| "learning_rate": 3.14272238494642e-05, | |
| "loss": 0.4297, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 1.3073538654934005, | |
| "grad_norm": 1.1931512355804443, | |
| "learning_rate": 3.1345215759220405e-05, | |
| "loss": 0.4177, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.3104965430546827, | |
| "grad_norm": 1.3183330297470093, | |
| "learning_rate": 3.126313462468438e-05, | |
| "loss": 0.3405, | |
| "step": 6255 | |
| }, | |
| { | |
| "epoch": 1.3136392206159648, | |
| "grad_norm": 1.4701759815216064, | |
| "learning_rate": 3.118098139075046e-05, | |
| "loss": 0.4108, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.316781898177247, | |
| "grad_norm": 1.1573525667190552, | |
| "learning_rate": 3.109875700314296e-05, | |
| "loss": 0.3971, | |
| "step": 6285 | |
| }, | |
| { | |
| "epoch": 1.3199245757385292, | |
| "grad_norm": 1.167579174041748, | |
| "learning_rate": 3.1016462408405304e-05, | |
| "loss": 0.2966, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.3230672532998113, | |
| "grad_norm": 1.184237003326416, | |
| "learning_rate": 3.0934098553889095e-05, | |
| "loss": 0.4177, | |
| "step": 6315 | |
| }, | |
| { | |
| "epoch": 1.3262099308610937, | |
| "grad_norm": 1.4354579448699951, | |
| "learning_rate": 3.0851666387743265e-05, | |
| "loss": 0.3421, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.329352608422376, | |
| "grad_norm": 1.3448097705841064, | |
| "learning_rate": 3.076916685890311e-05, | |
| "loss": 0.3851, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 1.332495285983658, | |
| "grad_norm": 1.4120362997055054, | |
| "learning_rate": 3.0686600917079386e-05, | |
| "loss": 0.3758, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.3356379635449402, | |
| "grad_norm": 1.4061853885650635, | |
| "learning_rate": 3.060396951274739e-05, | |
| "loss": 0.4013, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 1.3387806411062226, | |
| "grad_norm": 0.6553401947021484, | |
| "learning_rate": 3.0521273597136e-05, | |
| "loss": 0.3807, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.3419233186675048, | |
| "grad_norm": 1.2400474548339844, | |
| "learning_rate": 3.0438514122216722e-05, | |
| "loss": 0.3544, | |
| "step": 6405 | |
| }, | |
| { | |
| "epoch": 1.345065996228787, | |
| "grad_norm": 1.2030977010726929, | |
| "learning_rate": 3.0355692040692736e-05, | |
| "loss": 0.3586, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.3482086737900691, | |
| "grad_norm": 1.2839069366455078, | |
| "learning_rate": 3.0272808305987943e-05, | |
| "loss": 0.3798, | |
| "step": 6435 | |
| }, | |
| { | |
| "epoch": 1.3513513513513513, | |
| "grad_norm": 1.0002667903900146, | |
| "learning_rate": 3.0189863872235968e-05, | |
| "loss": 0.386, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.3544940289126335, | |
| "grad_norm": 1.1636244058609009, | |
| "learning_rate": 3.0106859694269196e-05, | |
| "loss": 0.4351, | |
| "step": 6465 | |
| }, | |
| { | |
| "epoch": 1.3576367064739157, | |
| "grad_norm": 0.9394842982292175, | |
| "learning_rate": 3.002379672760776e-05, | |
| "loss": 0.3461, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.360779384035198, | |
| "grad_norm": 1.2645450830459595, | |
| "learning_rate": 2.994067592844856e-05, | |
| "loss": 0.3852, | |
| "step": 6495 | |
| }, | |
| { | |
| "epoch": 1.3639220615964802, | |
| "grad_norm": 1.3446435928344727, | |
| "learning_rate": 2.9857498253654232e-05, | |
| "loss": 0.3481, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.3670647391577624, | |
| "grad_norm": 1.2624894380569458, | |
| "learning_rate": 2.9774264660742164e-05, | |
| "loss": 0.3987, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 1.3702074167190446, | |
| "grad_norm": 1.2067941427230835, | |
| "learning_rate": 2.9690976107873453e-05, | |
| "loss": 0.3639, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.373350094280327, | |
| "grad_norm": 1.1371479034423828, | |
| "learning_rate": 2.960763355384188e-05, | |
| "loss": 0.3925, | |
| "step": 6555 | |
| }, | |
| { | |
| "epoch": 1.3764927718416091, | |
| "grad_norm": 1.0012383460998535, | |
| "learning_rate": 2.9524237958062862e-05, | |
| "loss": 0.4186, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.3796354494028913, | |
| "grad_norm": 1.0432685613632202, | |
| "learning_rate": 2.944079028056243e-05, | |
| "loss": 0.3869, | |
| "step": 6585 | |
| }, | |
| { | |
| "epoch": 1.3827781269641735, | |
| "grad_norm": 1.4123237133026123, | |
| "learning_rate": 2.9357291481966155e-05, | |
| "loss": 0.4134, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.3859208045254556, | |
| "grad_norm": 1.1969938278198242, | |
| "learning_rate": 2.927374252348812e-05, | |
| "loss": 0.3821, | |
| "step": 6615 | |
| }, | |
| { | |
| "epoch": 1.3890634820867378, | |
| "grad_norm": 1.2030854225158691, | |
| "learning_rate": 2.9190144366919793e-05, | |
| "loss": 0.3853, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.3922061596480202, | |
| "grad_norm": 1.1836553812026978, | |
| "learning_rate": 2.9106497974619042e-05, | |
| "loss": 0.3595, | |
| "step": 6645 | |
| }, | |
| { | |
| "epoch": 1.3953488372093024, | |
| "grad_norm": 1.6539838314056396, | |
| "learning_rate": 2.9022804309498975e-05, | |
| "loss": 0.4392, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.3984915147705845, | |
| "grad_norm": 1.295224666595459, | |
| "learning_rate": 2.8939064335016913e-05, | |
| "loss": 0.4172, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 1.4016341923318667, | |
| "grad_norm": 1.1444505453109741, | |
| "learning_rate": 2.8855279015163273e-05, | |
| "loss": 0.3857, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.404776869893149, | |
| "grad_norm": 1.4091520309448242, | |
| "learning_rate": 2.8771449314450466e-05, | |
| "loss": 0.4384, | |
| "step": 6705 | |
| }, | |
| { | |
| "epoch": 1.4079195474544313, | |
| "grad_norm": 0.9858888983726501, | |
| "learning_rate": 2.8687576197901812e-05, | |
| "loss": 0.342, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.4110622250157134, | |
| "grad_norm": 1.2735475301742554, | |
| "learning_rate": 2.860366063104041e-05, | |
| "loss": 0.462, | |
| "step": 6735 | |
| }, | |
| { | |
| "epoch": 1.4142049025769956, | |
| "grad_norm": 1.1398062705993652, | |
| "learning_rate": 2.8519703579878053e-05, | |
| "loss": 0.4295, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.4173475801382778, | |
| "grad_norm": 1.4460091590881348, | |
| "learning_rate": 2.8435706010904085e-05, | |
| "loss": 0.3801, | |
| "step": 6765 | |
| }, | |
| { | |
| "epoch": 1.42049025769956, | |
| "grad_norm": 1.573014736175537, | |
| "learning_rate": 2.835166889107425e-05, | |
| "loss": 0.4661, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.4236329352608421, | |
| "grad_norm": 1.5855605602264404, | |
| "learning_rate": 2.8267593187799633e-05, | |
| "loss": 0.3628, | |
| "step": 6795 | |
| }, | |
| { | |
| "epoch": 1.4267756128221245, | |
| "grad_norm": 1.3220208883285522, | |
| "learning_rate": 2.8183479868935466e-05, | |
| "loss": 0.3755, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.4299182903834067, | |
| "grad_norm": 1.4992631673812866, | |
| "learning_rate": 2.809932990276997e-05, | |
| "loss": 0.4043, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 1.4330609679446888, | |
| "grad_norm": 1.355560302734375, | |
| "learning_rate": 2.8015144258013282e-05, | |
| "loss": 0.412, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.436203645505971, | |
| "grad_norm": 1.146181583404541, | |
| "learning_rate": 2.7930923903786255e-05, | |
| "loss": 0.3505, | |
| "step": 6855 | |
| }, | |
| { | |
| "epoch": 1.4393463230672534, | |
| "grad_norm": 1.8377063274383545, | |
| "learning_rate": 2.7846669809609267e-05, | |
| "loss": 0.4537, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.4424890006285356, | |
| "grad_norm": 1.4548070430755615, | |
| "learning_rate": 2.7762382945391156e-05, | |
| "loss": 0.4113, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 1.4456316781898177, | |
| "grad_norm": 1.3672486543655396, | |
| "learning_rate": 2.7678064281417952e-05, | |
| "loss": 0.3917, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.4487743557511, | |
| "grad_norm": 1.1587488651275635, | |
| "learning_rate": 2.7593714788341795e-05, | |
| "loss": 0.3334, | |
| "step": 6915 | |
| }, | |
| { | |
| "epoch": 1.451917033312382, | |
| "grad_norm": 1.2732610702514648, | |
| "learning_rate": 2.7509335437169693e-05, | |
| "loss": 0.373, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.4550597108736643, | |
| "grad_norm": 1.458500862121582, | |
| "learning_rate": 2.7424927199252364e-05, | |
| "loss": 0.3409, | |
| "step": 6945 | |
| }, | |
| { | |
| "epoch": 1.4582023884349467, | |
| "grad_norm": 1.3266096115112305, | |
| "learning_rate": 2.734049104627311e-05, | |
| "loss": 0.443, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.4613450659962288, | |
| "grad_norm": 1.0348279476165771, | |
| "learning_rate": 2.7256027950236517e-05, | |
| "loss": 0.3772, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 1.464487743557511, | |
| "grad_norm": 1.2738145589828491, | |
| "learning_rate": 2.7171538883457396e-05, | |
| "loss": 0.364, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.4676304211187932, | |
| "grad_norm": 1.184635877609253, | |
| "learning_rate": 2.708702481854947e-05, | |
| "loss": 0.3866, | |
| "step": 7005 | |
| }, | |
| { | |
| "epoch": 1.4707730986800756, | |
| "grad_norm": 1.2299425601959229, | |
| "learning_rate": 2.7002486728414283e-05, | |
| "loss": 0.3716, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.4739157762413577, | |
| "grad_norm": 1.3776116371154785, | |
| "learning_rate": 2.6917925586229897e-05, | |
| "loss": 0.402, | |
| "step": 7035 | |
| }, | |
| { | |
| "epoch": 1.47705845380264, | |
| "grad_norm": 1.3003356456756592, | |
| "learning_rate": 2.68333423654398e-05, | |
| "loss": 0.3722, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.480201131363922, | |
| "grad_norm": 1.2862930297851562, | |
| "learning_rate": 2.67487380397416e-05, | |
| "loss": 0.4417, | |
| "step": 7065 | |
| }, | |
| { | |
| "epoch": 1.4833438089252042, | |
| "grad_norm": 1.116700530052185, | |
| "learning_rate": 2.666411358307586e-05, | |
| "loss": 0.3577, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.4864864864864864, | |
| "grad_norm": 1.3424625396728516, | |
| "learning_rate": 2.657946996961493e-05, | |
| "loss": 0.3389, | |
| "step": 7095 | |
| }, | |
| { | |
| "epoch": 1.4896291640477686, | |
| "grad_norm": 1.3122916221618652, | |
| "learning_rate": 2.6494808173751622e-05, | |
| "loss": 0.4148, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.492771841609051, | |
| "grad_norm": 0.8987470865249634, | |
| "learning_rate": 2.6410129170088115e-05, | |
| "loss": 0.387, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 1.4959145191703331, | |
| "grad_norm": 1.0086872577667236, | |
| "learning_rate": 2.6325433933424644e-05, | |
| "loss": 0.3495, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.4990571967316153, | |
| "grad_norm": 1.3022773265838623, | |
| "learning_rate": 2.6240723438748332e-05, | |
| "loss": 0.366, | |
| "step": 7155 | |
| }, | |
| { | |
| "epoch": 1.5021998742928977, | |
| "grad_norm": 1.324033260345459, | |
| "learning_rate": 2.615599866122193e-05, | |
| "loss": 0.3845, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.5053425518541799, | |
| "grad_norm": 0.7969958782196045, | |
| "learning_rate": 2.6071260576172634e-05, | |
| "loss": 0.3597, | |
| "step": 7185 | |
| }, | |
| { | |
| "epoch": 1.508485229415462, | |
| "grad_norm": 1.2666351795196533, | |
| "learning_rate": 2.5986510159080824e-05, | |
| "loss": 0.3573, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.5116279069767442, | |
| "grad_norm": 1.4982563257217407, | |
| "learning_rate": 2.590174838556881e-05, | |
| "loss": 0.3576, | |
| "step": 7215 | |
| }, | |
| { | |
| "epoch": 1.5147705845380264, | |
| "grad_norm": 1.5081130266189575, | |
| "learning_rate": 2.581697623138969e-05, | |
| "loss": 0.2803, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.5179132620993085, | |
| "grad_norm": 1.267719030380249, | |
| "learning_rate": 2.5732194672416012e-05, | |
| "loss": 0.3586, | |
| "step": 7245 | |
| }, | |
| { | |
| "epoch": 1.5210559396605907, | |
| "grad_norm": 1.1292250156402588, | |
| "learning_rate": 2.5647404684628622e-05, | |
| "loss": 0.3974, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.5241986172218729, | |
| "grad_norm": 1.3279204368591309, | |
| "learning_rate": 2.556260724410538e-05, | |
| "loss": 0.3828, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 1.5273412947831553, | |
| "grad_norm": 1.337803602218628, | |
| "learning_rate": 2.5477803327009948e-05, | |
| "loss": 0.3692, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.5304839723444374, | |
| "grad_norm": 1.159134030342102, | |
| "learning_rate": 2.5392993909580537e-05, | |
| "loss": 0.354, | |
| "step": 7305 | |
| }, | |
| { | |
| "epoch": 1.5336266499057196, | |
| "grad_norm": 1.2121402025222778, | |
| "learning_rate": 2.5308179968118677e-05, | |
| "loss": 0.4087, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.536769327467002, | |
| "grad_norm": 1.2714091539382935, | |
| "learning_rate": 2.522336247897799e-05, | |
| "loss": 0.4065, | |
| "step": 7335 | |
| }, | |
| { | |
| "epoch": 1.5399120050282842, | |
| "grad_norm": 1.128733515739441, | |
| "learning_rate": 2.5138542418552913e-05, | |
| "loss": 0.3605, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.5430546825895664, | |
| "grad_norm": 1.140023946762085, | |
| "learning_rate": 2.5053720763267506e-05, | |
| "loss": 0.3573, | |
| "step": 7365 | |
| }, | |
| { | |
| "epoch": 1.5461973601508485, | |
| "grad_norm": 1.3230198621749878, | |
| "learning_rate": 2.4968898489564185e-05, | |
| "loss": 0.3182, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.5493400377121307, | |
| "grad_norm": 1.0801093578338623, | |
| "learning_rate": 2.4884076573892464e-05, | |
| "loss": 0.3523, | |
| "step": 7395 | |
| }, | |
| { | |
| "epoch": 1.5524827152734129, | |
| "grad_norm": 1.204451084136963, | |
| "learning_rate": 2.4799255992697767e-05, | |
| "loss": 0.3502, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.555625392834695, | |
| "grad_norm": 1.164306640625, | |
| "learning_rate": 2.4714437722410145e-05, | |
| "loss": 0.3451, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 1.5587680703959774, | |
| "grad_norm": 0.8542248606681824, | |
| "learning_rate": 2.4629622739433016e-05, | |
| "loss": 0.3803, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.5619107479572596, | |
| "grad_norm": 1.2533782720565796, | |
| "learning_rate": 2.4544812020132007e-05, | |
| "loss": 0.3561, | |
| "step": 7455 | |
| }, | |
| { | |
| "epoch": 1.5650534255185418, | |
| "grad_norm": 1.3054505586624146, | |
| "learning_rate": 2.4460006540823635e-05, | |
| "loss": 0.4579, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.5681961030798242, | |
| "grad_norm": 1.4427162408828735, | |
| "learning_rate": 2.4375207277764085e-05, | |
| "loss": 0.3762, | |
| "step": 7485 | |
| }, | |
| { | |
| "epoch": 1.5713387806411063, | |
| "grad_norm": 1.1473865509033203, | |
| "learning_rate": 2.4290415207137995e-05, | |
| "loss": 0.4135, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.5744814582023885, | |
| "grad_norm": 1.0101532936096191, | |
| "learning_rate": 2.4205631305047222e-05, | |
| "loss": 0.3653, | |
| "step": 7515 | |
| }, | |
| { | |
| "epoch": 1.5776241357636707, | |
| "grad_norm": 1.428271770477295, | |
| "learning_rate": 2.4120856547499564e-05, | |
| "loss": 0.386, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.5807668133249528, | |
| "grad_norm": 1.0353528261184692, | |
| "learning_rate": 2.4036091910397555e-05, | |
| "loss": 0.3912, | |
| "step": 7545 | |
| }, | |
| { | |
| "epoch": 1.583909490886235, | |
| "grad_norm": 1.2192641496658325, | |
| "learning_rate": 2.3951338369527233e-05, | |
| "loss": 0.3303, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.5870521684475172, | |
| "grad_norm": 1.2922149896621704, | |
| "learning_rate": 2.3866596900546902e-05, | |
| "loss": 0.3768, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 1.5901948460087993, | |
| "grad_norm": 1.3581557273864746, | |
| "learning_rate": 2.3781868478975884e-05, | |
| "loss": 0.393, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.5933375235700817, | |
| "grad_norm": 1.2488782405853271, | |
| "learning_rate": 2.3697154080183308e-05, | |
| "loss": 0.3889, | |
| "step": 7605 | |
| }, | |
| { | |
| "epoch": 1.596480201131364, | |
| "grad_norm": 1.0586172342300415, | |
| "learning_rate": 2.3612454679376886e-05, | |
| "loss": 0.3639, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.5996228786926463, | |
| "grad_norm": 1.226731300354004, | |
| "learning_rate": 2.3527771251591675e-05, | |
| "loss": 0.3783, | |
| "step": 7635 | |
| }, | |
| { | |
| "epoch": 1.6027655562539285, | |
| "grad_norm": 1.4184266328811646, | |
| "learning_rate": 2.344310477167883e-05, | |
| "loss": 0.4132, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.6059082338152106, | |
| "grad_norm": 1.2709243297576904, | |
| "learning_rate": 2.3358456214294456e-05, | |
| "loss": 0.3314, | |
| "step": 7665 | |
| }, | |
| { | |
| "epoch": 1.6090509113764928, | |
| "grad_norm": 1.1103581190109253, | |
| "learning_rate": 2.3273826553888294e-05, | |
| "loss": 0.3735, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.612193588937775, | |
| "grad_norm": 1.1599838733673096, | |
| "learning_rate": 2.3189216764692578e-05, | |
| "loss": 0.3968, | |
| "step": 7695 | |
| }, | |
| { | |
| "epoch": 1.6153362664990571, | |
| "grad_norm": 1.1679604053497314, | |
| "learning_rate": 2.3104627820710754e-05, | |
| "loss": 0.3501, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.6184789440603393, | |
| "grad_norm": 1.0258073806762695, | |
| "learning_rate": 2.302006069570635e-05, | |
| "loss": 0.3992, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 1.6216216216216215, | |
| "grad_norm": 1.1728984117507935, | |
| "learning_rate": 2.2935516363191693e-05, | |
| "loss": 0.3625, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.6247642991829039, | |
| "grad_norm": 1.3930670022964478, | |
| "learning_rate": 2.2850995796416726e-05, | |
| "loss": 0.3898, | |
| "step": 7755 | |
| }, | |
| { | |
| "epoch": 1.627906976744186, | |
| "grad_norm": 0.9263485074043274, | |
| "learning_rate": 2.2766499968357834e-05, | |
| "loss": 0.3145, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.6310496543054682, | |
| "grad_norm": 1.388420581817627, | |
| "learning_rate": 2.2682029851706584e-05, | |
| "loss": 0.3849, | |
| "step": 7785 | |
| }, | |
| { | |
| "epoch": 1.6341923318667506, | |
| "grad_norm": 1.2891064882278442, | |
| "learning_rate": 2.2597586418858586e-05, | |
| "loss": 0.3998, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.6373350094280328, | |
| "grad_norm": 1.1814244985580444, | |
| "learning_rate": 2.251317064190224e-05, | |
| "loss": 0.3652, | |
| "step": 7815 | |
| }, | |
| { | |
| "epoch": 1.640477686989315, | |
| "grad_norm": 1.1944345235824585, | |
| "learning_rate": 2.2428783492607638e-05, | |
| "loss": 0.3612, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.6436203645505971, | |
| "grad_norm": 0.9002747535705566, | |
| "learning_rate": 2.2344425942415258e-05, | |
| "loss": 0.3131, | |
| "step": 7845 | |
| }, | |
| { | |
| "epoch": 1.6467630421118793, | |
| "grad_norm": 1.203361988067627, | |
| "learning_rate": 2.2260098962424874e-05, | |
| "loss": 0.3476, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.6499057196731615, | |
| "grad_norm": 1.0701284408569336, | |
| "learning_rate": 2.2175803523384352e-05, | |
| "loss": 0.3972, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 1.6530483972344436, | |
| "grad_norm": 1.255242943763733, | |
| "learning_rate": 2.209154059567843e-05, | |
| "loss": 0.4292, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.6561910747957258, | |
| "grad_norm": 1.1037348508834839, | |
| "learning_rate": 2.200731114931763e-05, | |
| "loss": 0.3782, | |
| "step": 7905 | |
| }, | |
| { | |
| "epoch": 1.6593337523570082, | |
| "grad_norm": 1.404234528541565, | |
| "learning_rate": 2.1923116153927e-05, | |
| "loss": 0.3984, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.6624764299182904, | |
| "grad_norm": 1.2808343172073364, | |
| "learning_rate": 2.183895657873505e-05, | |
| "loss": 0.3551, | |
| "step": 7935 | |
| }, | |
| { | |
| "epoch": 1.6656191074795728, | |
| "grad_norm": 1.4898031949996948, | |
| "learning_rate": 2.1754833392562502e-05, | |
| "loss": 0.3651, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.668761785040855, | |
| "grad_norm": 1.1187386512756348, | |
| "learning_rate": 2.167074756381119e-05, | |
| "loss": 0.3626, | |
| "step": 7965 | |
| }, | |
| { | |
| "epoch": 1.671904462602137, | |
| "grad_norm": 0.9661749005317688, | |
| "learning_rate": 2.1586700060452912e-05, | |
| "loss": 0.3337, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.6750471401634193, | |
| "grad_norm": 1.339406967163086, | |
| "learning_rate": 2.1502691850018263e-05, | |
| "loss": 0.3907, | |
| "step": 7995 | |
| }, | |
| { | |
| "epoch": 1.6781898177247014, | |
| "grad_norm": 1.0702762603759766, | |
| "learning_rate": 2.141872389958551e-05, | |
| "loss": 0.3788, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.6813324952859836, | |
| "grad_norm": 1.4297361373901367, | |
| "learning_rate": 2.133479717576945e-05, | |
| "loss": 0.4034, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 1.6844751728472658, | |
| "grad_norm": 0.8980254530906677, | |
| "learning_rate": 2.1250912644710325e-05, | |
| "loss": 0.3243, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.687617850408548, | |
| "grad_norm": 1.4087092876434326, | |
| "learning_rate": 2.1167071272062626e-05, | |
| "loss": 0.4123, | |
| "step": 8055 | |
| }, | |
| { | |
| "epoch": 1.6907605279698303, | |
| "grad_norm": 1.134097933769226, | |
| "learning_rate": 2.108327402298404e-05, | |
| "loss": 0.3734, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.6939032055311125, | |
| "grad_norm": 1.1244763135910034, | |
| "learning_rate": 2.099952186212429e-05, | |
| "loss": 0.3626, | |
| "step": 8085 | |
| }, | |
| { | |
| "epoch": 1.6970458830923947, | |
| "grad_norm": 1.1340084075927734, | |
| "learning_rate": 2.091581575361411e-05, | |
| "loss": 0.3261, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.700188560653677, | |
| "grad_norm": 1.2386656999588013, | |
| "learning_rate": 2.0832156661054036e-05, | |
| "loss": 0.3485, | |
| "step": 8115 | |
| }, | |
| { | |
| "epoch": 1.7033312382149592, | |
| "grad_norm": 1.6566152572631836, | |
| "learning_rate": 2.074854554750339e-05, | |
| "loss": 0.3902, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.7064739157762414, | |
| "grad_norm": 1.209065556526184, | |
| "learning_rate": 2.06649833754692e-05, | |
| "loss": 0.4162, | |
| "step": 8145 | |
| }, | |
| { | |
| "epoch": 1.7096165933375236, | |
| "grad_norm": 1.2372878789901733, | |
| "learning_rate": 2.0581471106895043e-05, | |
| "loss": 0.3521, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.7127592708988058, | |
| "grad_norm": 1.2591501474380493, | |
| "learning_rate": 2.0498009703150063e-05, | |
| "loss": 0.3496, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 1.715901948460088, | |
| "grad_norm": 1.1610863208770752, | |
| "learning_rate": 2.0414600125017834e-05, | |
| "loss": 0.407, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.71904462602137, | |
| "grad_norm": 1.165305495262146, | |
| "learning_rate": 2.0331243332685367e-05, | |
| "loss": 0.4154, | |
| "step": 8205 | |
| }, | |
| { | |
| "epoch": 1.7221873035826523, | |
| "grad_norm": 0.9598828554153442, | |
| "learning_rate": 2.024794028573197e-05, | |
| "loss": 0.3947, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.7253299811439347, | |
| "grad_norm": 1.2426929473876953, | |
| "learning_rate": 2.0164691943118283e-05, | |
| "loss": 0.3481, | |
| "step": 8235 | |
| }, | |
| { | |
| "epoch": 1.7284726587052168, | |
| "grad_norm": 0.9565463066101074, | |
| "learning_rate": 2.00814992631752e-05, | |
| "loss": 0.3251, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.7316153362664992, | |
| "grad_norm": 1.1574795246124268, | |
| "learning_rate": 1.9998363203592836e-05, | |
| "loss": 0.374, | |
| "step": 8265 | |
| }, | |
| { | |
| "epoch": 1.7347580138277814, | |
| "grad_norm": 1.3719727993011475, | |
| "learning_rate": 1.9915284721409506e-05, | |
| "loss": 0.4395, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.7379006913890636, | |
| "grad_norm": 1.21462082862854, | |
| "learning_rate": 1.983226477300071e-05, | |
| "loss": 0.3879, | |
| "step": 8295 | |
| }, | |
| { | |
| "epoch": 1.7410433689503457, | |
| "grad_norm": 1.2950128316879272, | |
| "learning_rate": 1.974930431406815e-05, | |
| "loss": 0.3903, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.744186046511628, | |
| "grad_norm": 0.568601131439209, | |
| "learning_rate": 1.966640429962867e-05, | |
| "loss": 0.3608, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 1.74732872407291, | |
| "grad_norm": 1.234540343284607, | |
| "learning_rate": 1.9583565684003294e-05, | |
| "loss": 0.3574, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.7504714016341922, | |
| "grad_norm": 1.170241355895996, | |
| "learning_rate": 1.9500789420806274e-05, | |
| "loss": 0.3476, | |
| "step": 8355 | |
| }, | |
| { | |
| "epoch": 1.7536140791954744, | |
| "grad_norm": 1.1727917194366455, | |
| "learning_rate": 1.9418076462934057e-05, | |
| "loss": 0.3825, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.7567567567567568, | |
| "grad_norm": 1.1901155710220337, | |
| "learning_rate": 1.933542776255432e-05, | |
| "loss": 0.3182, | |
| "step": 8385 | |
| }, | |
| { | |
| "epoch": 1.759899434318039, | |
| "grad_norm": 1.3078737258911133, | |
| "learning_rate": 1.9252844271095056e-05, | |
| "loss": 0.3766, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.7630421118793211, | |
| "grad_norm": 1.255685567855835, | |
| "learning_rate": 1.917032693923359e-05, | |
| "loss": 0.4278, | |
| "step": 8415 | |
| }, | |
| { | |
| "epoch": 1.7661847894406035, | |
| "grad_norm": 1.2631891965866089, | |
| "learning_rate": 1.908787671688561e-05, | |
| "loss": 0.3988, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.7693274670018857, | |
| "grad_norm": 1.0149579048156738, | |
| "learning_rate": 1.9005494553194277e-05, | |
| "loss": 0.3164, | |
| "step": 8445 | |
| }, | |
| { | |
| "epoch": 1.7724701445631679, | |
| "grad_norm": 1.2755389213562012, | |
| "learning_rate": 1.892318139651929e-05, | |
| "loss": 0.3699, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.77561282212445, | |
| "grad_norm": 1.3909375667572021, | |
| "learning_rate": 1.884093819442595e-05, | |
| "loss": 0.3975, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 1.7787554996857322, | |
| "grad_norm": 1.3214746713638306, | |
| "learning_rate": 1.8758765893674242e-05, | |
| "loss": 0.385, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.7818981772470144, | |
| "grad_norm": 1.1242390871047974, | |
| "learning_rate": 1.867666544020798e-05, | |
| "loss": 0.3882, | |
| "step": 8505 | |
| }, | |
| { | |
| "epoch": 1.7850408548082966, | |
| "grad_norm": 1.41203773021698, | |
| "learning_rate": 1.8594637779143895e-05, | |
| "loss": 0.4134, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.7881835323695787, | |
| "grad_norm": 1.1696633100509644, | |
| "learning_rate": 1.851268385476074e-05, | |
| "loss": 0.3835, | |
| "step": 8535 | |
| }, | |
| { | |
| "epoch": 1.7913262099308611, | |
| "grad_norm": 1.27289879322052, | |
| "learning_rate": 1.8430804610488423e-05, | |
| "loss": 0.3411, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.7944688874921433, | |
| "grad_norm": 1.1815760135650635, | |
| "learning_rate": 1.8349000988897183e-05, | |
| "loss": 0.3953, | |
| "step": 8565 | |
| }, | |
| { | |
| "epoch": 1.7976115650534257, | |
| "grad_norm": 0.9872913956642151, | |
| "learning_rate": 1.8267273931686697e-05, | |
| "loss": 0.3807, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.8001257071024512, | |
| "eval_accuracy": 0.8832372290913474, | |
| "eval_loss": 0.4148283004760742, | |
| "eval_runtime": 1191.4012, | |
| "eval_samples_per_second": 4.016, | |
| "eval_steps_per_second": 1.005, | |
| "step": 8592 | |
| }, | |
| { | |
| "epoch": 1.8007542426147078, | |
| "grad_norm": 1.2675862312316895, | |
| "learning_rate": 1.818562437967525e-05, | |
| "loss": 0.4136, | |
| "step": 8595 | |
| }, | |
| { | |
| "epoch": 1.80389692017599, | |
| "grad_norm": 1.2914496660232544, | |
| "learning_rate": 1.8104053272788912e-05, | |
| "loss": 0.3426, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.8070395977372722, | |
| "grad_norm": 0.8845340609550476, | |
| "learning_rate": 1.802256155005073e-05, | |
| "loss": 0.3796, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 1.8101822752985544, | |
| "grad_norm": 1.2812376022338867, | |
| "learning_rate": 1.79411501495699e-05, | |
| "loss": 0.3813, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.8133249528598365, | |
| "grad_norm": 1.479176640510559, | |
| "learning_rate": 1.7859820008530943e-05, | |
| "loss": 0.347, | |
| "step": 8655 | |
| }, | |
| { | |
| "epoch": 1.8164676304211187, | |
| "grad_norm": 1.5261151790618896, | |
| "learning_rate": 1.7778572063182976e-05, | |
| "loss": 0.3942, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.8196103079824009, | |
| "grad_norm": 1.0050832033157349, | |
| "learning_rate": 1.76974072488289e-05, | |
| "loss": 0.3831, | |
| "step": 8685 | |
| }, | |
| { | |
| "epoch": 1.8227529855436833, | |
| "grad_norm": 0.8978458046913147, | |
| "learning_rate": 1.761632649981462e-05, | |
| "loss": 0.4253, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.8258956631049654, | |
| "grad_norm": 1.3533804416656494, | |
| "learning_rate": 1.753533074951831e-05, | |
| "loss": 0.4012, | |
| "step": 8715 | |
| }, | |
| { | |
| "epoch": 1.8290383406662476, | |
| "grad_norm": 1.2724169492721558, | |
| "learning_rate": 1.7454420930339676e-05, | |
| "loss": 0.4422, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.83218101822753, | |
| "grad_norm": 1.2476907968521118, | |
| "learning_rate": 1.737359797368921e-05, | |
| "loss": 0.3421, | |
| "step": 8745 | |
| }, | |
| { | |
| "epoch": 1.8353236957888122, | |
| "grad_norm": 1.1641727685928345, | |
| "learning_rate": 1.7292862809977432e-05, | |
| "loss": 0.3912, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.8384663733500943, | |
| "grad_norm": 1.0571367740631104, | |
| "learning_rate": 1.7212216368604264e-05, | |
| "loss": 0.3262, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 1.8416090509113765, | |
| "grad_norm": 1.1409281492233276, | |
| "learning_rate": 1.7131659577948254e-05, | |
| "loss": 0.4101, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.8447517284726587, | |
| "grad_norm": 1.1299269199371338, | |
| "learning_rate": 1.7051193365355926e-05, | |
| "loss": 0.4095, | |
| "step": 8805 | |
| }, | |
| { | |
| "epoch": 1.8478944060339408, | |
| "grad_norm": 1.0926958322525024, | |
| "learning_rate": 1.697081865713108e-05, | |
| "loss": 0.3668, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.851037083595223, | |
| "grad_norm": 1.262511968612671, | |
| "learning_rate": 1.689053637852417e-05, | |
| "loss": 0.3699, | |
| "step": 8835 | |
| }, | |
| { | |
| "epoch": 1.8541797611565052, | |
| "grad_norm": 0.9396837949752808, | |
| "learning_rate": 1.681034745372161e-05, | |
| "loss": 0.3793, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.8573224387177876, | |
| "grad_norm": 1.3683308362960815, | |
| "learning_rate": 1.6730252805835145e-05, | |
| "loss": 0.3633, | |
| "step": 8865 | |
| }, | |
| { | |
| "epoch": 1.8604651162790697, | |
| "grad_norm": 1.2032579183578491, | |
| "learning_rate": 1.6650253356891247e-05, | |
| "loss": 0.3644, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.8636077938403521, | |
| "grad_norm": 1.1967633962631226, | |
| "learning_rate": 1.6570350027820485e-05, | |
| "loss": 0.3737, | |
| "step": 8895 | |
| }, | |
| { | |
| "epoch": 1.8667504714016343, | |
| "grad_norm": 1.4144322872161865, | |
| "learning_rate": 1.6490543738446927e-05, | |
| "loss": 0.3816, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.8698931489629165, | |
| "grad_norm": 1.4581791162490845, | |
| "learning_rate": 1.6410835407477513e-05, | |
| "loss": 0.3189, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 1.8730358265241986, | |
| "grad_norm": 1.2554900646209717, | |
| "learning_rate": 1.6331225952491557e-05, | |
| "loss": 0.3555, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.8761785040854808, | |
| "grad_norm": 1.4458445310592651, | |
| "learning_rate": 1.6251716289930134e-05, | |
| "loss": 0.4001, | |
| "step": 8955 | |
| }, | |
| { | |
| "epoch": 1.879321181646763, | |
| "grad_norm": 1.4509528875350952, | |
| "learning_rate": 1.6172307335085512e-05, | |
| "loss": 0.4032, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.8824638592080452, | |
| "grad_norm": 1.3516335487365723, | |
| "learning_rate": 1.6093000002090657e-05, | |
| "loss": 0.4087, | |
| "step": 8985 | |
| }, | |
| { | |
| "epoch": 1.8856065367693273, | |
| "grad_norm": 1.1090672016143799, | |
| "learning_rate": 1.6013795203908703e-05, | |
| "loss": 0.3573, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.8887492143306097, | |
| "grad_norm": 1.2857966423034668, | |
| "learning_rate": 1.593469385232243e-05, | |
| "loss": 0.4204, | |
| "step": 9015 | |
| }, | |
| { | |
| "epoch": 1.8918918918918919, | |
| "grad_norm": 1.1753884553909302, | |
| "learning_rate": 1.5855696857923738e-05, | |
| "loss": 0.4041, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.895034569453174, | |
| "grad_norm": 1.3764643669128418, | |
| "learning_rate": 1.577680513010325e-05, | |
| "loss": 0.3901, | |
| "step": 9045 | |
| }, | |
| { | |
| "epoch": 1.8981772470144564, | |
| "grad_norm": 1.2634403705596924, | |
| "learning_rate": 1.569801957703975e-05, | |
| "loss": 0.3669, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.9013199245757386, | |
| "grad_norm": 1.501197338104248, | |
| "learning_rate": 1.5619341105689793e-05, | |
| "loss": 0.3875, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 1.9044626021370208, | |
| "grad_norm": 1.1498409509658813, | |
| "learning_rate": 1.5540770621777213e-05, | |
| "loss": 0.3769, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.907605279698303, | |
| "grad_norm": 1.2901723384857178, | |
| "learning_rate": 1.5462309029782756e-05, | |
| "loss": 0.4069, | |
| "step": 9105 | |
| }, | |
| { | |
| "epoch": 1.9107479572595851, | |
| "grad_norm": 1.2987323999404907, | |
| "learning_rate": 1.5383957232933623e-05, | |
| "loss": 0.3264, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.9138906348208673, | |
| "grad_norm": 1.0844594240188599, | |
| "learning_rate": 1.5305716133193056e-05, | |
| "loss": 0.352, | |
| "step": 9135 | |
| }, | |
| { | |
| "epoch": 1.9170333123821495, | |
| "grad_norm": 1.4493502378463745, | |
| "learning_rate": 1.5227586631250047e-05, | |
| "loss": 0.4362, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.9201759899434316, | |
| "grad_norm": 1.2252168655395508, | |
| "learning_rate": 1.5149569626508848e-05, | |
| "loss": 0.3463, | |
| "step": 9165 | |
| }, | |
| { | |
| "epoch": 1.923318667504714, | |
| "grad_norm": 1.2073407173156738, | |
| "learning_rate": 1.5071666017078705e-05, | |
| "loss": 0.3452, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.9264613450659962, | |
| "grad_norm": 0.9203445315361023, | |
| "learning_rate": 1.4993876699763467e-05, | |
| "loss": 0.3588, | |
| "step": 9195 | |
| }, | |
| { | |
| "epoch": 1.9296040226272786, | |
| "grad_norm": 1.270068645477295, | |
| "learning_rate": 1.4916202570051319e-05, | |
| "loss": 0.3777, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.9327467001885608, | |
| "grad_norm": 1.1798357963562012, | |
| "learning_rate": 1.4838644522104416e-05, | |
| "loss": 0.3975, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 1.935889377749843, | |
| "grad_norm": 1.4530518054962158, | |
| "learning_rate": 1.476120344874861e-05, | |
| "loss": 0.4299, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.939032055311125, | |
| "grad_norm": 1.449532151222229, | |
| "learning_rate": 1.4683880241463197e-05, | |
| "loss": 0.4051, | |
| "step": 9255 | |
| }, | |
| { | |
| "epoch": 1.9421747328724073, | |
| "grad_norm": 1.4117298126220703, | |
| "learning_rate": 1.460667579037061e-05, | |
| "loss": 0.3639, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.9453174104336894, | |
| "grad_norm": 1.2169469594955444, | |
| "learning_rate": 1.452959098422621e-05, | |
| "loss": 0.357, | |
| "step": 9285 | |
| }, | |
| { | |
| "epoch": 1.9484600879949716, | |
| "grad_norm": 1.243122935295105, | |
| "learning_rate": 1.4452626710408017e-05, | |
| "loss": 0.3618, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.9516027655562538, | |
| "grad_norm": 1.175661563873291, | |
| "learning_rate": 1.4375783854906555e-05, | |
| "loss": 0.3524, | |
| "step": 9315 | |
| }, | |
| { | |
| "epoch": 1.9547454431175362, | |
| "grad_norm": 1.468005895614624, | |
| "learning_rate": 1.4299063302314597e-05, | |
| "loss": 0.3667, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.9578881206788183, | |
| "grad_norm": 1.145400047302246, | |
| "learning_rate": 1.4222465935816975e-05, | |
| "loss": 0.4047, | |
| "step": 9345 | |
| }, | |
| { | |
| "epoch": 1.9610307982401005, | |
| "grad_norm": 1.3986377716064453, | |
| "learning_rate": 1.4145992637180492e-05, | |
| "loss": 0.3254, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.964173475801383, | |
| "grad_norm": 1.3191365003585815, | |
| "learning_rate": 1.4069644286743669e-05, | |
| "loss": 0.3564, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 1.967316153362665, | |
| "grad_norm": 1.48728346824646, | |
| "learning_rate": 1.3993421763406672e-05, | |
| "loss": 0.3196, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.9704588309239472, | |
| "grad_norm": 1.3215950727462769, | |
| "learning_rate": 1.3917325944621195e-05, | |
| "loss": 0.3826, | |
| "step": 9405 | |
| }, | |
| { | |
| "epoch": 1.9736015084852294, | |
| "grad_norm": 1.3539785146713257, | |
| "learning_rate": 1.3841357706380348e-05, | |
| "loss": 0.392, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.9767441860465116, | |
| "grad_norm": 1.0365345478057861, | |
| "learning_rate": 1.3765517923208554e-05, | |
| "loss": 0.3862, | |
| "step": 9435 | |
| }, | |
| { | |
| "epoch": 1.9798868636077938, | |
| "grad_norm": 1.2735167741775513, | |
| "learning_rate": 1.3689807468151491e-05, | |
| "loss": 0.372, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.983029541169076, | |
| "grad_norm": 1.4106998443603516, | |
| "learning_rate": 1.3614227212766079e-05, | |
| "loss": 0.3768, | |
| "step": 9465 | |
| }, | |
| { | |
| "epoch": 1.9861722187303583, | |
| "grad_norm": 1.568157434463501, | |
| "learning_rate": 1.3538778027110402e-05, | |
| "loss": 0.3453, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.9893148962916405, | |
| "grad_norm": 1.4247443675994873, | |
| "learning_rate": 1.3463460779733706e-05, | |
| "loss": 0.407, | |
| "step": 9495 | |
| }, | |
| { | |
| "epoch": 1.9924575738529227, | |
| "grad_norm": 1.2098503112792969, | |
| "learning_rate": 1.3388276337666384e-05, | |
| "loss": 0.3444, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.995600251414205, | |
| "grad_norm": 1.054401159286499, | |
| "learning_rate": 1.3313225566410042e-05, | |
| "loss": 0.3342, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 1.9987429289754872, | |
| "grad_norm": 1.186824917793274, | |
| "learning_rate": 1.3238309329927511e-05, | |
| "loss": 0.3322, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 2.0018856065367694, | |
| "grad_norm": 1.0764572620391846, | |
| "learning_rate": 1.3163528490632854e-05, | |
| "loss": 0.3444, | |
| "step": 9555 | |
| }, | |
| { | |
| "epoch": 2.0050282840980516, | |
| "grad_norm": 1.051069974899292, | |
| "learning_rate": 1.3088883909381531e-05, | |
| "loss": 0.2928, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 2.0081709616593337, | |
| "grad_norm": 1.2765467166900635, | |
| "learning_rate": 1.3014376445460391e-05, | |
| "loss": 0.303, | |
| "step": 9585 | |
| }, | |
| { | |
| "epoch": 2.011313639220616, | |
| "grad_norm": 0.9927627444267273, | |
| "learning_rate": 1.2940006956577871e-05, | |
| "loss": 0.2736, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.014456316781898, | |
| "grad_norm": 1.6037464141845703, | |
| "learning_rate": 1.2865776298854043e-05, | |
| "loss": 0.2862, | |
| "step": 9615 | |
| }, | |
| { | |
| "epoch": 2.0175989943431802, | |
| "grad_norm": 1.486846923828125, | |
| "learning_rate": 1.2791685326810826e-05, | |
| "loss": 0.3303, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 2.0207416719044624, | |
| "grad_norm": 1.5033382177352905, | |
| "learning_rate": 1.2717734893362102e-05, | |
| "loss": 0.273, | |
| "step": 9645 | |
| }, | |
| { | |
| "epoch": 2.023884349465745, | |
| "grad_norm": 1.7398715019226074, | |
| "learning_rate": 1.2643925849803895e-05, | |
| "loss": 0.3412, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 2.027027027027027, | |
| "grad_norm": 1.2956515550613403, | |
| "learning_rate": 1.2570259045804628e-05, | |
| "loss": 0.371, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 2.0301697045883094, | |
| "grad_norm": 1.6283161640167236, | |
| "learning_rate": 1.2496735329395286e-05, | |
| "loss": 0.3437, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 2.0333123821495915, | |
| "grad_norm": 1.208808183670044, | |
| "learning_rate": 1.2423355546959664e-05, | |
| "loss": 0.3402, | |
| "step": 9705 | |
| }, | |
| { | |
| "epoch": 2.0364550597108737, | |
| "grad_norm": 1.0130226612091064, | |
| "learning_rate": 1.2350120543224625e-05, | |
| "loss": 0.3091, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 2.039597737272156, | |
| "grad_norm": 1.4891202449798584, | |
| "learning_rate": 1.2277031161250398e-05, | |
| "loss": 0.3595, | |
| "step": 9735 | |
| }, | |
| { | |
| "epoch": 2.042740414833438, | |
| "grad_norm": 1.399242877960205, | |
| "learning_rate": 1.2204088242420866e-05, | |
| "loss": 0.2866, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 2.04588309239472, | |
| "grad_norm": 1.6362804174423218, | |
| "learning_rate": 1.2131292626433843e-05, | |
| "loss": 0.3116, | |
| "step": 9765 | |
| }, | |
| { | |
| "epoch": 2.0490257699560024, | |
| "grad_norm": 1.3457330465316772, | |
| "learning_rate": 1.2058645151291436e-05, | |
| "loss": 0.3473, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 2.0521684475172846, | |
| "grad_norm": 1.0016905069351196, | |
| "learning_rate": 1.198614665329042e-05, | |
| "loss": 0.3299, | |
| "step": 9795 | |
| }, | |
| { | |
| "epoch": 2.0553111250785667, | |
| "grad_norm": 1.6363437175750732, | |
| "learning_rate": 1.1913797967012585e-05, | |
| "loss": 0.2997, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 2.0584538026398493, | |
| "grad_norm": 1.3227770328521729, | |
| "learning_rate": 1.1841599925315106e-05, | |
| "loss": 0.312, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 2.0615964802011315, | |
| "grad_norm": 1.6865644454956055, | |
| "learning_rate": 1.1769553359321017e-05, | |
| "loss": 0.2977, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 2.0647391577624137, | |
| "grad_norm": 1.7184381484985352, | |
| "learning_rate": 1.169765909840957e-05, | |
| "loss": 0.2997, | |
| "step": 9855 | |
| }, | |
| { | |
| "epoch": 2.067881835323696, | |
| "grad_norm": 1.0318830013275146, | |
| "learning_rate": 1.1625917970206759e-05, | |
| "loss": 0.3017, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 2.071024512884978, | |
| "grad_norm": 1.549784779548645, | |
| "learning_rate": 1.155433080057573e-05, | |
| "loss": 0.3203, | |
| "step": 9885 | |
| }, | |
| { | |
| "epoch": 2.07416719044626, | |
| "grad_norm": 1.5676542520523071, | |
| "learning_rate": 1.1482898413607333e-05, | |
| "loss": 0.3512, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.0773098680075424, | |
| "grad_norm": 1.68881356716156, | |
| "learning_rate": 1.1411621631610575e-05, | |
| "loss": 0.3201, | |
| "step": 9915 | |
| }, | |
| { | |
| "epoch": 2.0804525455688245, | |
| "grad_norm": 1.3327656984329224, | |
| "learning_rate": 1.1340501275103178e-05, | |
| "loss": 0.3129, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 2.0835952231301067, | |
| "grad_norm": 1.5713459253311157, | |
| "learning_rate": 1.1269538162802196e-05, | |
| "loss": 0.3212, | |
| "step": 9945 | |
| }, | |
| { | |
| "epoch": 2.086737900691389, | |
| "grad_norm": 1.3707289695739746, | |
| "learning_rate": 1.1198733111614474e-05, | |
| "loss": 0.2978, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 2.0898805782526715, | |
| "grad_norm": 1.3866550922393799, | |
| "learning_rate": 1.1128086936627321e-05, | |
| "loss": 0.353, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 2.0930232558139537, | |
| "grad_norm": 1.3355560302734375, | |
| "learning_rate": 1.1057600451099104e-05, | |
| "loss": 0.2947, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 2.096165933375236, | |
| "grad_norm": 1.3299508094787598, | |
| "learning_rate": 1.0987274466449907e-05, | |
| "loss": 0.2719, | |
| "step": 10005 | |
| }, | |
| { | |
| "epoch": 2.099308610936518, | |
| "grad_norm": 1.4944045543670654, | |
| "learning_rate": 1.0917109792252173e-05, | |
| "loss": 0.3074, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 2.1024512884978, | |
| "grad_norm": 1.238981008529663, | |
| "learning_rate": 1.084710723622136e-05, | |
| "loss": 0.3253, | |
| "step": 10035 | |
| }, | |
| { | |
| "epoch": 2.1055939660590823, | |
| "grad_norm": 1.7395031452178955, | |
| "learning_rate": 1.0777267604206703e-05, | |
| "loss": 0.3404, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 2.1087366436203645, | |
| "grad_norm": 1.597024917602539, | |
| "learning_rate": 1.0707591700181874e-05, | |
| "loss": 0.3362, | |
| "step": 10065 | |
| }, | |
| { | |
| "epoch": 2.1118793211816467, | |
| "grad_norm": 1.5733188390731812, | |
| "learning_rate": 1.0638080326235777e-05, | |
| "loss": 0.3694, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 2.115021998742929, | |
| "grad_norm": 1.2697248458862305, | |
| "learning_rate": 1.0568734282563272e-05, | |
| "loss": 0.3231, | |
| "step": 10095 | |
| }, | |
| { | |
| "epoch": 2.118164676304211, | |
| "grad_norm": 1.410846471786499, | |
| "learning_rate": 1.049955436745601e-05, | |
| "loss": 0.3175, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 2.121307353865493, | |
| "grad_norm": 1.4120702743530273, | |
| "learning_rate": 1.0430541377293191e-05, | |
| "loss": 0.3534, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 2.124450031426776, | |
| "grad_norm": 1.8276065587997437, | |
| "learning_rate": 1.0361696106532442e-05, | |
| "loss": 0.3332, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 2.127592708988058, | |
| "grad_norm": 1.6806981563568115, | |
| "learning_rate": 1.0293019347700658e-05, | |
| "loss": 0.2967, | |
| "step": 10155 | |
| }, | |
| { | |
| "epoch": 2.13073538654934, | |
| "grad_norm": 2.0087246894836426, | |
| "learning_rate": 1.0224511891384853e-05, | |
| "loss": 0.3439, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 2.1338780641106223, | |
| "grad_norm": 1.5151036977767944, | |
| "learning_rate": 1.015617452622309e-05, | |
| "loss": 0.3344, | |
| "step": 10185 | |
| }, | |
| { | |
| "epoch": 2.1370207416719045, | |
| "grad_norm": 1.1880221366882324, | |
| "learning_rate": 1.008800803889537e-05, | |
| "loss": 0.2934, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.1401634192331866, | |
| "grad_norm": 1.1785838603973389, | |
| "learning_rate": 1.0020013214114657e-05, | |
| "loss": 0.3163, | |
| "step": 10215 | |
| }, | |
| { | |
| "epoch": 2.143306096794469, | |
| "grad_norm": 1.2505255937576294, | |
| "learning_rate": 9.952190834617728e-06, | |
| "loss": 0.3166, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 2.146448774355751, | |
| "grad_norm": 2.049252510070801, | |
| "learning_rate": 9.884541681156226e-06, | |
| "loss": 0.3077, | |
| "step": 10245 | |
| }, | |
| { | |
| "epoch": 2.149591451917033, | |
| "grad_norm": 1.616794466972351, | |
| "learning_rate": 9.817066532487701e-06, | |
| "loss": 0.3077, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 2.1527341294783153, | |
| "grad_norm": 1.339815378189087, | |
| "learning_rate": 9.749766165366567e-06, | |
| "loss": 0.3528, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 2.155876807039598, | |
| "grad_norm": 1.4637688398361206, | |
| "learning_rate": 9.682641354535244e-06, | |
| "loss": 0.3619, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 2.15901948460088, | |
| "grad_norm": 1.2227802276611328, | |
| "learning_rate": 9.615692872715154e-06, | |
| "loss": 0.3413, | |
| "step": 10305 | |
| }, | |
| { | |
| "epoch": 2.1621621621621623, | |
| "grad_norm": 1.7328176498413086, | |
| "learning_rate": 9.548921490597917e-06, | |
| "loss": 0.3127, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 2.1653048397234445, | |
| "grad_norm": 1.122909665107727, | |
| "learning_rate": 9.482327976836392e-06, | |
| "loss": 0.2989, | |
| "step": 10335 | |
| }, | |
| { | |
| "epoch": 2.1684475172847266, | |
| "grad_norm": 1.163944959640503, | |
| "learning_rate": 9.415913098035895e-06, | |
| "loss": 0.3264, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 2.171590194846009, | |
| "grad_norm": 1.4139958620071411, | |
| "learning_rate": 9.349677618745347e-06, | |
| "loss": 0.2845, | |
| "step": 10365 | |
| }, | |
| { | |
| "epoch": 2.174732872407291, | |
| "grad_norm": 1.749042272567749, | |
| "learning_rate": 9.28362230144846e-06, | |
| "loss": 0.3336, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 2.177875549968573, | |
| "grad_norm": 1.489220142364502, | |
| "learning_rate": 9.217747906554969e-06, | |
| "loss": 0.299, | |
| "step": 10395 | |
| }, | |
| { | |
| "epoch": 2.1810182275298553, | |
| "grad_norm": 1.2497318983078003, | |
| "learning_rate": 9.152055192391903e-06, | |
| "loss": 0.2956, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 2.1841609050911375, | |
| "grad_norm": 1.4486489295959473, | |
| "learning_rate": 9.086544915194831e-06, | |
| "loss": 0.3065, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 2.1873035826524196, | |
| "grad_norm": 1.4671967029571533, | |
| "learning_rate": 9.021217829099143e-06, | |
| "loss": 0.3275, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 2.1904462602137023, | |
| "grad_norm": 1.387172818183899, | |
| "learning_rate": 8.956074686131396e-06, | |
| "loss": 0.2766, | |
| "step": 10455 | |
| }, | |
| { | |
| "epoch": 2.1935889377749844, | |
| "grad_norm": 1.0154411792755127, | |
| "learning_rate": 8.89111623620065e-06, | |
| "loss": 0.3188, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 2.1967316153362666, | |
| "grad_norm": 1.452532172203064, | |
| "learning_rate": 8.826343227089843e-06, | |
| "loss": 0.3148, | |
| "step": 10485 | |
| }, | |
| { | |
| "epoch": 2.1998742928975488, | |
| "grad_norm": 1.309695839881897, | |
| "learning_rate": 8.761756404447144e-06, | |
| "loss": 0.2735, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.203016970458831, | |
| "grad_norm": 1.652197003364563, | |
| "learning_rate": 8.69735651177741e-06, | |
| "loss": 0.3238, | |
| "step": 10515 | |
| }, | |
| { | |
| "epoch": 2.206159648020113, | |
| "grad_norm": 1.330776572227478, | |
| "learning_rate": 8.633144290433629e-06, | |
| "loss": 0.3433, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 2.2093023255813953, | |
| "grad_norm": 1.5660831928253174, | |
| "learning_rate": 8.56912047960834e-06, | |
| "loss": 0.3275, | |
| "step": 10545 | |
| }, | |
| { | |
| "epoch": 2.2124450031426774, | |
| "grad_norm": 1.1177830696105957, | |
| "learning_rate": 8.50528581632519e-06, | |
| "loss": 0.3697, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 2.2155876807039596, | |
| "grad_norm": 1.4742639064788818, | |
| "learning_rate": 8.441641035430381e-06, | |
| "loss": 0.3099, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 2.218730358265242, | |
| "grad_norm": 1.505416750907898, | |
| "learning_rate": 8.378186869584275e-06, | |
| "loss": 0.33, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 2.2218730358265244, | |
| "grad_norm": 1.5553947687149048, | |
| "learning_rate": 8.314924049252895e-06, | |
| "loss": 0.3302, | |
| "step": 10605 | |
| }, | |
| { | |
| "epoch": 2.2250157133878066, | |
| "grad_norm": 1.5330064296722412, | |
| "learning_rate": 8.251853302699578e-06, | |
| "loss": 0.3387, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 2.2281583909490887, | |
| "grad_norm": 1.2511600255966187, | |
| "learning_rate": 8.188975355976557e-06, | |
| "loss": 0.2764, | |
| "step": 10635 | |
| }, | |
| { | |
| "epoch": 2.231301068510371, | |
| "grad_norm": 1.3672597408294678, | |
| "learning_rate": 8.126290932916599e-06, | |
| "loss": 0.3554, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 2.234443746071653, | |
| "grad_norm": 1.28493332862854, | |
| "learning_rate": 8.06380075512468e-06, | |
| "loss": 0.3377, | |
| "step": 10665 | |
| }, | |
| { | |
| "epoch": 2.2375864236329353, | |
| "grad_norm": 1.5767827033996582, | |
| "learning_rate": 8.001505541969698e-06, | |
| "loss": 0.328, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 2.2407291011942174, | |
| "grad_norm": 1.3858174085617065, | |
| "learning_rate": 7.939406010576167e-06, | |
| "loss": 0.2975, | |
| "step": 10695 | |
| }, | |
| { | |
| "epoch": 2.2438717787554996, | |
| "grad_norm": 1.6385616064071655, | |
| "learning_rate": 7.877502875815961e-06, | |
| "loss": 0.3297, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 2.2470144563167818, | |
| "grad_norm": 1.4886940717697144, | |
| "learning_rate": 7.815796850300095e-06, | |
| "loss": 0.3159, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 2.250157133878064, | |
| "grad_norm": 1.1138700246810913, | |
| "learning_rate": 7.754288644370528e-06, | |
| "loss": 0.336, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 2.253299811439346, | |
| "grad_norm": 1.5991181135177612, | |
| "learning_rate": 7.692978966091977e-06, | |
| "loss": 0.3252, | |
| "step": 10755 | |
| }, | |
| { | |
| "epoch": 2.2564424890006287, | |
| "grad_norm": 1.1452405452728271, | |
| "learning_rate": 7.631868521243757e-06, | |
| "loss": 0.316, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 2.259585166561911, | |
| "grad_norm": 1.069392204284668, | |
| "learning_rate": 7.57095801331166e-06, | |
| "loss": 0.3167, | |
| "step": 10785 | |
| }, | |
| { | |
| "epoch": 2.262727844123193, | |
| "grad_norm": 1.717702865600586, | |
| "learning_rate": 7.510248143479876e-06, | |
| "loss": 0.3426, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.2658705216844752, | |
| "grad_norm": 1.7524367570877075, | |
| "learning_rate": 7.4497396106229134e-06, | |
| "loss": 0.3732, | |
| "step": 10815 | |
| }, | |
| { | |
| "epoch": 2.2690131992457574, | |
| "grad_norm": 1.937584638595581, | |
| "learning_rate": 7.38943311129752e-06, | |
| "loss": 0.3333, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 2.2721558768070396, | |
| "grad_norm": 1.3948473930358887, | |
| "learning_rate": 7.329329339734722e-06, | |
| "loss": 0.3149, | |
| "step": 10845 | |
| }, | |
| { | |
| "epoch": 2.2752985543683217, | |
| "grad_norm": 1.588791012763977, | |
| "learning_rate": 7.269428987831783e-06, | |
| "loss": 0.3433, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 2.278441231929604, | |
| "grad_norm": 1.2459790706634521, | |
| "learning_rate": 7.209732745144254e-06, | |
| "loss": 0.2659, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 2.281583909490886, | |
| "grad_norm": 1.0872770547866821, | |
| "learning_rate": 7.150241298878055e-06, | |
| "loss": 0.2956, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 2.2847265870521687, | |
| "grad_norm": 1.6503065824508667, | |
| "learning_rate": 7.090955333881555e-06, | |
| "loss": 0.3258, | |
| "step": 10905 | |
| }, | |
| { | |
| "epoch": 2.287869264613451, | |
| "grad_norm": 1.3506873846054077, | |
| "learning_rate": 7.0318755326376576e-06, | |
| "loss": 0.2789, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 2.291011942174733, | |
| "grad_norm": 1.3215200901031494, | |
| "learning_rate": 6.973002575255974e-06, | |
| "loss": 0.3325, | |
| "step": 10935 | |
| }, | |
| { | |
| "epoch": 2.294154619736015, | |
| "grad_norm": 1.4247123003005981, | |
| "learning_rate": 6.914337139465004e-06, | |
| "loss": 0.3329, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 2.2972972972972974, | |
| "grad_norm": 1.0532140731811523, | |
| "learning_rate": 6.85587990060432e-06, | |
| "loss": 0.2541, | |
| "step": 10965 | |
| }, | |
| { | |
| "epoch": 2.3004399748585795, | |
| "grad_norm": 1.6737048625946045, | |
| "learning_rate": 6.797631531616769e-06, | |
| "loss": 0.3642, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 2.3035826524198617, | |
| "grad_norm": 1.2676361799240112, | |
| "learning_rate": 6.739592703040759e-06, | |
| "loss": 0.2897, | |
| "step": 10995 | |
| }, | |
| { | |
| "epoch": 2.306725329981144, | |
| "grad_norm": 1.5627233982086182, | |
| "learning_rate": 6.681764083002534e-06, | |
| "loss": 0.3278, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 2.309868007542426, | |
| "grad_norm": 1.7141146659851074, | |
| "learning_rate": 6.624146337208484e-06, | |
| "loss": 0.3139, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 2.313010685103708, | |
| "grad_norm": 1.1188994646072388, | |
| "learning_rate": 6.566740128937451e-06, | |
| "loss": 0.295, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 2.3161533626649904, | |
| "grad_norm": 1.5478028059005737, | |
| "learning_rate": 6.509546119033152e-06, | |
| "loss": 0.3149, | |
| "step": 11055 | |
| }, | |
| { | |
| "epoch": 2.3192960402262726, | |
| "grad_norm": 1.1058639287948608, | |
| "learning_rate": 6.4525649658965045e-06, | |
| "loss": 0.274, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 2.322438717787555, | |
| "grad_norm": 1.5267043113708496, | |
| "learning_rate": 6.395797325478106e-06, | |
| "loss": 0.3099, | |
| "step": 11085 | |
| }, | |
| { | |
| "epoch": 2.3255813953488373, | |
| "grad_norm": 1.4159321784973145, | |
| "learning_rate": 6.339243851270635e-06, | |
| "loss": 0.3495, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.3287240729101195, | |
| "grad_norm": 1.2933320999145508, | |
| "learning_rate": 6.282905194301375e-06, | |
| "loss": 0.2708, | |
| "step": 11115 | |
| }, | |
| { | |
| "epoch": 2.3318667504714017, | |
| "grad_norm": 1.9966567754745483, | |
| "learning_rate": 6.226782003124676e-06, | |
| "loss": 0.2899, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 2.335009428032684, | |
| "grad_norm": 1.3963077068328857, | |
| "learning_rate": 6.170874923814499e-06, | |
| "loss": 0.3259, | |
| "step": 11145 | |
| }, | |
| { | |
| "epoch": 2.338152105593966, | |
| "grad_norm": 1.3655591011047363, | |
| "learning_rate": 6.115184599957033e-06, | |
| "loss": 0.289, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 2.341294783155248, | |
| "grad_norm": 1.4125938415527344, | |
| "learning_rate": 6.059711672643195e-06, | |
| "loss": 0.291, | |
| "step": 11175 | |
| }, | |
| { | |
| "epoch": 2.3444374607165304, | |
| "grad_norm": 2.017850875854492, | |
| "learning_rate": 6.004456780461315e-06, | |
| "loss": 0.3044, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 2.3475801382778125, | |
| "grad_norm": 1.441328525543213, | |
| "learning_rate": 5.949420559489752e-06, | |
| "loss": 0.3245, | |
| "step": 11205 | |
| }, | |
| { | |
| "epoch": 2.350722815839095, | |
| "grad_norm": 1.799134373664856, | |
| "learning_rate": 5.894603643289601e-06, | |
| "loss": 0.3593, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 2.3538654934003773, | |
| "grad_norm": 1.8016554117202759, | |
| "learning_rate": 5.840006662897388e-06, | |
| "loss": 0.2787, | |
| "step": 11235 | |
| }, | |
| { | |
| "epoch": 2.3570081709616595, | |
| "grad_norm": 1.4649808406829834, | |
| "learning_rate": 5.785630246817781e-06, | |
| "loss": 0.3168, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 2.3601508485229417, | |
| "grad_norm": 1.3161333799362183, | |
| "learning_rate": 5.731475021016383e-06, | |
| "loss": 0.3732, | |
| "step": 11265 | |
| }, | |
| { | |
| "epoch": 2.363293526084224, | |
| "grad_norm": 1.663887858390808, | |
| "learning_rate": 5.677541608912526e-06, | |
| "loss": 0.2998, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 2.366436203645506, | |
| "grad_norm": 1.439397931098938, | |
| "learning_rate": 5.623830631372087e-06, | |
| "loss": 0.3206, | |
| "step": 11295 | |
| }, | |
| { | |
| "epoch": 2.369578881206788, | |
| "grad_norm": 1.6403486728668213, | |
| "learning_rate": 5.570342706700324e-06, | |
| "loss": 0.3565, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 2.3727215587680703, | |
| "grad_norm": 1.6395245790481567, | |
| "learning_rate": 5.517078450634799e-06, | |
| "loss": 0.294, | |
| "step": 11325 | |
| }, | |
| { | |
| "epoch": 2.3758642363293525, | |
| "grad_norm": 1.496952772140503, | |
| "learning_rate": 5.464038476338237e-06, | |
| "loss": 0.2963, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 2.3790069138906347, | |
| "grad_norm": 1.9148141145706177, | |
| "learning_rate": 5.411223394391529e-06, | |
| "loss": 0.3353, | |
| "step": 11355 | |
| }, | |
| { | |
| "epoch": 2.382149591451917, | |
| "grad_norm": 1.4077427387237549, | |
| "learning_rate": 5.3586338127866396e-06, | |
| "loss": 0.3174, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 2.385292269013199, | |
| "grad_norm": 1.5252655744552612, | |
| "learning_rate": 5.306270336919661e-06, | |
| "loss": 0.3134, | |
| "step": 11385 | |
| }, | |
| { | |
| "epoch": 2.3884349465744816, | |
| "grad_norm": 1.5777688026428223, | |
| "learning_rate": 5.254133569583808e-06, | |
| "loss": 0.3309, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.391577624135764, | |
| "grad_norm": 1.7088990211486816, | |
| "learning_rate": 5.2022241109624805e-06, | |
| "loss": 0.2441, | |
| "step": 11415 | |
| }, | |
| { | |
| "epoch": 2.394720301697046, | |
| "grad_norm": 1.7140231132507324, | |
| "learning_rate": 5.150542558622415e-06, | |
| "loss": 0.3053, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 2.397862979258328, | |
| "grad_norm": 3.6586174964904785, | |
| "learning_rate": 5.099089507506705e-06, | |
| "loss": 0.3079, | |
| "step": 11445 | |
| }, | |
| { | |
| "epoch": 2.4010056568196103, | |
| "grad_norm": 1.752259612083435, | |
| "learning_rate": 5.047865549928024e-06, | |
| "loss": 0.324, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 2.4041483343808925, | |
| "grad_norm": 1.5753651857376099, | |
| "learning_rate": 4.996871275561779e-06, | |
| "loss": 0.3128, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 2.4072910119421747, | |
| "grad_norm": 1.9012105464935303, | |
| "learning_rate": 4.946107271439343e-06, | |
| "loss": 0.3764, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 2.410433689503457, | |
| "grad_norm": 1.4729382991790771, | |
| "learning_rate": 4.895574121941285e-06, | |
| "loss": 0.2755, | |
| "step": 11505 | |
| }, | |
| { | |
| "epoch": 2.413576367064739, | |
| "grad_norm": 1.4175302982330322, | |
| "learning_rate": 4.845272408790621e-06, | |
| "loss": 0.3121, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 2.4167190446260216, | |
| "grad_norm": 1.7722225189208984, | |
| "learning_rate": 4.795202711046168e-06, | |
| "loss": 0.2744, | |
| "step": 11535 | |
| }, | |
| { | |
| "epoch": 2.4198617221873038, | |
| "grad_norm": 1.4909186363220215, | |
| "learning_rate": 4.74536560509582e-06, | |
| "loss": 0.3025, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 2.423004399748586, | |
| "grad_norm": 1.8246691226959229, | |
| "learning_rate": 4.695761664649964e-06, | |
| "loss": 0.3324, | |
| "step": 11565 | |
| }, | |
| { | |
| "epoch": 2.426147077309868, | |
| "grad_norm": 1.7963186502456665, | |
| "learning_rate": 4.646391460734837e-06, | |
| "loss": 0.3575, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 2.4292897548711503, | |
| "grad_norm": 1.5770527124404907, | |
| "learning_rate": 4.5972555616859816e-06, | |
| "loss": 0.2908, | |
| "step": 11595 | |
| }, | |
| { | |
| "epoch": 2.4324324324324325, | |
| "grad_norm": 1.617647409439087, | |
| "learning_rate": 4.548354533141677e-06, | |
| "loss": 0.2994, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 2.4355751099937146, | |
| "grad_norm": 1.745650291442871, | |
| "learning_rate": 4.49968893803645e-06, | |
| "loss": 0.3361, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 2.438717787554997, | |
| "grad_norm": 1.0638154745101929, | |
| "learning_rate": 4.451259336594596e-06, | |
| "loss": 0.3368, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 2.441860465116279, | |
| "grad_norm": 1.482951045036316, | |
| "learning_rate": 4.403066286323693e-06, | |
| "loss": 0.3004, | |
| "step": 11655 | |
| }, | |
| { | |
| "epoch": 2.445003142677561, | |
| "grad_norm": 1.4275717735290527, | |
| "learning_rate": 4.355110342008231e-06, | |
| "loss": 0.2826, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 2.4481458202388433, | |
| "grad_norm": 1.4426920413970947, | |
| "learning_rate": 4.307392055703182e-06, | |
| "loss": 0.2944, | |
| "step": 11685 | |
| }, | |
| { | |
| "epoch": 2.4512884978001255, | |
| "grad_norm": 1.5074379444122314, | |
| "learning_rate": 4.259911976727712e-06, | |
| "loss": 0.3222, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.454431175361408, | |
| "grad_norm": 1.3746048212051392, | |
| "learning_rate": 4.212670651658768e-06, | |
| "loss": 0.317, | |
| "step": 11715 | |
| }, | |
| { | |
| "epoch": 2.4575738529226903, | |
| "grad_norm": 1.6050078868865967, | |
| "learning_rate": 4.165668624324845e-06, | |
| "loss": 0.3172, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 2.4607165304839724, | |
| "grad_norm": 1.2552024126052856, | |
| "learning_rate": 4.118906435799724e-06, | |
| "loss": 0.2816, | |
| "step": 11745 | |
| }, | |
| { | |
| "epoch": 2.4638592080452546, | |
| "grad_norm": 1.3392716646194458, | |
| "learning_rate": 4.0723846243962084e-06, | |
| "loss": 0.3155, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 2.4670018856065368, | |
| "grad_norm": 1.5874278545379639, | |
| "learning_rate": 4.026103725659977e-06, | |
| "loss": 0.2603, | |
| "step": 11775 | |
| }, | |
| { | |
| "epoch": 2.470144563167819, | |
| "grad_norm": 1.235484004020691, | |
| "learning_rate": 3.980064272363362e-06, | |
| "loss": 0.2499, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 2.473287240729101, | |
| "grad_norm": 1.6743351221084595, | |
| "learning_rate": 3.934266794499275e-06, | |
| "loss": 0.3402, | |
| "step": 11805 | |
| }, | |
| { | |
| "epoch": 2.4764299182903833, | |
| "grad_norm": 1.4384301900863647, | |
| "learning_rate": 3.888711819275048e-06, | |
| "loss": 0.3176, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 2.4795725958516655, | |
| "grad_norm": 1.4185879230499268, | |
| "learning_rate": 3.84339987110641e-06, | |
| "loss": 0.3183, | |
| "step": 11835 | |
| }, | |
| { | |
| "epoch": 2.482715273412948, | |
| "grad_norm": 1.382876992225647, | |
| "learning_rate": 3.7983314716114384e-06, | |
| "loss": 0.3044, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 2.4858579509742302, | |
| "grad_norm": 1.7051907777786255, | |
| "learning_rate": 3.7535071396045286e-06, | |
| "loss": 0.3701, | |
| "step": 11865 | |
| }, | |
| { | |
| "epoch": 2.4890006285355124, | |
| "grad_norm": 1.6134312152862549, | |
| "learning_rate": 3.708927391090447e-06, | |
| "loss": 0.2941, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 2.4921433060967946, | |
| "grad_norm": 1.5831973552703857, | |
| "learning_rate": 3.664592739258399e-06, | |
| "loss": 0.33, | |
| "step": 11895 | |
| }, | |
| { | |
| "epoch": 2.4952859836580767, | |
| "grad_norm": 1.5520756244659424, | |
| "learning_rate": 3.6205036944761045e-06, | |
| "loss": 0.3087, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 2.498428661219359, | |
| "grad_norm": 1.497530460357666, | |
| "learning_rate": 3.5766607642839093e-06, | |
| "loss": 0.3003, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 2.501571338780641, | |
| "grad_norm": 1.3204107284545898, | |
| "learning_rate": 3.5330644533889705e-06, | |
| "loss": 0.284, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 2.5047140163419233, | |
| "grad_norm": 1.4598573446273804, | |
| "learning_rate": 3.489715263659435e-06, | |
| "loss": 0.2783, | |
| "step": 11955 | |
| }, | |
| { | |
| "epoch": 2.5078566939032054, | |
| "grad_norm": 1.5349574089050293, | |
| "learning_rate": 3.4466136941186724e-06, | |
| "loss": 0.2826, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 2.5109993714644876, | |
| "grad_norm": 1.3122080564498901, | |
| "learning_rate": 3.403760240939502e-06, | |
| "loss": 0.2675, | |
| "step": 11985 | |
| }, | |
| { | |
| "epoch": 2.5141420490257698, | |
| "grad_norm": 1.218714714050293, | |
| "learning_rate": 3.361155397438501e-06, | |
| "loss": 0.3582, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.517284726587052, | |
| "grad_norm": 1.8126921653747559, | |
| "learning_rate": 3.3187996540703424e-06, | |
| "loss": 0.2697, | |
| "step": 12015 | |
| }, | |
| { | |
| "epoch": 2.520427404148334, | |
| "grad_norm": 1.4559165239334106, | |
| "learning_rate": 3.276693498422104e-06, | |
| "loss": 0.3061, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 2.5235700817096167, | |
| "grad_norm": 1.0276938676834106, | |
| "learning_rate": 3.234837415207706e-06, | |
| "loss": 0.3437, | |
| "step": 12045 | |
| }, | |
| { | |
| "epoch": 2.526712759270899, | |
| "grad_norm": 1.4260108470916748, | |
| "learning_rate": 3.193231886262288e-06, | |
| "loss": 0.282, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 2.529855436832181, | |
| "grad_norm": 1.7475075721740723, | |
| "learning_rate": 3.1518773905366976e-06, | |
| "loss": 0.3306, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 2.5329981143934632, | |
| "grad_norm": 1.1481621265411377, | |
| "learning_rate": 3.1107744040919427e-06, | |
| "loss": 0.2692, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 2.5361407919547454, | |
| "grad_norm": 1.8862768411636353, | |
| "learning_rate": 3.0699234000937464e-06, | |
| "loss": 0.332, | |
| "step": 12105 | |
| }, | |
| { | |
| "epoch": 2.5392834695160276, | |
| "grad_norm": 1.4870737791061401, | |
| "learning_rate": 3.0293248488070745e-06, | |
| "loss": 0.3344, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 2.5424261470773097, | |
| "grad_norm": 1.7676063776016235, | |
| "learning_rate": 2.9889792175907318e-06, | |
| "loss": 0.3323, | |
| "step": 12135 | |
| }, | |
| { | |
| "epoch": 2.5455688246385924, | |
| "grad_norm": 1.3961862325668335, | |
| "learning_rate": 2.9488869708919674e-06, | |
| "loss": 0.3279, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.5487115021998745, | |
| "grad_norm": 1.2494407892227173, | |
| "learning_rate": 2.9090485702411603e-06, | |
| "loss": 0.3043, | |
| "step": 12165 | |
| }, | |
| { | |
| "epoch": 2.5518541797611567, | |
| "grad_norm": 2.1194069385528564, | |
| "learning_rate": 2.869464474246483e-06, | |
| "loss": 0.3251, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 2.554996857322439, | |
| "grad_norm": 1.5678242444992065, | |
| "learning_rate": 2.8301351385886214e-06, | |
| "loss": 0.3134, | |
| "step": 12195 | |
| }, | |
| { | |
| "epoch": 2.558139534883721, | |
| "grad_norm": 1.7995771169662476, | |
| "learning_rate": 2.7910610160155256e-06, | |
| "loss": 0.3218, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 2.561282212445003, | |
| "grad_norm": 1.077495813369751, | |
| "learning_rate": 2.7522425563372202e-06, | |
| "loss": 0.2961, | |
| "step": 12225 | |
| }, | |
| { | |
| "epoch": 2.5644248900062854, | |
| "grad_norm": 1.7993483543395996, | |
| "learning_rate": 2.7136802064206157e-06, | |
| "loss": 0.3097, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 2.5675675675675675, | |
| "grad_norm": 1.5372523069381714, | |
| "learning_rate": 2.675374410184345e-06, | |
| "loss": 0.2836, | |
| "step": 12255 | |
| }, | |
| { | |
| "epoch": 2.5707102451288497, | |
| "grad_norm": 1.4500757455825806, | |
| "learning_rate": 2.6373256085936742e-06, | |
| "loss": 0.3154, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 2.573852922690132, | |
| "grad_norm": 1.4548457860946655, | |
| "learning_rate": 2.5995342396554325e-06, | |
| "loss": 0.3113, | |
| "step": 12285 | |
| }, | |
| { | |
| "epoch": 2.576995600251414, | |
| "grad_norm": 1.9645068645477295, | |
| "learning_rate": 2.562000738412945e-06, | |
| "loss": 0.3444, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.5801382778126962, | |
| "grad_norm": 1.7881463766098022, | |
| "learning_rate": 2.5247255369410418e-06, | |
| "loss": 0.2974, | |
| "step": 12315 | |
| }, | |
| { | |
| "epoch": 2.5832809553739784, | |
| "grad_norm": 1.7925788164138794, | |
| "learning_rate": 2.4877090643410927e-06, | |
| "loss": 0.2944, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 2.586423632935261, | |
| "grad_norm": 1.5786759853363037, | |
| "learning_rate": 2.4509517467360356e-06, | |
| "loss": 0.3785, | |
| "step": 12345 | |
| }, | |
| { | |
| "epoch": 2.589566310496543, | |
| "grad_norm": 1.4962717294692993, | |
| "learning_rate": 2.4144540072654987e-06, | |
| "loss": 0.3267, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 2.5927089880578253, | |
| "grad_norm": 1.163743257522583, | |
| "learning_rate": 2.378216266080929e-06, | |
| "loss": 0.2757, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 2.5958516656191075, | |
| "grad_norm": 1.7964270114898682, | |
| "learning_rate": 2.342238940340746e-06, | |
| "loss": 0.2904, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 2.5989943431803897, | |
| "grad_norm": 1.7889028787612915, | |
| "learning_rate": 2.3065224442055333e-06, | |
| "loss": 0.3064, | |
| "step": 12405 | |
| }, | |
| { | |
| "epoch": 2.602137020741672, | |
| "grad_norm": 1.5097829103469849, | |
| "learning_rate": 2.271067188833281e-06, | |
| "loss": 0.3401, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 2.605279698302954, | |
| "grad_norm": 1.4333211183547974, | |
| "learning_rate": 2.235873582374659e-06, | |
| "loss": 0.2794, | |
| "step": 12435 | |
| }, | |
| { | |
| "epoch": 2.608422375864236, | |
| "grad_norm": 1.2477611303329468, | |
| "learning_rate": 2.200942029968309e-06, | |
| "loss": 0.2935, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.611565053425519, | |
| "grad_norm": 1.7559458017349243, | |
| "learning_rate": 2.166272933736177e-06, | |
| "loss": 0.3258, | |
| "step": 12465 | |
| }, | |
| { | |
| "epoch": 2.614707730986801, | |
| "grad_norm": 1.6621719598770142, | |
| "learning_rate": 2.1318666927788834e-06, | |
| "loss": 0.3111, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 2.617850408548083, | |
| "grad_norm": 1.6579554080963135, | |
| "learning_rate": 2.0977237031711506e-06, | |
| "loss": 0.2611, | |
| "step": 12495 | |
| }, | |
| { | |
| "epoch": 2.6209930861093653, | |
| "grad_norm": 1.7369964122772217, | |
| "learning_rate": 2.063844357957223e-06, | |
| "loss": 0.3577, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 2.6241357636706475, | |
| "grad_norm": 1.6332292556762695, | |
| "learning_rate": 2.0302290471463314e-06, | |
| "loss": 0.2942, | |
| "step": 12525 | |
| }, | |
| { | |
| "epoch": 2.6272784412319297, | |
| "grad_norm": 1.5578200817108154, | |
| "learning_rate": 1.996878157708243e-06, | |
| "loss": 0.2695, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 2.630421118793212, | |
| "grad_norm": 1.5188201665878296, | |
| "learning_rate": 1.963792073568757e-06, | |
| "loss": 0.3078, | |
| "step": 12555 | |
| }, | |
| { | |
| "epoch": 2.633563796354494, | |
| "grad_norm": 1.8250635862350464, | |
| "learning_rate": 1.9309711756053367e-06, | |
| "loss": 0.3146, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 2.636706473915776, | |
| "grad_norm": 1.7131030559539795, | |
| "learning_rate": 1.8984158416426728e-06, | |
| "loss": 0.3182, | |
| "step": 12585 | |
| }, | |
| { | |
| "epoch": 2.6398491514770583, | |
| "grad_norm": 1.473404884338379, | |
| "learning_rate": 1.8661264464483852e-06, | |
| "loss": 0.2727, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.6429918290383405, | |
| "grad_norm": 1.508779764175415, | |
| "learning_rate": 1.8341033617286645e-06, | |
| "loss": 0.3448, | |
| "step": 12615 | |
| }, | |
| { | |
| "epoch": 2.6461345065996227, | |
| "grad_norm": 1.147560477256775, | |
| "learning_rate": 1.8023469561240126e-06, | |
| "loss": 0.2783, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 2.649277184160905, | |
| "grad_norm": 1.760060429573059, | |
| "learning_rate": 1.770857595205011e-06, | |
| "loss": 0.3152, | |
| "step": 12645 | |
| }, | |
| { | |
| "epoch": 2.6524198617221875, | |
| "grad_norm": 1.4739596843719482, | |
| "learning_rate": 1.7396356414680959e-06, | |
| "loss": 0.29, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 2.6555625392834696, | |
| "grad_norm": 1.567877173423767, | |
| "learning_rate": 1.7086814543313816e-06, | |
| "loss": 0.2672, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 2.658705216844752, | |
| "grad_norm": 1.3326002359390259, | |
| "learning_rate": 1.6779953901305295e-06, | |
| "loss": 0.251, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 2.661847894406034, | |
| "grad_norm": 1.3788151741027832, | |
| "learning_rate": 1.647577802114661e-06, | |
| "loss": 0.3416, | |
| "step": 12705 | |
| }, | |
| { | |
| "epoch": 2.664990571967316, | |
| "grad_norm": 1.7790052890777588, | |
| "learning_rate": 1.6174290404422726e-06, | |
| "loss": 0.2999, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 2.6681332495285983, | |
| "grad_norm": 1.4312305450439453, | |
| "learning_rate": 1.5875494521771922e-06, | |
| "loss": 0.3305, | |
| "step": 12735 | |
| }, | |
| { | |
| "epoch": 2.6712759270898805, | |
| "grad_norm": 1.6938543319702148, | |
| "learning_rate": 1.5579393812846316e-06, | |
| "loss": 0.3117, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 2.6744186046511627, | |
| "grad_norm": 1.5854291915893555, | |
| "learning_rate": 1.528599168627165e-06, | |
| "loss": 0.3289, | |
| "step": 12765 | |
| }, | |
| { | |
| "epoch": 2.6775612822124453, | |
| "grad_norm": 1.1590096950531006, | |
| "learning_rate": 1.4995291519608602e-06, | |
| "loss": 0.283, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 2.6807039597737274, | |
| "grad_norm": 1.068301796913147, | |
| "learning_rate": 1.470729665931353e-06, | |
| "loss": 0.331, | |
| "step": 12795 | |
| }, | |
| { | |
| "epoch": 2.6838466373350096, | |
| "grad_norm": 1.2185308933258057, | |
| "learning_rate": 1.4422010420700182e-06, | |
| "loss": 0.3014, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 2.686989314896292, | |
| "grad_norm": 1.4308061599731445, | |
| "learning_rate": 1.413943608790133e-06, | |
| "loss": 0.2939, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 2.690131992457574, | |
| "grad_norm": 1.1259864568710327, | |
| "learning_rate": 1.385957691383119e-06, | |
| "loss": 0.2669, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 2.693274670018856, | |
| "grad_norm": 1.5093046426773071, | |
| "learning_rate": 1.3582436120147729e-06, | |
| "loss": 0.3374, | |
| "step": 12855 | |
| }, | |
| { | |
| "epoch": 2.6964173475801383, | |
| "grad_norm": 1.3771803379058838, | |
| "learning_rate": 1.3308016897215807e-06, | |
| "loss": 0.2783, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 2.6995600251414205, | |
| "grad_norm": 2.384852409362793, | |
| "learning_rate": 1.3036322404070296e-06, | |
| "loss": 0.3162, | |
| "step": 12885 | |
| }, | |
| { | |
| "epoch": 2.700188560653677, | |
| "eval_accuracy": 0.8853943711763073, | |
| "eval_loss": 0.4137997329235077, | |
| "eval_runtime": 1196.9935, | |
| "eval_samples_per_second": 3.998, | |
| "eval_steps_per_second": 1.0, | |
| "step": 12888 | |
| }, | |
| { | |
| "epoch": 2.7027027027027026, | |
| "grad_norm": 1.673790693283081, | |
| "learning_rate": 1.2767355768379702e-06, | |
| "loss": 0.2855, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.705845380263985, | |
| "grad_norm": 1.8752899169921875, | |
| "learning_rate": 1.2501120086410411e-06, | |
| "loss": 0.3085, | |
| "step": 12915 | |
| }, | |
| { | |
| "epoch": 2.708988057825267, | |
| "grad_norm": 1.8645318746566772, | |
| "learning_rate": 1.2237618422990733e-06, | |
| "loss": 0.3068, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 2.712130735386549, | |
| "grad_norm": 1.9585272073745728, | |
| "learning_rate": 1.1976853811475675e-06, | |
| "loss": 0.3283, | |
| "step": 12945 | |
| }, | |
| { | |
| "epoch": 2.7152734129478313, | |
| "grad_norm": 1.7527602910995483, | |
| "learning_rate": 1.1718829253712204e-06, | |
| "loss": 0.3222, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 2.718416090509114, | |
| "grad_norm": 1.3966923952102661, | |
| "learning_rate": 1.1463547720004546e-06, | |
| "loss": 0.3092, | |
| "step": 12975 | |
| }, | |
| { | |
| "epoch": 2.721558768070396, | |
| "grad_norm": 1.3295458555221558, | |
| "learning_rate": 1.1211012149080074e-06, | |
| "loss": 0.3237, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 2.7247014456316783, | |
| "grad_norm": 0.9988710284233093, | |
| "learning_rate": 1.0961225448055307e-06, | |
| "loss": 0.3216, | |
| "step": 13005 | |
| }, | |
| { | |
| "epoch": 2.7278441231929604, | |
| "grad_norm": 1.6158466339111328, | |
| "learning_rate": 1.0714190492402715e-06, | |
| "loss": 0.3017, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 2.7309868007542426, | |
| "grad_norm": 1.4756746292114258, | |
| "learning_rate": 1.0469910125917358e-06, | |
| "loss": 0.3169, | |
| "step": 13035 | |
| }, | |
| { | |
| "epoch": 2.7341294783155248, | |
| "grad_norm": 1.3889656066894531, | |
| "learning_rate": 1.0228387160684333e-06, | |
| "loss": 0.3754, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 2.737272155876807, | |
| "grad_norm": 1.2530293464660645, | |
| "learning_rate": 9.989624377046258e-07, | |
| "loss": 0.2958, | |
| "step": 13065 | |
| }, | |
| { | |
| "epoch": 2.740414833438089, | |
| "grad_norm": 1.8963161706924438, | |
| "learning_rate": 9.753624523571425e-07, | |
| "loss": 0.3641, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 2.7435575109993717, | |
| "grad_norm": 1.4623044729232788, | |
| "learning_rate": 9.520390317021955e-07, | |
| "loss": 0.3061, | |
| "step": 13095 | |
| }, | |
| { | |
| "epoch": 2.746700188560654, | |
| "grad_norm": 1.604202151298523, | |
| "learning_rate": 9.289924442322767e-07, | |
| "loss": 0.2785, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 2.749842866121936, | |
| "grad_norm": 1.8192863464355469, | |
| "learning_rate": 9.062229552530471e-07, | |
| "loss": 0.3169, | |
| "step": 13125 | |
| }, | |
| { | |
| "epoch": 2.7529855436832182, | |
| "grad_norm": 1.419291377067566, | |
| "learning_rate": 8.83730826880294e-07, | |
| "loss": 0.3015, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 2.7561282212445004, | |
| "grad_norm": 1.5753535032272339, | |
| "learning_rate": 8.615163180369035e-07, | |
| "loss": 0.284, | |
| "step": 13155 | |
| }, | |
| { | |
| "epoch": 2.7592708988057826, | |
| "grad_norm": 1.789189338684082, | |
| "learning_rate": 8.395796844498815e-07, | |
| "loss": 0.3423, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 2.7624135763670647, | |
| "grad_norm": 1.343781590461731, | |
| "learning_rate": 8.17921178647435e-07, | |
| "loss": 0.3119, | |
| "step": 13185 | |
| }, | |
| { | |
| "epoch": 2.765556253928347, | |
| "grad_norm": 1.652388572692871, | |
| "learning_rate": 7.96541049956026e-07, | |
| "loss": 0.3219, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.768698931489629, | |
| "grad_norm": 1.597399353981018, | |
| "learning_rate": 7.754395444975221e-07, | |
| "loss": 0.2873, | |
| "step": 13215 | |
| }, | |
| { | |
| "epoch": 2.7718416090509113, | |
| "grad_norm": 1.3452566862106323, | |
| "learning_rate": 7.546169051863672e-07, | |
| "loss": 0.3125, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 2.7749842866121934, | |
| "grad_norm": 1.605913758277893, | |
| "learning_rate": 7.340733717267678e-07, | |
| "loss": 0.278, | |
| "step": 13245 | |
| }, | |
| { | |
| "epoch": 2.7781269641734756, | |
| "grad_norm": 1.465397596359253, | |
| "learning_rate": 7.138091806099589e-07, | |
| "loss": 0.3208, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 2.7812696417347578, | |
| "grad_norm": 1.7374017238616943, | |
| "learning_rate": 6.938245651114506e-07, | |
| "loss": 0.2933, | |
| "step": 13275 | |
| }, | |
| { | |
| "epoch": 2.7844123192960404, | |
| "grad_norm": 1.9815653562545776, | |
| "learning_rate": 6.741197552883771e-07, | |
| "loss": 0.3335, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 2.7875549968573226, | |
| "grad_norm": 1.4085747003555298, | |
| "learning_rate": 6.546949779768136e-07, | |
| "loss": 0.2711, | |
| "step": 13305 | |
| }, | |
| { | |
| "epoch": 2.7906976744186047, | |
| "grad_norm": 1.6339495182037354, | |
| "learning_rate": 6.355504567891912e-07, | |
| "loss": 0.3331, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 2.793840351979887, | |
| "grad_norm": 1.441635251045227, | |
| "learning_rate": 6.166864121117167e-07, | |
| "loss": 0.3628, | |
| "step": 13335 | |
| }, | |
| { | |
| "epoch": 2.796983029541169, | |
| "grad_norm": 1.4819507598876953, | |
| "learning_rate": 5.981030611018234e-07, | |
| "loss": 0.2825, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 2.8001257071024512, | |
| "grad_norm": 1.5747650861740112, | |
| "learning_rate": 5.798006176856802e-07, | |
| "loss": 0.3144, | |
| "step": 13365 | |
| }, | |
| { | |
| "epoch": 2.8032683846637334, | |
| "grad_norm": 1.4870857000350952, | |
| "learning_rate": 5.617792925557363e-07, | |
| "loss": 0.3289, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 2.8064110622250156, | |
| "grad_norm": 1.7161614894866943, | |
| "learning_rate": 5.440392931682859e-07, | |
| "loss": 0.3379, | |
| "step": 13395 | |
| }, | |
| { | |
| "epoch": 2.809553739786298, | |
| "grad_norm": 0.8529698848724365, | |
| "learning_rate": 5.265808237410824e-07, | |
| "loss": 0.3143, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 2.8126964173475804, | |
| "grad_norm": 1.6342661380767822, | |
| "learning_rate": 5.094040852509779e-07, | |
| "loss": 0.3144, | |
| "step": 13425 | |
| }, | |
| { | |
| "epoch": 2.8158390949088625, | |
| "grad_norm": 1.4123117923736572, | |
| "learning_rate": 4.925092754316352e-07, | |
| "loss": 0.3407, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 2.8189817724701447, | |
| "grad_norm": 1.3898142576217651, | |
| "learning_rate": 4.7589658877122967e-07, | |
| "loss": 0.3385, | |
| "step": 13455 | |
| }, | |
| { | |
| "epoch": 2.822124450031427, | |
| "grad_norm": 1.6428829431533813, | |
| "learning_rate": 4.5956621651020994e-07, | |
| "loss": 0.2963, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 2.825267127592709, | |
| "grad_norm": 1.465915322303772, | |
| "learning_rate": 4.4351834663910465e-07, | |
| "loss": 0.3302, | |
| "step": 13485 | |
| }, | |
| { | |
| "epoch": 2.828409805153991, | |
| "grad_norm": 1.8282034397125244, | |
| "learning_rate": 4.277531638963689e-07, | |
| "loss": 0.3171, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.8315524827152734, | |
| "grad_norm": 2.015639305114746, | |
| "learning_rate": 4.122708497662275e-07, | |
| "loss": 0.3633, | |
| "step": 13515 | |
| }, | |
| { | |
| "epoch": 2.8346951602765555, | |
| "grad_norm": 1.0915390253067017, | |
| "learning_rate": 3.97071582476613e-07, | |
| "loss": 0.3, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 2.8378378378378377, | |
| "grad_norm": 0.9291322827339172, | |
| "learning_rate": 3.821555369971086e-07, | |
| "loss": 0.3471, | |
| "step": 13545 | |
| }, | |
| { | |
| "epoch": 2.84098051539912, | |
| "grad_norm": 1.6048222780227661, | |
| "learning_rate": 3.6752288503691945e-07, | |
| "loss": 0.3209, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 2.844123192960402, | |
| "grad_norm": 1.6999403238296509, | |
| "learning_rate": 3.5317379504291316e-07, | |
| "loss": 0.3446, | |
| "step": 13575 | |
| }, | |
| { | |
| "epoch": 2.8472658705216842, | |
| "grad_norm": 2.1094415187835693, | |
| "learning_rate": 3.391084321976656e-07, | |
| "loss": 0.3502, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 2.850408548082967, | |
| "grad_norm": 1.3436388969421387, | |
| "learning_rate": 3.2532695841758496e-07, | |
| "loss": 0.3167, | |
| "step": 13605 | |
| }, | |
| { | |
| "epoch": 2.853551225644249, | |
| "grad_norm": 1.470632553100586, | |
| "learning_rate": 3.118295323510101e-07, | |
| "loss": 0.3063, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 2.856693903205531, | |
| "grad_norm": 1.0371286869049072, | |
| "learning_rate": 2.9861630937641494e-07, | |
| "loss": 0.3034, | |
| "step": 13635 | |
| }, | |
| { | |
| "epoch": 2.8598365807668134, | |
| "grad_norm": 1.7494783401489258, | |
| "learning_rate": 2.8568744160061e-07, | |
| "loss": 0.2834, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 2.8629792583280955, | |
| "grad_norm": 1.5144836902618408, | |
| "learning_rate": 2.730430778569909e-07, | |
| "loss": 0.3142, | |
| "step": 13665 | |
| }, | |
| { | |
| "epoch": 2.8661219358893777, | |
| "grad_norm": 1.8125107288360596, | |
| "learning_rate": 2.606833637038231e-07, | |
| "loss": 0.3513, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 2.86926461345066, | |
| "grad_norm": 1.099411129951477, | |
| "learning_rate": 2.4860844142256257e-07, | |
| "loss": 0.3025, | |
| "step": 13695 | |
| }, | |
| { | |
| "epoch": 2.872407291011942, | |
| "grad_norm": 1.8955268859863281, | |
| "learning_rate": 2.3681845001623515e-07, | |
| "loss": 0.3418, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 2.8755499685732246, | |
| "grad_norm": 1.2657068967819214, | |
| "learning_rate": 2.2531352520781535e-07, | |
| "loss": 0.2709, | |
| "step": 13725 | |
| }, | |
| { | |
| "epoch": 2.878692646134507, | |
| "grad_norm": 1.8179534673690796, | |
| "learning_rate": 2.140937994386777e-07, | |
| "loss": 0.3291, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 2.881835323695789, | |
| "grad_norm": 1.7901382446289062, | |
| "learning_rate": 2.031594018670674e-07, | |
| "loss": 0.3132, | |
| "step": 13755 | |
| }, | |
| { | |
| "epoch": 2.884978001257071, | |
| "grad_norm": 1.1521648168563843, | |
| "learning_rate": 1.9251045836661263e-07, | |
| "loss": 0.2764, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 2.8881206788183533, | |
| "grad_norm": 1.2185838222503662, | |
| "learning_rate": 1.8214709152487575e-07, | |
| "loss": 0.3465, | |
| "step": 13785 | |
| }, | |
| { | |
| "epoch": 2.8912633563796355, | |
| "grad_norm": 1.640515685081482, | |
| "learning_rate": 1.720694206419432e-07, | |
| "loss": 0.315, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.8944060339409177, | |
| "grad_norm": 1.314355731010437, | |
| "learning_rate": 1.6227756172905729e-07, | |
| "loss": 0.2685, | |
| "step": 13815 | |
| }, | |
| { | |
| "epoch": 2.8975487115022, | |
| "grad_norm": 1.2538273334503174, | |
| "learning_rate": 1.527716275072699e-07, | |
| "loss": 0.3432, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 2.900691389063482, | |
| "grad_norm": 1.3175392150878906, | |
| "learning_rate": 1.435517274061493e-07, | |
| "loss": 0.2969, | |
| "step": 13845 | |
| }, | |
| { | |
| "epoch": 2.903834066624764, | |
| "grad_norm": 1.512818694114685, | |
| "learning_rate": 1.346179675625253e-07, | |
| "loss": 0.2804, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 2.9069767441860463, | |
| "grad_norm": 1.2288899421691895, | |
| "learning_rate": 1.2597045081926551e-07, | |
| "loss": 0.3092, | |
| "step": 13875 | |
| }, | |
| { | |
| "epoch": 2.9101194217473285, | |
| "grad_norm": 1.157689094543457, | |
| "learning_rate": 1.1760927672408161e-07, | |
| "loss": 0.3075, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 2.9132620993086107, | |
| "grad_norm": 1.6113057136535645, | |
| "learning_rate": 1.0953454152839993e-07, | |
| "loss": 0.3319, | |
| "step": 13905 | |
| }, | |
| { | |
| "epoch": 2.9164047768698933, | |
| "grad_norm": 1.4615386724472046, | |
| "learning_rate": 1.0174633818623991e-07, | |
| "loss": 0.306, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 2.9195474544311755, | |
| "grad_norm": 1.0442296266555786, | |
| "learning_rate": 9.424475635315122e-08, | |
| "loss": 0.3057, | |
| "step": 13935 | |
| }, | |
| { | |
| "epoch": 2.9226901319924576, | |
| "grad_norm": 1.2906923294067383, | |
| "learning_rate": 8.702988238517562e-08, | |
| "loss": 0.2989, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 2.92583280955374, | |
| "grad_norm": 1.6215356588363647, | |
| "learning_rate": 8.010179933786167e-08, | |
| "loss": 0.324, | |
| "step": 13965 | |
| }, | |
| { | |
| "epoch": 2.928975487115022, | |
| "grad_norm": 1.602383017539978, | |
| "learning_rate": 7.346058696530156e-08, | |
| "loss": 0.381, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 2.932118164676304, | |
| "grad_norm": 1.5103670358657837, | |
| "learning_rate": 6.710632171921527e-08, | |
| "loss": 0.3379, | |
| "step": 13995 | |
| }, | |
| { | |
| "epoch": 2.9352608422375863, | |
| "grad_norm": 1.6660419702529907, | |
| "learning_rate": 6.103907674807064e-08, | |
| "loss": 0.312, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 2.9384035197988685, | |
| "grad_norm": 1.0635946989059448, | |
| "learning_rate": 5.52589218962396e-08, | |
| "loss": 0.2964, | |
| "step": 14025 | |
| }, | |
| { | |
| "epoch": 2.941546197360151, | |
| "grad_norm": 1.247497797012329, | |
| "learning_rate": 4.976592370319611e-08, | |
| "loss": 0.2952, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 2.9446888749214333, | |
| "grad_norm": 1.4133594036102295, | |
| "learning_rate": 4.456014540275e-08, | |
| "loss": 0.2696, | |
| "step": 14055 | |
| }, | |
| { | |
| "epoch": 2.9478315524827154, | |
| "grad_norm": 1.5689040422439575, | |
| "learning_rate": 3.964164692231709e-08, | |
| "loss": 0.341, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 2.9509742300439976, | |
| "grad_norm": 1.2708498239517212, | |
| "learning_rate": 3.5010484882233574e-08, | |
| "loss": 0.3055, | |
| "step": 14085 | |
| }, | |
| { | |
| "epoch": 2.95411690760528, | |
| "grad_norm": 1.7094337940216064, | |
| "learning_rate": 3.066671259510101e-08, | |
| "loss": 0.3289, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.957259585166562, | |
| "grad_norm": 1.60092294216156, | |
| "learning_rate": 2.6610380065170136e-08, | |
| "loss": 0.2657, | |
| "step": 14115 | |
| }, | |
| { | |
| "epoch": 2.960402262727844, | |
| "grad_norm": 1.0856350660324097, | |
| "learning_rate": 2.284153398777189e-08, | |
| "loss": 0.3139, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 2.9635449402891263, | |
| "grad_norm": 1.8443694114685059, | |
| "learning_rate": 1.936021774877339e-08, | |
| "loss": 0.2993, | |
| "step": 14145 | |
| }, | |
| { | |
| "epoch": 2.9666876178504085, | |
| "grad_norm": 1.4500629901885986, | |
| "learning_rate": 1.616647142408112e-08, | |
| "loss": 0.2914, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 2.9698302954116906, | |
| "grad_norm": 1.634055256843567, | |
| "learning_rate": 1.3260331779182955e-08, | |
| "loss": 0.3251, | |
| "step": 14175 | |
| }, | |
| { | |
| "epoch": 2.972972972972973, | |
| "grad_norm": 1.6882349252700806, | |
| "learning_rate": 1.0641832268717955e-08, | |
| "loss": 0.2889, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 2.976115650534255, | |
| "grad_norm": 1.6775078773498535, | |
| "learning_rate": 8.311003036098885e-09, | |
| "loss": 0.2957, | |
| "step": 14205 | |
| }, | |
| { | |
| "epoch": 2.979258328095537, | |
| "grad_norm": 2.209030866622925, | |
| "learning_rate": 6.267870913156948e-09, | |
| "loss": 0.3114, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 2.9824010056568198, | |
| "grad_norm": 1.3158173561096191, | |
| "learning_rate": 4.512459419839243e-09, | |
| "loss": 0.293, | |
| "step": 14235 | |
| }, | |
| { | |
| "epoch": 2.985543683218102, | |
| "grad_norm": 1.2444883584976196, | |
| "learning_rate": 3.0447887639367676e-09, | |
| "loss": 0.2313, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 2.988686360779384, | |
| "grad_norm": 1.1739709377288818, | |
| "learning_rate": 1.8648758408512656e-09, | |
| "loss": 0.3228, | |
| "step": 14265 | |
| }, | |
| { | |
| "epoch": 2.9918290383406663, | |
| "grad_norm": 1.4359891414642334, | |
| "learning_rate": 9.72734233398165e-10, | |
| "loss": 0.2946, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 2.9949717159019484, | |
| "grad_norm": 1.3152233362197876, | |
| "learning_rate": 3.6837421165669685e-10, | |
| "loss": 0.2678, | |
| "step": 14295 | |
| }, | |
| { | |
| "epoch": 2.9981143934632306, | |
| "grad_norm": 1.9656248092651367, | |
| "learning_rate": 5.1802732842221036e-11, | |
| "loss": 0.2903, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 14319, | |
| "total_flos": 5.387086302585815e+18, | |
| "train_loss": 0.4039875044737109, | |
| "train_runtime": 21568.6928, | |
| "train_samples_per_second": 2.655, | |
| "train_steps_per_second": 0.664 | |
| } | |
| ], | |
| "logging_steps": 15, | |
| "max_steps": 14319, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 4296, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.387086302585815e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |