| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.405405405405405, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.010810810810810811, |
| "grad_norm": 14.200489044189453, |
| "learning_rate": 3.6e-07, |
| "loss": 0.9925, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.021621621621621623, |
| "grad_norm": 12.673724174499512, |
| "learning_rate": 7.6e-07, |
| "loss": 1.0094, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.032432432432432434, |
| "grad_norm": 8.55466365814209, |
| "learning_rate": 1.1600000000000001e-06, |
| "loss": 0.9405, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.043243243243243246, |
| "grad_norm": 7.453739643096924, |
| "learning_rate": 1.56e-06, |
| "loss": 0.6896, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05405405405405406, |
| "grad_norm": 2.2464568614959717, |
| "learning_rate": 1.9600000000000003e-06, |
| "loss": 0.4138, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06486486486486487, |
| "grad_norm": 2.7017674446105957, |
| "learning_rate": 2.3600000000000003e-06, |
| "loss": 0.3357, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07567567567567568, |
| "grad_norm": 2.866603374481201, |
| "learning_rate": 2.7600000000000003e-06, |
| "loss": 0.2873, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08648648648648649, |
| "grad_norm": 2.4940736293792725, |
| "learning_rate": 3.1600000000000002e-06, |
| "loss": 0.2408, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0972972972972973, |
| "grad_norm": 1.9782941341400146, |
| "learning_rate": 3.5600000000000002e-06, |
| "loss": 0.2165, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10810810810810811, |
| "grad_norm": 1.1495152711868286, |
| "learning_rate": 3.96e-06, |
| "loss": 0.1948, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11891891891891893, |
| "grad_norm": 1.5376442670822144, |
| "learning_rate": 4.360000000000001e-06, |
| "loss": 0.1975, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.12972972972972974, |
| "grad_norm": 1.1928683519363403, |
| "learning_rate": 4.76e-06, |
| "loss": 0.17, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.14054054054054055, |
| "grad_norm": 1.3887287378311157, |
| "learning_rate": 5.1600000000000006e-06, |
| "loss": 0.173, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.15135135135135136, |
| "grad_norm": 1.9856598377227783, |
| "learning_rate": 5.560000000000001e-06, |
| "loss": 0.1637, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.16216216216216217, |
| "grad_norm": 1.1034481525421143, |
| "learning_rate": 5.9600000000000005e-06, |
| "loss": 0.153, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17297297297297298, |
| "grad_norm": 1.4809794425964355, |
| "learning_rate": 6.360000000000001e-06, |
| "loss": 0.1402, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1837837837837838, |
| "grad_norm": 1.930794596672058, |
| "learning_rate": 6.760000000000001e-06, |
| "loss": 0.1449, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1945945945945946, |
| "grad_norm": 0.9831984639167786, |
| "learning_rate": 7.16e-06, |
| "loss": 0.1351, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.20540540540540542, |
| "grad_norm": 1.3090108633041382, |
| "learning_rate": 7.5600000000000005e-06, |
| "loss": 0.1318, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.21621621621621623, |
| "grad_norm": 1.528368353843689, |
| "learning_rate": 7.960000000000002e-06, |
| "loss": 0.1316, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.22702702702702704, |
| "grad_norm": 1.0049641132354736, |
| "learning_rate": 8.36e-06, |
| "loss": 0.1266, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.23783783783783785, |
| "grad_norm": 1.335607647895813, |
| "learning_rate": 8.76e-06, |
| "loss": 0.1183, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.24864864864864866, |
| "grad_norm": 1.2054740190505981, |
| "learning_rate": 9.16e-06, |
| "loss": 0.1135, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2594594594594595, |
| "grad_norm": 1.3126459121704102, |
| "learning_rate": 9.56e-06, |
| "loss": 0.1124, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2702702702702703, |
| "grad_norm": 1.4163024425506592, |
| "learning_rate": 9.960000000000001e-06, |
| "loss": 0.1101, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2810810810810811, |
| "grad_norm": 1.3630675077438354, |
| "learning_rate": 9.99991141987856e-06, |
| "loss": 0.0985, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2918918918918919, |
| "grad_norm": 1.3381731510162354, |
| "learning_rate": 9.999605221019082e-06, |
| "loss": 0.1247, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3027027027027027, |
| "grad_norm": 1.2101716995239258, |
| "learning_rate": 9.99908032323076e-06, |
| "loss": 0.1104, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.31351351351351353, |
| "grad_norm": 1.7090423107147217, |
| "learning_rate": 9.998336749474329e-06, |
| "loss": 0.1165, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.32432432432432434, |
| "grad_norm": 1.0286948680877686, |
| "learning_rate": 9.997374532276108e-06, |
| "loss": 0.1002, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.33513513513513515, |
| "grad_norm": 0.8209099173545837, |
| "learning_rate": 9.996193713726596e-06, |
| "loss": 0.1076, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.34594594594594597, |
| "grad_norm": 0.8612469434738159, |
| "learning_rate": 9.994794345478625e-06, |
| "loss": 0.1062, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3567567567567568, |
| "grad_norm": 1.2024775743484497, |
| "learning_rate": 9.99317648874509e-06, |
| "loss": 0.0998, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3675675675675676, |
| "grad_norm": 0.9072966575622559, |
| "learning_rate": 9.991340214296293e-06, |
| "loss": 0.0884, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3783783783783784, |
| "grad_norm": 0.7544043660163879, |
| "learning_rate": 9.98928560245682e-06, |
| "loss": 0.0838, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3891891891891892, |
| "grad_norm": 1.3930832147598267, |
| "learning_rate": 9.987012743102051e-06, |
| "loss": 0.0953, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.6797852516174316, |
| "learning_rate": 9.984521735654218e-06, |
| "loss": 0.0972, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.41081081081081083, |
| "grad_norm": 0.9434889554977417, |
| "learning_rate": 9.981812689078058e-06, |
| "loss": 0.0842, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.42162162162162165, |
| "grad_norm": 1.2180750370025635, |
| "learning_rate": 9.978885721876041e-06, |
| "loss": 0.096, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.43243243243243246, |
| "grad_norm": 1.0367884635925293, |
| "learning_rate": 9.975740962083197e-06, |
| "loss": 0.0925, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.44324324324324327, |
| "grad_norm": 1.0367573499679565, |
| "learning_rate": 9.972378547261505e-06, |
| "loss": 0.0877, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4540540540540541, |
| "grad_norm": 1.2332873344421387, |
| "learning_rate": 9.968798624493885e-06, |
| "loss": 0.0847, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4648648648648649, |
| "grad_norm": 1.3428494930267334, |
| "learning_rate": 9.965001350377755e-06, |
| "loss": 0.0893, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4756756756756757, |
| "grad_norm": 0.8041930198669434, |
| "learning_rate": 9.960986891018182e-06, |
| "loss": 0.0791, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4864864864864865, |
| "grad_norm": 0.8222742676734924, |
| "learning_rate": 9.95675542202063e-06, |
| "loss": 0.0843, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4972972972972973, |
| "grad_norm": 0.9690313935279846, |
| "learning_rate": 9.952307128483257e-06, |
| "loss": 0.0821, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5081081081081081, |
| "grad_norm": 0.9667937755584717, |
| "learning_rate": 9.947642204988835e-06, |
| "loss": 0.0833, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.518918918918919, |
| "grad_norm": 0.7906189560890198, |
| "learning_rate": 9.942760855596228e-06, |
| "loss": 0.0751, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5297297297297298, |
| "grad_norm": 0.847240149974823, |
| "learning_rate": 9.93766329383147e-06, |
| "loss": 0.075, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5405405405405406, |
| "grad_norm": 0.5874003767967224, |
| "learning_rate": 9.932349742678433e-06, |
| "loss": 0.0782, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5513513513513514, |
| "grad_norm": 0.6482784748077393, |
| "learning_rate": 9.926820434569052e-06, |
| "loss": 0.0759, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5621621621621622, |
| "grad_norm": 0.7045950293540955, |
| "learning_rate": 9.92107561137318e-06, |
| "loss": 0.0721, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.572972972972973, |
| "grad_norm": 0.6596692204475403, |
| "learning_rate": 9.915115524387988e-06, |
| "loss": 0.0746, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5837837837837838, |
| "grad_norm": 0.7132977843284607, |
| "learning_rate": 9.908940434326996e-06, |
| "loss": 0.0753, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5945945945945946, |
| "grad_norm": 0.742100179195404, |
| "learning_rate": 9.902550611308646e-06, |
| "loss": 0.0709, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6054054054054054, |
| "grad_norm": 0.8214631080627441, |
| "learning_rate": 9.895946334844495e-06, |
| "loss": 0.0635, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6162162162162163, |
| "grad_norm": 0.6747580766677856, |
| "learning_rate": 9.88912789382699e-06, |
| "loss": 0.0664, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6270270270270271, |
| "grad_norm": 1.0222914218902588, |
| "learning_rate": 9.88209558651683e-06, |
| "loss": 0.0647, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6378378378378379, |
| "grad_norm": 0.8500170707702637, |
| "learning_rate": 9.874849720529921e-06, |
| "loss": 0.078, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6486486486486487, |
| "grad_norm": 0.9567283391952515, |
| "learning_rate": 9.867390612823915e-06, |
| "loss": 0.07, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6594594594594595, |
| "grad_norm": 0.8195412158966064, |
| "learning_rate": 9.859718589684344e-06, |
| "loss": 0.0744, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6702702702702703, |
| "grad_norm": 1.1124755144119263, |
| "learning_rate": 9.851833986710354e-06, |
| "loss": 0.068, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6810810810810811, |
| "grad_norm": 1.030849575996399, |
| "learning_rate": 9.843737148800023e-06, |
| "loss": 0.068, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6918918918918919, |
| "grad_norm": 1.150592565536499, |
| "learning_rate": 9.835428430135273e-06, |
| "loss": 0.0726, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7027027027027027, |
| "grad_norm": 1.5807855129241943, |
| "learning_rate": 9.82690819416637e-06, |
| "loss": 0.0678, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7135135135135136, |
| "grad_norm": 1.2938547134399414, |
| "learning_rate": 9.818176813596042e-06, |
| "loss": 0.0639, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7243243243243244, |
| "grad_norm": 0.9378499388694763, |
| "learning_rate": 9.80923467036316e-06, |
| "loss": 0.0701, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7351351351351352, |
| "grad_norm": 0.6736406683921814, |
| "learning_rate": 9.800082155626035e-06, |
| "loss": 0.0688, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.745945945945946, |
| "grad_norm": 0.8763739466667175, |
| "learning_rate": 9.790719669745312e-06, |
| "loss": 0.0588, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7567567567567568, |
| "grad_norm": 0.7410522103309631, |
| "learning_rate": 9.781147622266457e-06, |
| "loss": 0.0712, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7675675675675676, |
| "grad_norm": 0.6820585131645203, |
| "learning_rate": 9.771366431901832e-06, |
| "loss": 0.0678, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7783783783783784, |
| "grad_norm": 0.5506922006607056, |
| "learning_rate": 9.761376526512394e-06, |
| "loss": 0.0607, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7891891891891892, |
| "grad_norm": 0.8286749720573425, |
| "learning_rate": 9.751178343088963e-06, |
| "loss": 0.0623, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.6345208883285522, |
| "learning_rate": 9.740772327733124e-06, |
| "loss": 0.055, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.8108108108108109, |
| "grad_norm": 0.6936842203140259, |
| "learning_rate": 9.730158935637697e-06, |
| "loss": 0.06, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8216216216216217, |
| "grad_norm": 0.898246169090271, |
| "learning_rate": 9.719338631066835e-06, |
| "loss": 0.0487, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8324324324324325, |
| "grad_norm": 1.2851463556289673, |
| "learning_rate": 9.708311887335713e-06, |
| "loss": 0.0589, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8432432432432433, |
| "grad_norm": 0.8996430039405823, |
| "learning_rate": 9.697079186789823e-06, |
| "loss": 0.0569, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8540540540540541, |
| "grad_norm": 0.8897648453712463, |
| "learning_rate": 9.685641020783878e-06, |
| "loss": 0.059, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8648648648648649, |
| "grad_norm": 0.6341675519943237, |
| "learning_rate": 9.67399788966031e-06, |
| "loss": 0.0575, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8756756756756757, |
| "grad_norm": 0.6399983167648315, |
| "learning_rate": 9.662150302727395e-06, |
| "loss": 0.0601, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8864864864864865, |
| "grad_norm": 0.6879423260688782, |
| "learning_rate": 9.650098778236967e-06, |
| "loss": 0.0598, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8972972972972973, |
| "grad_norm": 0.44590917229652405, |
| "learning_rate": 9.63784384336175e-06, |
| "loss": 0.0547, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9081081081081082, |
| "grad_norm": 0.8224856853485107, |
| "learning_rate": 9.62538603417229e-06, |
| "loss": 0.0562, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.918918918918919, |
| "grad_norm": 0.9134641289710999, |
| "learning_rate": 9.612725895613526e-06, |
| "loss": 0.0606, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9297297297297298, |
| "grad_norm": 0.9926605820655823, |
| "learning_rate": 9.599863981480927e-06, |
| "loss": 0.0587, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9405405405405406, |
| "grad_norm": 0.7019346356391907, |
| "learning_rate": 9.586800854396284e-06, |
| "loss": 0.0558, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9513513513513514, |
| "grad_norm": 0.5580384731292725, |
| "learning_rate": 9.573537085783096e-06, |
| "loss": 0.052, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9621621621621622, |
| "grad_norm": 0.7283103466033936, |
| "learning_rate": 9.560073255841572e-06, |
| "loss": 0.051, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.972972972972973, |
| "grad_norm": 1.0219670534133911, |
| "learning_rate": 9.546409953523248e-06, |
| "loss": 0.0606, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9837837837837838, |
| "grad_norm": 0.5650193095207214, |
| "learning_rate": 9.53254777650523e-06, |
| "loss": 0.0472, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9945945945945946, |
| "grad_norm": 0.9416384696960449, |
| "learning_rate": 9.518487331164048e-06, |
| "loss": 0.0592, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.0054054054054054, |
| "grad_norm": 1.0085505247116089, |
| "learning_rate": 9.504229232549135e-06, |
| "loss": 0.0517, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.0162162162162163, |
| "grad_norm": 0.7915866374969482, |
| "learning_rate": 9.489774104355908e-06, |
| "loss": 0.0502, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.027027027027027, |
| "grad_norm": 0.7969734072685242, |
| "learning_rate": 9.475122578898508e-06, |
| "loss": 0.0546, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.037837837837838, |
| "grad_norm": 0.7960900068283081, |
| "learning_rate": 9.46027529708212e-06, |
| "loss": 0.0468, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.0486486486486486, |
| "grad_norm": 0.43782392144203186, |
| "learning_rate": 9.445232908374948e-06, |
| "loss": 0.0475, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.0594594594594595, |
| "grad_norm": 0.7317707538604736, |
| "learning_rate": 9.429996070779808e-06, |
| "loss": 0.0468, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.0702702702702702, |
| "grad_norm": 0.5017679929733276, |
| "learning_rate": 9.414565450805333e-06, |
| "loss": 0.052, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.0810810810810811, |
| "grad_norm": 0.679476797580719, |
| "learning_rate": 9.398941723436832e-06, |
| "loss": 0.0501, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0918918918918918, |
| "grad_norm": 0.7387028932571411, |
| "learning_rate": 9.383125572106752e-06, |
| "loss": 0.0479, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.1027027027027028, |
| "grad_norm": 0.691663920879364, |
| "learning_rate": 9.367117688664792e-06, |
| "loss": 0.0468, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.1135135135135135, |
| "grad_norm": 0.7472488880157471, |
| "learning_rate": 9.35091877334763e-06, |
| "loss": 0.0509, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.1243243243243244, |
| "grad_norm": 0.6413132548332214, |
| "learning_rate": 9.334529534748298e-06, |
| "loss": 0.0512, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.135135135135135, |
| "grad_norm": 0.808664858341217, |
| "learning_rate": 9.317950689785188e-06, |
| "loss": 0.0429, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.145945945945946, |
| "grad_norm": 0.9747439622879028, |
| "learning_rate": 9.301182963670688e-06, |
| "loss": 0.0568, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.1567567567567567, |
| "grad_norm": 0.66274094581604, |
| "learning_rate": 9.284227089879456e-06, |
| "loss": 0.0545, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.1675675675675676, |
| "grad_norm": 0.803703248500824, |
| "learning_rate": 9.267083810116341e-06, |
| "loss": 0.0489, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.1783783783783783, |
| "grad_norm": 0.9180741310119629, |
| "learning_rate": 9.249753874283937e-06, |
| "loss": 0.0394, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.1891891891891893, |
| "grad_norm": 0.6170795559883118, |
| "learning_rate": 9.232238040449779e-06, |
| "loss": 0.0499, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.6339899301528931, |
| "learning_rate": 9.214537074813181e-06, |
| "loss": 0.0431, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.2108108108108109, |
| "grad_norm": 0.7984320521354675, |
| "learning_rate": 9.196651751671725e-06, |
| "loss": 0.0484, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.2216216216216216, |
| "grad_norm": 0.6766464710235596, |
| "learning_rate": 9.178582853387383e-06, |
| "loss": 0.0507, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.2324324324324325, |
| "grad_norm": 0.8145914077758789, |
| "learning_rate": 9.160331170352304e-06, |
| "loss": 0.0471, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.2432432432432432, |
| "grad_norm": 0.7077579498291016, |
| "learning_rate": 9.14189750095423e-06, |
| "loss": 0.0492, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2540540540540541, |
| "grad_norm": 0.7460741400718689, |
| "learning_rate": 9.123282651541577e-06, |
| "loss": 0.0421, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.2648648648648648, |
| "grad_norm": 0.888500452041626, |
| "learning_rate": 9.104487436388161e-06, |
| "loss": 0.0458, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.2756756756756757, |
| "grad_norm": 0.6311141848564148, |
| "learning_rate": 9.085512677657582e-06, |
| "loss": 0.0455, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.2864864864864864, |
| "grad_norm": 0.7180992960929871, |
| "learning_rate": 9.066359205367258e-06, |
| "loss": 0.0463, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.2972972972972974, |
| "grad_norm": 0.860873281955719, |
| "learning_rate": 9.047027857352113e-06, |
| "loss": 0.046, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.308108108108108, |
| "grad_norm": 0.7998963594436646, |
| "learning_rate": 9.027519479227934e-06, |
| "loss": 0.0432, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.318918918918919, |
| "grad_norm": 0.6902336478233337, |
| "learning_rate": 9.007834924354384e-06, |
| "loss": 0.0454, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.3297297297297297, |
| "grad_norm": 0.6556903123855591, |
| "learning_rate": 8.987975053797655e-06, |
| "loss": 0.0483, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.3405405405405406, |
| "grad_norm": 0.722432017326355, |
| "learning_rate": 8.967940736292826e-06, |
| "loss": 0.0418, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.3513513513513513, |
| "grad_norm": 0.8098694086074829, |
| "learning_rate": 8.947732848205846e-06, |
| "loss": 0.0479, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3621621621621622, |
| "grad_norm": 0.5581735968589783, |
| "learning_rate": 8.927352273495205e-06, |
| "loss": 0.0441, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.372972972972973, |
| "grad_norm": 0.6423065066337585, |
| "learning_rate": 8.906799903673264e-06, |
| "loss": 0.0466, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.3837837837837839, |
| "grad_norm": 0.6050016283988953, |
| "learning_rate": 8.88607663776726e-06, |
| "loss": 0.047, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.3945945945945946, |
| "grad_norm": 0.7292416095733643, |
| "learning_rate": 8.865183382279979e-06, |
| "loss": 0.0488, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.4054054054054055, |
| "grad_norm": 0.7246063947677612, |
| "learning_rate": 8.844121051150097e-06, |
| "loss": 0.0452, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4162162162162162, |
| "grad_norm": 0.6597639918327332, |
| "learning_rate": 8.822890565712212e-06, |
| "loss": 0.0411, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.427027027027027, |
| "grad_norm": 0.780088484287262, |
| "learning_rate": 8.801492854656537e-06, |
| "loss": 0.0466, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.4378378378378378, |
| "grad_norm": 0.624079167842865, |
| "learning_rate": 8.779928853988269e-06, |
| "loss": 0.0407, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.4486486486486487, |
| "grad_norm": 0.4669192135334015, |
| "learning_rate": 8.758199506986655e-06, |
| "loss": 0.0419, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.4594594594594594, |
| "grad_norm": 0.43073180317878723, |
| "learning_rate": 8.73630576416373e-06, |
| "loss": 0.0419, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.4702702702702704, |
| "grad_norm": 0.6980085372924805, |
| "learning_rate": 8.714248583222727e-06, |
| "loss": 0.041, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.481081081081081, |
| "grad_norm": 0.7735833525657654, |
| "learning_rate": 8.692028929016196e-06, |
| "loss": 0.0357, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.491891891891892, |
| "grad_norm": 0.9316625595092773, |
| "learning_rate": 8.669647773503797e-06, |
| "loss": 0.0395, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.5027027027027027, |
| "grad_norm": 0.4638199210166931, |
| "learning_rate": 8.647106095709773e-06, |
| "loss": 0.0353, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.5135135135135136, |
| "grad_norm": 0.766643762588501, |
| "learning_rate": 8.624404881680138e-06, |
| "loss": 0.0445, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5243243243243243, |
| "grad_norm": 0.5198076367378235, |
| "learning_rate": 8.601545124439535e-06, |
| "loss": 0.0391, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.535135135135135, |
| "grad_norm": 0.8132079243659973, |
| "learning_rate": 8.578527823947801e-06, |
| "loss": 0.0453, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.545945945945946, |
| "grad_norm": 0.8266350030899048, |
| "learning_rate": 8.555353987056224e-06, |
| "loss": 0.0458, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.5567567567567568, |
| "grad_norm": 0.6028828620910645, |
| "learning_rate": 8.532024627463504e-06, |
| "loss": 0.0409, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.5675675675675675, |
| "grad_norm": 0.555508017539978, |
| "learning_rate": 8.508540765671407e-06, |
| "loss": 0.0452, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.5783783783783782, |
| "grad_norm": 0.6225217580795288, |
| "learning_rate": 8.484903428940121e-06, |
| "loss": 0.0441, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.5891891891891892, |
| "grad_norm": 0.7838477492332458, |
| "learning_rate": 8.461113651243333e-06, |
| "loss": 0.0442, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.8195562362670898, |
| "learning_rate": 8.437172473222987e-06, |
| "loss": 0.0465, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.6108108108108108, |
| "grad_norm": 0.6786017417907715, |
| "learning_rate": 8.413080942143767e-06, |
| "loss": 0.0385, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.6216216216216215, |
| "grad_norm": 0.6811516284942627, |
| "learning_rate": 8.388840111847288e-06, |
| "loss": 0.0467, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6324324324324324, |
| "grad_norm": 0.7284950017929077, |
| "learning_rate": 8.364451042705999e-06, |
| "loss": 0.0427, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.6432432432432433, |
| "grad_norm": 0.8170714378356934, |
| "learning_rate": 8.33991480157679e-06, |
| "loss": 0.0428, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.654054054054054, |
| "grad_norm": 1.0552774667739868, |
| "learning_rate": 8.315232461754338e-06, |
| "loss": 0.0433, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.6648648648648647, |
| "grad_norm": 0.6368141174316406, |
| "learning_rate": 8.290405102924144e-06, |
| "loss": 0.0448, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.6756756756756757, |
| "grad_norm": 0.40666061639785767, |
| "learning_rate": 8.265433811115316e-06, |
| "loss": 0.0411, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.6864864864864866, |
| "grad_norm": 0.44923198223114014, |
| "learning_rate": 8.24031967865305e-06, |
| "loss": 0.0396, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.6972972972972973, |
| "grad_norm": 0.7264629602432251, |
| "learning_rate": 8.215063804110858e-06, |
| "loss": 0.041, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.708108108108108, |
| "grad_norm": 0.46326103806495667, |
| "learning_rate": 8.189667292262513e-06, |
| "loss": 0.0382, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.718918918918919, |
| "grad_norm": 0.7099355459213257, |
| "learning_rate": 8.164131254033716e-06, |
| "loss": 0.044, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.7297297297297298, |
| "grad_norm": 0.827971339225769, |
| "learning_rate": 8.138456806453503e-06, |
| "loss": 0.0401, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.7405405405405405, |
| "grad_norm": 0.5584285855293274, |
| "learning_rate": 8.112645072605386e-06, |
| "loss": 0.0374, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.7513513513513512, |
| "grad_norm": 0.6379075646400452, |
| "learning_rate": 8.086697181578223e-06, |
| "loss": 0.0357, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.7621621621621621, |
| "grad_norm": 0.6546282768249512, |
| "learning_rate": 8.060614268416823e-06, |
| "loss": 0.0387, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.772972972972973, |
| "grad_norm": 0.665284276008606, |
| "learning_rate": 8.034397474072308e-06, |
| "loss": 0.0394, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.7837837837837838, |
| "grad_norm": 0.6494997143745422, |
| "learning_rate": 8.008047945352194e-06, |
| "loss": 0.0451, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.7945945945945945, |
| "grad_norm": 0.6128862500190735, |
| "learning_rate": 7.981566834870225e-06, |
| "loss": 0.0357, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.8054054054054054, |
| "grad_norm": 0.6705856919288635, |
| "learning_rate": 7.954955300995961e-06, |
| "loss": 0.0362, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.8162162162162163, |
| "grad_norm": 0.6312499642372131, |
| "learning_rate": 7.928214507804104e-06, |
| "loss": 0.0464, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.827027027027027, |
| "grad_norm": 0.5211047530174255, |
| "learning_rate": 7.901345625023577e-06, |
| "loss": 0.0421, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.8378378378378377, |
| "grad_norm": 0.5969777703285217, |
| "learning_rate": 7.874349827986354e-06, |
| "loss": 0.039, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8486486486486486, |
| "grad_norm": 0.6680732369422913, |
| "learning_rate": 7.847228297576052e-06, |
| "loss": 0.0379, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.8594594594594596, |
| "grad_norm": 0.7429904937744141, |
| "learning_rate": 7.819982220176276e-06, |
| "loss": 0.0366, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.8702702702702703, |
| "grad_norm": 0.6381489634513855, |
| "learning_rate": 7.792612787618714e-06, |
| "loss": 0.0464, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.881081081081081, |
| "grad_norm": 0.5638653635978699, |
| "learning_rate": 7.76512119713101e-06, |
| "loss": 0.037, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.8918918918918919, |
| "grad_norm": 0.44251424074172974, |
| "learning_rate": 7.73750865128439e-06, |
| "loss": 0.0449, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.9027027027027028, |
| "grad_norm": 0.6562517881393433, |
| "learning_rate": 7.70977635794107e-06, |
| "loss": 0.0436, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.9135135135135135, |
| "grad_norm": 0.656298041343689, |
| "learning_rate": 7.681925530201392e-06, |
| "loss": 0.0396, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.9243243243243242, |
| "grad_norm": 0.650246798992157, |
| "learning_rate": 7.65395738635079e-06, |
| "loss": 0.0364, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.9351351351351351, |
| "grad_norm": 0.7400831580162048, |
| "learning_rate": 7.6258731498064796e-06, |
| "loss": 0.0417, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.945945945945946, |
| "grad_norm": 0.6492830514907837, |
| "learning_rate": 7.597674049063948e-06, |
| "loss": 0.0401, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9567567567567568, |
| "grad_norm": 0.47661155462265015, |
| "learning_rate": 7.5693613176432115e-06, |
| "loss": 0.0368, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.9675675675675675, |
| "grad_norm": 1.1524066925048828, |
| "learning_rate": 7.540936194034865e-06, |
| "loss": 0.037, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.9783783783783784, |
| "grad_norm": 0.88988196849823, |
| "learning_rate": 7.512399921645901e-06, |
| "loss": 0.0397, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.9891891891891893, |
| "grad_norm": 0.48117414116859436, |
| "learning_rate": 7.483753748745317e-06, |
| "loss": 0.0357, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.4710519015789032, |
| "learning_rate": 7.454998928409516e-06, |
| "loss": 0.0348, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.0108108108108107, |
| "grad_norm": 0.8176958560943604, |
| "learning_rate": 7.426136718467494e-06, |
| "loss": 0.0433, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.0216216216216214, |
| "grad_norm": 0.6511132717132568, |
| "learning_rate": 7.397168381445812e-06, |
| "loss": 0.0357, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.0324324324324325, |
| "grad_norm": 0.638883650302887, |
| "learning_rate": 7.3680951845133775e-06, |
| "loss": 0.0354, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.0432432432432432, |
| "grad_norm": 0.5587888956069946, |
| "learning_rate": 7.338918399426006e-06, |
| "loss": 0.0312, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.054054054054054, |
| "grad_norm": 0.658237636089325, |
| "learning_rate": 7.309639302470801e-06, |
| "loss": 0.0411, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.064864864864865, |
| "grad_norm": 0.7402287721633911, |
| "learning_rate": 7.280259174410312e-06, |
| "loss": 0.0368, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.075675675675676, |
| "grad_norm": 0.787990391254425, |
| "learning_rate": 7.250779300426518e-06, |
| "loss": 0.0405, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.0864864864864865, |
| "grad_norm": 0.6542930006980896, |
| "learning_rate": 7.22120097006461e-06, |
| "loss": 0.0382, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.097297297297297, |
| "grad_norm": 0.9772655367851257, |
| "learning_rate": 7.191525477176577e-06, |
| "loss": 0.0381, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.108108108108108, |
| "grad_norm": 0.4485587775707245, |
| "learning_rate": 7.161754119864616e-06, |
| "loss": 0.0352, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.118918918918919, |
| "grad_norm": 0.37433692812919617, |
| "learning_rate": 7.13188820042434e-06, |
| "loss": 0.0293, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.1297297297297297, |
| "grad_norm": 0.5695798397064209, |
| "learning_rate": 7.101929025287817e-06, |
| "loss": 0.0391, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.1405405405405404, |
| "grad_norm": 0.6900025010108948, |
| "learning_rate": 7.071877904966422e-06, |
| "loss": 0.041, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.1513513513513516, |
| "grad_norm": 0.6525390148162842, |
| "learning_rate": 7.04173615399351e-06, |
| "loss": 0.036, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.1621621621621623, |
| "grad_norm": 0.5852620601654053, |
| "learning_rate": 7.011505090866914e-06, |
| "loss": 0.0323, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.172972972972973, |
| "grad_norm": 0.5119943618774414, |
| "learning_rate": 6.981186037991271e-06, |
| "loss": 0.0322, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.1837837837837837, |
| "grad_norm": 0.5682677626609802, |
| "learning_rate": 6.950780321620174e-06, |
| "loss": 0.0382, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.1945945945945944, |
| "grad_norm": 0.5038251280784607, |
| "learning_rate": 6.920289271798158e-06, |
| "loss": 0.0344, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.2054054054054055, |
| "grad_norm": 0.6174978017807007, |
| "learning_rate": 6.889714222302517e-06, |
| "loss": 0.0396, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.2162162162162162, |
| "grad_norm": 0.5457316637039185, |
| "learning_rate": 6.8590565105849695e-06, |
| "loss": 0.035, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.227027027027027, |
| "grad_norm": 0.654593288898468, |
| "learning_rate": 6.82831747771314e-06, |
| "loss": 0.0329, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.237837837837838, |
| "grad_norm": 0.8348110318183899, |
| "learning_rate": 6.797498468311907e-06, |
| "loss": 0.0311, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.2486486486486488, |
| "grad_norm": 0.5480743050575256, |
| "learning_rate": 6.766600830504585e-06, |
| "loss": 0.0325, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.2594594594594595, |
| "grad_norm": 0.5144963264465332, |
| "learning_rate": 6.735625915853943e-06, |
| "loss": 0.0278, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.27027027027027, |
| "grad_norm": 0.5144929885864258, |
| "learning_rate": 6.7045750793030905e-06, |
| "loss": 0.0323, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.281081081081081, |
| "grad_norm": 0.6607171893119812, |
| "learning_rate": 6.673449679116215e-06, |
| "loss": 0.0303, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.291891891891892, |
| "grad_norm": 0.48313409090042114, |
| "learning_rate": 6.6422510768191485e-06, |
| "loss": 0.0293, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.3027027027027027, |
| "grad_norm": 0.5028293132781982, |
| "learning_rate": 6.610980637139826e-06, |
| "loss": 0.0359, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.3135135135135134, |
| "grad_norm": 0.584972620010376, |
| "learning_rate": 6.579639727948583e-06, |
| "loss": 0.0345, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.3243243243243246, |
| "grad_norm": 0.41404393315315247, |
| "learning_rate": 6.5482297201983155e-06, |
| "loss": 0.0367, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.3351351351351353, |
| "grad_norm": 0.6505828499794006, |
| "learning_rate": 6.516751987864518e-06, |
| "loss": 0.0327, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.345945945945946, |
| "grad_norm": 0.6071712374687195, |
| "learning_rate": 6.485207907885175e-06, |
| "loss": 0.0321, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.3567567567567567, |
| "grad_norm": 0.7137896418571472, |
| "learning_rate": 6.453598860100536e-06, |
| "loss": 0.0358, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.3675675675675674, |
| "grad_norm": 0.44680288434028625, |
| "learning_rate": 6.421926227192748e-06, |
| "loss": 0.0339, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.3783783783783785, |
| "grad_norm": 0.44672706723213196, |
| "learning_rate": 6.3901913946253815e-06, |
| "loss": 0.031, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.389189189189189, |
| "grad_norm": 0.4818141460418701, |
| "learning_rate": 6.358395750582817e-06, |
| "loss": 0.0293, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.4907307028770447, |
| "learning_rate": 6.3265406859095325e-06, |
| "loss": 0.0321, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.410810810810811, |
| "grad_norm": 0.5125067234039307, |
| "learning_rate": 6.29462759404925e-06, |
| "loss": 0.0273, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.4216216216216218, |
| "grad_norm": 0.5038555860519409, |
| "learning_rate": 6.262657870983989e-06, |
| "loss": 0.0343, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.4324324324324325, |
| "grad_norm": 0.4431181848049164, |
| "learning_rate": 6.230632915173009e-06, |
| "loss": 0.0364, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.443243243243243, |
| "grad_norm": 0.8261221051216125, |
| "learning_rate": 6.198554127491622e-06, |
| "loss": 0.0335, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.454054054054054, |
| "grad_norm": 0.7294406294822693, |
| "learning_rate": 6.166422911169922e-06, |
| "loss": 0.0311, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.464864864864865, |
| "grad_norm": 0.5856850147247314, |
| "learning_rate": 6.1342406717314e-06, |
| "loss": 0.0382, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.4756756756756757, |
| "grad_norm": 0.5774531364440918, |
| "learning_rate": 6.102008816931466e-06, |
| "loss": 0.031, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.4864864864864864, |
| "grad_norm": 0.5902078747749329, |
| "learning_rate": 6.069728756695867e-06, |
| "loss": 0.0261, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.4972972972972975, |
| "grad_norm": 0.6393314599990845, |
| "learning_rate": 6.037401903059008e-06, |
| "loss": 0.0365, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.5081081081081082, |
| "grad_norm": 0.5464274883270264, |
| "learning_rate": 6.005029670102195e-06, |
| "loss": 0.0262, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.518918918918919, |
| "grad_norm": 0.43032026290893555, |
| "learning_rate": 5.972613473891766e-06, |
| "loss": 0.0308, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.5297297297297296, |
| "grad_norm": 0.522640585899353, |
| "learning_rate": 5.940154732417159e-06, |
| "loss": 0.0335, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.5405405405405403, |
| "grad_norm": 0.5012731552124023, |
| "learning_rate": 5.907654865528876e-06, |
| "loss": 0.0341, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.5513513513513515, |
| "grad_norm": 0.7608346939086914, |
| "learning_rate": 5.8751152948763815e-06, |
| "loss": 0.0303, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.562162162162162, |
| "grad_norm": 0.6261675953865051, |
| "learning_rate": 5.842537443845908e-06, |
| "loss": 0.0295, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.572972972972973, |
| "grad_norm": 0.5141987800598145, |
| "learning_rate": 5.809922737498198e-06, |
| "loss": 0.0275, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.583783783783784, |
| "grad_norm": 0.9153976440429688, |
| "learning_rate": 5.777272602506166e-06, |
| "loss": 0.037, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.5945945945945947, |
| "grad_norm": 0.4885156452655792, |
| "learning_rate": 5.744588467092483e-06, |
| "loss": 0.0362, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.6054054054054054, |
| "grad_norm": 0.6028148531913757, |
| "learning_rate": 5.711871760967119e-06, |
| "loss": 0.0304, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.616216216216216, |
| "grad_norm": 0.7090111374855042, |
| "learning_rate": 5.679123915264786e-06, |
| "loss": 0.0315, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.627027027027027, |
| "grad_norm": 0.4406765103340149, |
| "learning_rate": 5.646346362482342e-06, |
| "loss": 0.0293, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.637837837837838, |
| "grad_norm": 0.5371266603469849, |
| "learning_rate": 5.613540536416132e-06, |
| "loss": 0.0369, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.6486486486486487, |
| "grad_norm": 0.39742565155029297, |
| "learning_rate": 5.580707872099265e-06, |
| "loss": 0.0284, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.6594594594594594, |
| "grad_norm": 0.6796385049819946, |
| "learning_rate": 5.547849805738836e-06, |
| "loss": 0.0322, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.6702702702702705, |
| "grad_norm": 0.4403522312641144, |
| "learning_rate": 5.514967774653118e-06, |
| "loss": 0.0287, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.6810810810810812, |
| "grad_norm": 0.7354927659034729, |
| "learning_rate": 5.4820632172086745e-06, |
| "loss": 0.0335, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.691891891891892, |
| "grad_norm": 0.3618038296699524, |
| "learning_rate": 5.449137572757439e-06, |
| "loss": 0.027, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.7027027027027026, |
| "grad_norm": 0.5390015244483948, |
| "learning_rate": 5.41619228157377e-06, |
| "loss": 0.0313, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.7135135135135133, |
| "grad_norm": 0.636913537979126, |
| "learning_rate": 5.3832287847914276e-06, |
| "loss": 0.0324, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.7243243243243245, |
| "grad_norm": 0.6561273336410522, |
| "learning_rate": 5.35024852434055e-06, |
| "loss": 0.0275, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.735135135135135, |
| "grad_norm": 0.6739108562469482, |
| "learning_rate": 5.317252942884568e-06, |
| "loss": 0.0251, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.745945945945946, |
| "grad_norm": 0.35930028557777405, |
| "learning_rate": 5.284243483757109e-06, |
| "loss": 0.03, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.756756756756757, |
| "grad_norm": 0.43455588817596436, |
| "learning_rate": 5.251221590898848e-06, |
| "loss": 0.035, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.7675675675675677, |
| "grad_norm": 0.5473480224609375, |
| "learning_rate": 5.218188708794357e-06, |
| "loss": 0.0312, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.7783783783783784, |
| "grad_norm": 0.7597518563270569, |
| "learning_rate": 5.185146282408911e-06, |
| "loss": 0.0285, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.789189189189189, |
| "grad_norm": 0.7374501824378967, |
| "learning_rate": 5.1520957571252795e-06, |
| "loss": 0.0307, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.7217923998832703, |
| "learning_rate": 5.119038578680511e-06, |
| "loss": 0.0367, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.810810810810811, |
| "grad_norm": 0.7415491938591003, |
| "learning_rate": 5.085976193102678e-06, |
| "loss": 0.0303, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.8216216216216217, |
| "grad_norm": 0.5876473784446716, |
| "learning_rate": 5.052910046647634e-06, |
| "loss": 0.0257, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.8324324324324324, |
| "grad_norm": 0.5944861769676208, |
| "learning_rate": 5.0198415857357465e-06, |
| "loss": 0.0311, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.8432432432432435, |
| "grad_norm": 0.34813442826271057, |
| "learning_rate": 4.986772256888623e-06, |
| "loss": 0.0257, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.854054054054054, |
| "grad_norm": 0.5532535910606384, |
| "learning_rate": 4.953703506665832e-06, |
| "loss": 0.0249, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.864864864864865, |
| "grad_norm": 0.5942780375480652, |
| "learning_rate": 4.9206367816016385e-06, |
| "loss": 0.0314, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.8756756756756756, |
| "grad_norm": 0.5041717886924744, |
| "learning_rate": 4.887573528141721e-06, |
| "loss": 0.0324, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.8864864864864863, |
| "grad_norm": 0.437028706073761, |
| "learning_rate": 4.854515192579892e-06, |
| "loss": 0.0298, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.8972972972972975, |
| "grad_norm": 0.5581991076469421, |
| "learning_rate": 4.821463220994848e-06, |
| "loss": 0.0323, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.908108108108108, |
| "grad_norm": 0.4572019577026367, |
| "learning_rate": 4.788419059186895e-06, |
| "loss": 0.0286, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.918918918918919, |
| "grad_norm": 0.37265264987945557, |
| "learning_rate": 4.75538415261472e-06, |
| "loss": 0.0297, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.92972972972973, |
| "grad_norm": 0.45635294914245605, |
| "learning_rate": 4.722359946332156e-06, |
| "loss": 0.0306, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.9405405405405407, |
| "grad_norm": 0.39567267894744873, |
| "learning_rate": 4.689347884924966e-06, |
| "loss": 0.0282, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.9513513513513514, |
| "grad_norm": 0.8368029594421387, |
| "learning_rate": 4.656349412447664e-06, |
| "loss": 0.033, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.962162162162162, |
| "grad_norm": 0.5868388414382935, |
| "learning_rate": 4.6233659723603374e-06, |
| "loss": 0.0299, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.972972972972973, |
| "grad_norm": 0.4864098131656647, |
| "learning_rate": 4.590399007465503e-06, |
| "loss": 0.0232, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.983783783783784, |
| "grad_norm": 0.6210123896598816, |
| "learning_rate": 4.557449959845005e-06, |
| "loss": 0.0287, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.9945945945945946, |
| "grad_norm": 0.7582560777664185, |
| "learning_rate": 4.524520270796927e-06, |
| "loss": 0.0291, |
| "step": 2770 |
| }, |
| { |
| "epoch": 3.0054054054054054, |
| "grad_norm": 0.5422881245613098, |
| "learning_rate": 4.491611380772545e-06, |
| "loss": 0.0265, |
| "step": 2780 |
| }, |
| { |
| "epoch": 3.016216216216216, |
| "grad_norm": 0.36191341280937195, |
| "learning_rate": 4.458724729313319e-06, |
| "loss": 0.0254, |
| "step": 2790 |
| }, |
| { |
| "epoch": 3.027027027027027, |
| "grad_norm": 0.6519547700881958, |
| "learning_rate": 4.425861754987921e-06, |
| "loss": 0.0302, |
| "step": 2800 |
| }, |
| { |
| "epoch": 3.037837837837838, |
| "grad_norm": 0.6648505330085754, |
| "learning_rate": 4.3930238953293096e-06, |
| "loss": 0.0272, |
| "step": 2810 |
| }, |
| { |
| "epoch": 3.0486486486486486, |
| "grad_norm": 0.6026409864425659, |
| "learning_rate": 4.360212586771847e-06, |
| "loss": 0.0255, |
| "step": 2820 |
| }, |
| { |
| "epoch": 3.0594594594594593, |
| "grad_norm": 0.7593157887458801, |
| "learning_rate": 4.327429264588463e-06, |
| "loss": 0.0299, |
| "step": 2830 |
| }, |
| { |
| "epoch": 3.0702702702702704, |
| "grad_norm": 0.5723231434822083, |
| "learning_rate": 4.294675362827872e-06, |
| "loss": 0.0292, |
| "step": 2840 |
| }, |
| { |
| "epoch": 3.081081081081081, |
| "grad_norm": 0.5857054591178894, |
| "learning_rate": 4.261952314251848e-06, |
| "loss": 0.025, |
| "step": 2850 |
| }, |
| { |
| "epoch": 3.091891891891892, |
| "grad_norm": 0.5336072444915771, |
| "learning_rate": 4.229261550272539e-06, |
| "loss": 0.0281, |
| "step": 2860 |
| }, |
| { |
| "epoch": 3.1027027027027025, |
| "grad_norm": 0.5887632966041565, |
| "learning_rate": 4.196604500889868e-06, |
| "loss": 0.0266, |
| "step": 2870 |
| }, |
| { |
| "epoch": 3.1135135135135137, |
| "grad_norm": 0.43469563126564026, |
| "learning_rate": 4.163982594628969e-06, |
| "loss": 0.0265, |
| "step": 2880 |
| }, |
| { |
| "epoch": 3.1243243243243244, |
| "grad_norm": 0.5681430101394653, |
| "learning_rate": 4.131397258477702e-06, |
| "loss": 0.0264, |
| "step": 2890 |
| }, |
| { |
| "epoch": 3.135135135135135, |
| "grad_norm": 0.4526456296443939, |
| "learning_rate": 4.098849917824232e-06, |
| "loss": 0.0283, |
| "step": 2900 |
| }, |
| { |
| "epoch": 3.145945945945946, |
| "grad_norm": 0.661095917224884, |
| "learning_rate": 4.066341996394677e-06, |
| "loss": 0.0384, |
| "step": 2910 |
| }, |
| { |
| "epoch": 3.156756756756757, |
| "grad_norm": 0.8155391812324524, |
| "learning_rate": 4.033874916190833e-06, |
| "loss": 0.0271, |
| "step": 2920 |
| }, |
| { |
| "epoch": 3.1675675675675676, |
| "grad_norm": 0.4132017493247986, |
| "learning_rate": 4.001450097427965e-06, |
| "loss": 0.0258, |
| "step": 2930 |
| }, |
| { |
| "epoch": 3.1783783783783783, |
| "grad_norm": 0.6691679954528809, |
| "learning_rate": 3.969068958472689e-06, |
| "loss": 0.0272, |
| "step": 2940 |
| }, |
| { |
| "epoch": 3.189189189189189, |
| "grad_norm": 0.5882253050804138, |
| "learning_rate": 3.936732915780923e-06, |
| "loss": 0.0238, |
| "step": 2950 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.4746255576610565, |
| "learning_rate": 3.904443383835929e-06, |
| "loss": 0.0304, |
| "step": 2960 |
| }, |
| { |
| "epoch": 3.210810810810811, |
| "grad_norm": 0.3622106611728668, |
| "learning_rate": 3.872201775086437e-06, |
| "loss": 0.0303, |
| "step": 2970 |
| }, |
| { |
| "epoch": 3.2216216216216216, |
| "grad_norm": 0.39385557174682617, |
| "learning_rate": 3.840009499884862e-06, |
| "loss": 0.0247, |
| "step": 2980 |
| }, |
| { |
| "epoch": 3.2324324324324323, |
| "grad_norm": 0.5606188774108887, |
| "learning_rate": 3.8078679664256112e-06, |
| "loss": 0.0233, |
| "step": 2990 |
| }, |
| { |
| "epoch": 3.2432432432432434, |
| "grad_norm": 0.4499453008174896, |
| "learning_rate": 3.7757785806834808e-06, |
| "loss": 0.0282, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.254054054054054, |
| "grad_norm": 0.5054988265037537, |
| "learning_rate": 3.7437427463521557e-06, |
| "loss": 0.0248, |
| "step": 3010 |
| }, |
| { |
| "epoch": 3.264864864864865, |
| "grad_norm": 0.4411916732788086, |
| "learning_rate": 3.7117618647828168e-06, |
| "loss": 0.029, |
| "step": 3020 |
| }, |
| { |
| "epoch": 3.2756756756756755, |
| "grad_norm": 0.4525195360183716, |
| "learning_rate": 3.6798373349228255e-06, |
| "loss": 0.0263, |
| "step": 3030 |
| }, |
| { |
| "epoch": 3.2864864864864867, |
| "grad_norm": 0.3996464014053345, |
| "learning_rate": 3.647970553254538e-06, |
| "loss": 0.0218, |
| "step": 3040 |
| }, |
| { |
| "epoch": 3.2972972972972974, |
| "grad_norm": 0.5602717995643616, |
| "learning_rate": 3.6161629137342203e-06, |
| "loss": 0.0288, |
| "step": 3050 |
| }, |
| { |
| "epoch": 3.308108108108108, |
| "grad_norm": 0.45001763105392456, |
| "learning_rate": 3.5844158077310654e-06, |
| "loss": 0.0272, |
| "step": 3060 |
| }, |
| { |
| "epoch": 3.3189189189189188, |
| "grad_norm": 0.588201105594635, |
| "learning_rate": 3.5527306239663372e-06, |
| "loss": 0.0287, |
| "step": 3070 |
| }, |
| { |
| "epoch": 3.32972972972973, |
| "grad_norm": 0.6123380661010742, |
| "learning_rate": 3.5211087484526176e-06, |
| "loss": 0.0269, |
| "step": 3080 |
| }, |
| { |
| "epoch": 3.3405405405405406, |
| "grad_norm": 0.44060274958610535, |
| "learning_rate": 3.489551564433186e-06, |
| "loss": 0.0289, |
| "step": 3090 |
| }, |
| { |
| "epoch": 3.3513513513513513, |
| "grad_norm": 0.5079867243766785, |
| "learning_rate": 3.4580604523215008e-06, |
| "loss": 0.0251, |
| "step": 3100 |
| }, |
| { |
| "epoch": 3.362162162162162, |
| "grad_norm": 0.6919063925743103, |
| "learning_rate": 3.4266367896408214e-06, |
| "loss": 0.0337, |
| "step": 3110 |
| }, |
| { |
| "epoch": 3.372972972972973, |
| "grad_norm": 0.691513180732727, |
| "learning_rate": 3.3952819509639534e-06, |
| "loss": 0.026, |
| "step": 3120 |
| }, |
| { |
| "epoch": 3.383783783783784, |
| "grad_norm": 0.42166635394096375, |
| "learning_rate": 3.3639973078531163e-06, |
| "loss": 0.0258, |
| "step": 3130 |
| }, |
| { |
| "epoch": 3.3945945945945946, |
| "grad_norm": 0.5259663462638855, |
| "learning_rate": 3.332784228799947e-06, |
| "loss": 0.0271, |
| "step": 3140 |
| }, |
| { |
| "epoch": 3.4054054054054053, |
| "grad_norm": 0.4360649585723877, |
| "learning_rate": 3.301644079165638e-06, |
| "loss": 0.0267, |
| "step": 3150 |
| }, |
| { |
| "epoch": 3.4162162162162164, |
| "grad_norm": 0.6892200112342834, |
| "learning_rate": 3.27057822112122e-06, |
| "loss": 0.0249, |
| "step": 3160 |
| }, |
| { |
| "epoch": 3.427027027027027, |
| "grad_norm": 0.7275106906890869, |
| "learning_rate": 3.239588013587958e-06, |
| "loss": 0.0295, |
| "step": 3170 |
| }, |
| { |
| "epoch": 3.437837837837838, |
| "grad_norm": 0.39717885851860046, |
| "learning_rate": 3.208674812177926e-06, |
| "loss": 0.0255, |
| "step": 3180 |
| }, |
| { |
| "epoch": 3.4486486486486485, |
| "grad_norm": 0.7455564737319946, |
| "learning_rate": 3.1778399691346985e-06, |
| "loss": 0.0265, |
| "step": 3190 |
| }, |
| { |
| "epoch": 3.4594594594594597, |
| "grad_norm": 0.5156950950622559, |
| "learning_rate": 3.1470848332742005e-06, |
| "loss": 0.0313, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.4702702702702704, |
| "grad_norm": 0.48247015476226807, |
| "learning_rate": 3.1164107499257078e-06, |
| "loss": 0.0277, |
| "step": 3210 |
| }, |
| { |
| "epoch": 3.481081081081081, |
| "grad_norm": 0.5416699647903442, |
| "learning_rate": 3.0858190608729956e-06, |
| "loss": 0.0262, |
| "step": 3220 |
| }, |
| { |
| "epoch": 3.4918918918918918, |
| "grad_norm": 0.4909563362598419, |
| "learning_rate": 3.0553111042956478e-06, |
| "loss": 0.0263, |
| "step": 3230 |
| }, |
| { |
| "epoch": 3.5027027027027025, |
| "grad_norm": 0.6142066717147827, |
| "learning_rate": 3.024888214710517e-06, |
| "loss": 0.0241, |
| "step": 3240 |
| }, |
| { |
| "epoch": 3.5135135135135136, |
| "grad_norm": 0.5313824415206909, |
| "learning_rate": 2.9945517229133494e-06, |
| "loss": 0.0226, |
| "step": 3250 |
| }, |
| { |
| "epoch": 3.5243243243243243, |
| "grad_norm": 0.42341265082359314, |
| "learning_rate": 2.9643029559205727e-06, |
| "loss": 0.0327, |
| "step": 3260 |
| }, |
| { |
| "epoch": 3.535135135135135, |
| "grad_norm": 0.40098732709884644, |
| "learning_rate": 2.9341432369112483e-06, |
| "loss": 0.0254, |
| "step": 3270 |
| }, |
| { |
| "epoch": 3.545945945945946, |
| "grad_norm": 0.5539133548736572, |
| "learning_rate": 2.90407388516919e-06, |
| "loss": 0.0261, |
| "step": 3280 |
| }, |
| { |
| "epoch": 3.556756756756757, |
| "grad_norm": 0.5634763240814209, |
| "learning_rate": 2.8740962160252496e-06, |
| "loss": 0.0307, |
| "step": 3290 |
| }, |
| { |
| "epoch": 3.5675675675675675, |
| "grad_norm": 0.33248645067214966, |
| "learning_rate": 2.844211540799797e-06, |
| "loss": 0.0241, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.5783783783783782, |
| "grad_norm": 0.37290477752685547, |
| "learning_rate": 2.814421166745337e-06, |
| "loss": 0.0231, |
| "step": 3310 |
| }, |
| { |
| "epoch": 3.589189189189189, |
| "grad_norm": 0.5094774961471558, |
| "learning_rate": 2.7847263969893347e-06, |
| "loss": 0.0245, |
| "step": 3320 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.5604079365730286, |
| "learning_rate": 2.7551285304772205e-06, |
| "loss": 0.0249, |
| "step": 3330 |
| }, |
| { |
| "epoch": 3.610810810810811, |
| "grad_norm": 0.49264541268348694, |
| "learning_rate": 2.725628861915557e-06, |
| "loss": 0.0239, |
| "step": 3340 |
| }, |
| { |
| "epoch": 3.6216216216216215, |
| "grad_norm": 0.5173695683479309, |
| "learning_rate": 2.6962286817154158e-06, |
| "loss": 0.0266, |
| "step": 3350 |
| }, |
| { |
| "epoch": 3.6324324324324326, |
| "grad_norm": 0.43412289023399353, |
| "learning_rate": 2.6669292759359166e-06, |
| "loss": 0.0259, |
| "step": 3360 |
| }, |
| { |
| "epoch": 3.6432432432432433, |
| "grad_norm": 0.33709916472435, |
| "learning_rate": 2.637731926227993e-06, |
| "loss": 0.0304, |
| "step": 3370 |
| }, |
| { |
| "epoch": 3.654054054054054, |
| "grad_norm": 0.4906066656112671, |
| "learning_rate": 2.608637909778303e-06, |
| "loss": 0.03, |
| "step": 3380 |
| }, |
| { |
| "epoch": 3.6648648648648647, |
| "grad_norm": 0.5424423217773438, |
| "learning_rate": 2.5796484992533773e-06, |
| "loss": 0.0267, |
| "step": 3390 |
| }, |
| { |
| "epoch": 3.6756756756756754, |
| "grad_norm": 0.5699389576911926, |
| "learning_rate": 2.550764962743947e-06, |
| "loss": 0.0297, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.6864864864864866, |
| "grad_norm": 0.43275460600852966, |
| "learning_rate": 2.5219885637094653e-06, |
| "loss": 0.0252, |
| "step": 3410 |
| }, |
| { |
| "epoch": 3.6972972972972973, |
| "grad_norm": 0.5598377585411072, |
| "learning_rate": 2.4933205609228534e-06, |
| "loss": 0.0275, |
| "step": 3420 |
| }, |
| { |
| "epoch": 3.708108108108108, |
| "grad_norm": 0.4491939842700958, |
| "learning_rate": 2.4647622084154195e-06, |
| "loss": 0.0241, |
| "step": 3430 |
| }, |
| { |
| "epoch": 3.718918918918919, |
| "grad_norm": 0.5898016691207886, |
| "learning_rate": 2.436314755422021e-06, |
| "loss": 0.0235, |
| "step": 3440 |
| }, |
| { |
| "epoch": 3.72972972972973, |
| "grad_norm": 0.5775002241134644, |
| "learning_rate": 2.407979446326411e-06, |
| "loss": 0.0279, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.7405405405405405, |
| "grad_norm": 0.3893280327320099, |
| "learning_rate": 2.3797575206067993e-06, |
| "loss": 0.0305, |
| "step": 3460 |
| }, |
| { |
| "epoch": 3.7513513513513512, |
| "grad_norm": 0.6161412596702576, |
| "learning_rate": 2.3516502127816455e-06, |
| "loss": 0.023, |
| "step": 3470 |
| }, |
| { |
| "epoch": 3.762162162162162, |
| "grad_norm": 0.4626004993915558, |
| "learning_rate": 2.323658752355647e-06, |
| "loss": 0.026, |
| "step": 3480 |
| }, |
| { |
| "epoch": 3.772972972972973, |
| "grad_norm": 0.40138059854507446, |
| "learning_rate": 2.2957843637659654e-06, |
| "loss": 0.023, |
| "step": 3490 |
| }, |
| { |
| "epoch": 3.7837837837837838, |
| "grad_norm": 0.6491943001747131, |
| "learning_rate": 2.268028266328655e-06, |
| "loss": 0.0294, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.7945945945945945, |
| "grad_norm": 0.6690925359725952, |
| "learning_rate": 2.2403916741853366e-06, |
| "loss": 0.0288, |
| "step": 3510 |
| }, |
| { |
| "epoch": 3.8054054054054056, |
| "grad_norm": 0.4034656286239624, |
| "learning_rate": 2.2128757962500817e-06, |
| "loss": 0.0223, |
| "step": 3520 |
| }, |
| { |
| "epoch": 3.8162162162162163, |
| "grad_norm": 0.6808702349662781, |
| "learning_rate": 2.1854818361565277e-06, |
| "loss": 0.0245, |
| "step": 3530 |
| }, |
| { |
| "epoch": 3.827027027027027, |
| "grad_norm": 0.5565137267112732, |
| "learning_rate": 2.1582109922052365e-06, |
| "loss": 0.0289, |
| "step": 3540 |
| }, |
| { |
| "epoch": 3.8378378378378377, |
| "grad_norm": 0.3042198419570923, |
| "learning_rate": 2.131064457311264e-06, |
| "loss": 0.0256, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.8486486486486484, |
| "grad_norm": 0.3782758414745331, |
| "learning_rate": 2.1040434189519924e-06, |
| "loss": 0.0255, |
| "step": 3560 |
| }, |
| { |
| "epoch": 3.8594594594594596, |
| "grad_norm": 0.5362565517425537, |
| "learning_rate": 2.0771490591151734e-06, |
| "loss": 0.0282, |
| "step": 3570 |
| }, |
| { |
| "epoch": 3.8702702702702703, |
| "grad_norm": 0.6471264958381653, |
| "learning_rate": 2.0503825542472315e-06, |
| "loss": 0.0296, |
| "step": 3580 |
| }, |
| { |
| "epoch": 3.881081081081081, |
| "grad_norm": 0.4816526174545288, |
| "learning_rate": 2.023745075201805e-06, |
| "loss": 0.0243, |
| "step": 3590 |
| }, |
| { |
| "epoch": 3.891891891891892, |
| "grad_norm": 0.3072303533554077, |
| "learning_rate": 1.9972377871885157e-06, |
| "loss": 0.0235, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.902702702702703, |
| "grad_norm": 0.5302412509918213, |
| "learning_rate": 1.9708618497220173e-06, |
| "loss": 0.0241, |
| "step": 3610 |
| }, |
| { |
| "epoch": 3.9135135135135135, |
| "grad_norm": 0.5970901250839233, |
| "learning_rate": 1.944618416571259e-06, |
| "loss": 0.0251, |
| "step": 3620 |
| }, |
| { |
| "epoch": 3.924324324324324, |
| "grad_norm": 0.45257118344306946, |
| "learning_rate": 1.9185086357090217e-06, |
| "loss": 0.0254, |
| "step": 3630 |
| }, |
| { |
| "epoch": 3.935135135135135, |
| "grad_norm": 0.5031522512435913, |
| "learning_rate": 1.8925336492617057e-06, |
| "loss": 0.029, |
| "step": 3640 |
| }, |
| { |
| "epoch": 3.945945945945946, |
| "grad_norm": 0.6579412221908569, |
| "learning_rate": 1.8666945934593668e-06, |
| "loss": 0.0259, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.9567567567567568, |
| "grad_norm": 0.589546799659729, |
| "learning_rate": 1.8409925985860128e-06, |
| "loss": 0.0191, |
| "step": 3660 |
| }, |
| { |
| "epoch": 3.9675675675675675, |
| "grad_norm": 0.5495330095291138, |
| "learning_rate": 1.8154287889301604e-06, |
| "loss": 0.0244, |
| "step": 3670 |
| }, |
| { |
| "epoch": 3.9783783783783786, |
| "grad_norm": 0.49903082847595215, |
| "learning_rate": 1.7900042827356611e-06, |
| "loss": 0.0302, |
| "step": 3680 |
| }, |
| { |
| "epoch": 3.9891891891891893, |
| "grad_norm": 0.6340963840484619, |
| "learning_rate": 1.7647201921527802e-06, |
| "loss": 0.0257, |
| "step": 3690 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.40171554684638977, |
| "learning_rate": 1.739577623189545e-06, |
| "loss": 0.0228, |
| "step": 3700 |
| }, |
| { |
| "epoch": 4.010810810810811, |
| "grad_norm": 0.4687245488166809, |
| "learning_rate": 1.714577675663377e-06, |
| "loss": 0.0306, |
| "step": 3710 |
| }, |
| { |
| "epoch": 4.021621621621621, |
| "grad_norm": 0.5827450156211853, |
| "learning_rate": 1.6897214431529647e-06, |
| "loss": 0.0256, |
| "step": 3720 |
| }, |
| { |
| "epoch": 4.032432432432432, |
| "grad_norm": 0.4635016620159149, |
| "learning_rate": 1.6650100129504477e-06, |
| "loss": 0.0204, |
| "step": 3730 |
| }, |
| { |
| "epoch": 4.043243243243243, |
| "grad_norm": 0.5446805953979492, |
| "learning_rate": 1.6404444660138335e-06, |
| "loss": 0.0246, |
| "step": 3740 |
| }, |
| { |
| "epoch": 4.054054054054054, |
| "grad_norm": 0.7499637603759766, |
| "learning_rate": 1.616025876919725e-06, |
| "loss": 0.0256, |
| "step": 3750 |
| }, |
| { |
| "epoch": 4.064864864864865, |
| "grad_norm": 0.4591582715511322, |
| "learning_rate": 1.5917553138163171e-06, |
| "loss": 0.0233, |
| "step": 3760 |
| }, |
| { |
| "epoch": 4.075675675675676, |
| "grad_norm": 0.57037353515625, |
| "learning_rate": 1.5676338383766632e-06, |
| "loss": 0.0265, |
| "step": 3770 |
| }, |
| { |
| "epoch": 4.0864864864864865, |
| "grad_norm": 0.3500991463661194, |
| "learning_rate": 1.5436625057522446e-06, |
| "loss": 0.0219, |
| "step": 3780 |
| }, |
| { |
| "epoch": 4.097297297297297, |
| "grad_norm": 0.39382967352867126, |
| "learning_rate": 1.519842364526804e-06, |
| "loss": 0.0211, |
| "step": 3790 |
| }, |
| { |
| "epoch": 4.108108108108108, |
| "grad_norm": 0.49823036789894104, |
| "learning_rate": 1.4961744566704855e-06, |
| "loss": 0.0223, |
| "step": 3800 |
| }, |
| { |
| "epoch": 4.118918918918919, |
| "grad_norm": 0.5071790218353271, |
| "learning_rate": 1.4726598174942553e-06, |
| "loss": 0.0227, |
| "step": 3810 |
| }, |
| { |
| "epoch": 4.12972972972973, |
| "grad_norm": 0.6194488406181335, |
| "learning_rate": 1.4492994756046036e-06, |
| "loss": 0.0278, |
| "step": 3820 |
| }, |
| { |
| "epoch": 4.140540540540541, |
| "grad_norm": 0.404752641916275, |
| "learning_rate": 1.4260944528585646e-06, |
| "loss": 0.0262, |
| "step": 3830 |
| }, |
| { |
| "epoch": 4.151351351351352, |
| "grad_norm": 0.577710747718811, |
| "learning_rate": 1.4030457643190048e-06, |
| "loss": 0.0247, |
| "step": 3840 |
| }, |
| { |
| "epoch": 4.162162162162162, |
| "grad_norm": 0.4997677505016327, |
| "learning_rate": 1.380154418210231e-06, |
| "loss": 0.0215, |
| "step": 3850 |
| }, |
| { |
| "epoch": 4.172972972972973, |
| "grad_norm": 0.5820034146308899, |
| "learning_rate": 1.3574214158738763e-06, |
| "loss": 0.0299, |
| "step": 3860 |
| }, |
| { |
| "epoch": 4.183783783783784, |
| "grad_norm": 0.38549038767814636, |
| "learning_rate": 1.3348477517251102e-06, |
| "loss": 0.0224, |
| "step": 3870 |
| }, |
| { |
| "epoch": 4.194594594594594, |
| "grad_norm": 0.278469979763031, |
| "learning_rate": 1.312434413209131e-06, |
| "loss": 0.0238, |
| "step": 3880 |
| }, |
| { |
| "epoch": 4.205405405405405, |
| "grad_norm": 0.5217724442481995, |
| "learning_rate": 1.2901823807579727e-06, |
| "loss": 0.0222, |
| "step": 3890 |
| }, |
| { |
| "epoch": 4.216216216216216, |
| "grad_norm": 0.45270097255706787, |
| "learning_rate": 1.2680926277476245e-06, |
| "loss": 0.0241, |
| "step": 3900 |
| }, |
| { |
| "epoch": 4.227027027027027, |
| "grad_norm": 0.4428935647010803, |
| "learning_rate": 1.2461661204554398e-06, |
| "loss": 0.0243, |
| "step": 3910 |
| }, |
| { |
| "epoch": 4.237837837837838, |
| "grad_norm": 0.35106682777404785, |
| "learning_rate": 1.2244038180178836e-06, |
| "loss": 0.0228, |
| "step": 3920 |
| }, |
| { |
| "epoch": 4.248648648648649, |
| "grad_norm": 0.4548755884170532, |
| "learning_rate": 1.2028066723885611e-06, |
| "loss": 0.0244, |
| "step": 3930 |
| }, |
| { |
| "epoch": 4.2594594594594595, |
| "grad_norm": 0.3868198096752167, |
| "learning_rate": 1.1813756282965887e-06, |
| "loss": 0.0238, |
| "step": 3940 |
| }, |
| { |
| "epoch": 4.27027027027027, |
| "grad_norm": 0.59125816822052, |
| "learning_rate": 1.1601116232052639e-06, |
| "loss": 0.0253, |
| "step": 3950 |
| }, |
| { |
| "epoch": 4.281081081081081, |
| "grad_norm": 0.49321606755256653, |
| "learning_rate": 1.1390155872710517e-06, |
| "loss": 0.0267, |
| "step": 3960 |
| }, |
| { |
| "epoch": 4.291891891891892, |
| "grad_norm": 0.5783228874206543, |
| "learning_rate": 1.1180884433029088e-06, |
| "loss": 0.0246, |
| "step": 3970 |
| }, |
| { |
| "epoch": 4.302702702702703, |
| "grad_norm": 0.5392751097679138, |
| "learning_rate": 1.097331106721904e-06, |
| "loss": 0.0221, |
| "step": 3980 |
| }, |
| { |
| "epoch": 4.313513513513514, |
| "grad_norm": 0.44086018204689026, |
| "learning_rate": 1.076744485521186e-06, |
| "loss": 0.0242, |
| "step": 3990 |
| }, |
| { |
| "epoch": 4.324324324324325, |
| "grad_norm": 0.6276214122772217, |
| "learning_rate": 1.0563294802262558e-06, |
| "loss": 0.027, |
| "step": 4000 |
| }, |
| { |
| "epoch": 4.335135135135135, |
| "grad_norm": 0.586881697177887, |
| "learning_rate": 1.036086983855581e-06, |
| "loss": 0.0237, |
| "step": 4010 |
| }, |
| { |
| "epoch": 4.345945945945946, |
| "grad_norm": 0.5574650764465332, |
| "learning_rate": 1.0160178818815314e-06, |
| "loss": 0.0239, |
| "step": 4020 |
| }, |
| { |
| "epoch": 4.356756756756757, |
| "grad_norm": 0.45856261253356934, |
| "learning_rate": 9.961230521916387e-07, |
| "loss": 0.0237, |
| "step": 4030 |
| }, |
| { |
| "epoch": 4.367567567567567, |
| "grad_norm": 0.6503721475601196, |
| "learning_rate": 9.764033650502076e-07, |
| "loss": 0.0257, |
| "step": 4040 |
| }, |
| { |
| "epoch": 4.378378378378378, |
| "grad_norm": 0.4516863226890564, |
| "learning_rate": 9.568596830602345e-07, |
| "loss": 0.0216, |
| "step": 4050 |
| }, |
| { |
| "epoch": 4.389189189189189, |
| "grad_norm": 0.6652976870536804, |
| "learning_rate": 9.374928611256811e-07, |
| "loss": 0.0298, |
| "step": 4060 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.524724006652832, |
| "learning_rate": 9.183037464140804e-07, |
| "loss": 0.0258, |
| "step": 4070 |
| }, |
| { |
| "epoch": 4.410810810810811, |
| "grad_norm": 0.5315241813659668, |
| "learning_rate": 8.992931783194736e-07, |
| "loss": 0.0218, |
| "step": 4080 |
| }, |
| { |
| "epoch": 4.421621621621622, |
| "grad_norm": 0.5122963786125183, |
| "learning_rate": 8.804619884256959e-07, |
| "loss": 0.0234, |
| "step": 4090 |
| }, |
| { |
| "epoch": 4.4324324324324325, |
| "grad_norm": 0.533340573310852, |
| "learning_rate": 8.618110004699976e-07, |
| "loss": 0.0253, |
| "step": 4100 |
| }, |
| { |
| "epoch": 4.443243243243243, |
| "grad_norm": 0.4996013343334198, |
| "learning_rate": 8.4334103030701e-07, |
| "loss": 0.0257, |
| "step": 4110 |
| }, |
| { |
| "epoch": 4.454054054054054, |
| "grad_norm": 0.5451450943946838, |
| "learning_rate": 8.250528858730661e-07, |
| "loss": 0.0223, |
| "step": 4120 |
| }, |
| { |
| "epoch": 4.464864864864865, |
| "grad_norm": 0.5494437217712402, |
| "learning_rate": 8.06947367150846e-07, |
| "loss": 0.0231, |
| "step": 4130 |
| }, |
| { |
| "epoch": 4.475675675675676, |
| "grad_norm": 0.5594083666801453, |
| "learning_rate": 7.890252661343939e-07, |
| "loss": 0.0258, |
| "step": 4140 |
| }, |
| { |
| "epoch": 4.486486486486487, |
| "grad_norm": 0.5314719080924988, |
| "learning_rate": 7.712873667944681e-07, |
| "loss": 0.0239, |
| "step": 4150 |
| }, |
| { |
| "epoch": 4.4972972972972975, |
| "grad_norm": 0.6392211318016052, |
| "learning_rate": 7.537344450442469e-07, |
| "loss": 0.0198, |
| "step": 4160 |
| }, |
| { |
| "epoch": 4.508108108108108, |
| "grad_norm": 0.33640533685684204, |
| "learning_rate": 7.36367268705393e-07, |
| "loss": 0.0233, |
| "step": 4170 |
| }, |
| { |
| "epoch": 4.518918918918919, |
| "grad_norm": 0.6524032354354858, |
| "learning_rate": 7.191865974744599e-07, |
| "loss": 0.0244, |
| "step": 4180 |
| }, |
| { |
| "epoch": 4.52972972972973, |
| "grad_norm": 0.5606145262718201, |
| "learning_rate": 7.021931828896666e-07, |
| "loss": 0.0235, |
| "step": 4190 |
| }, |
| { |
| "epoch": 4.54054054054054, |
| "grad_norm": 0.4877568483352661, |
| "learning_rate": 6.853877682980159e-07, |
| "loss": 0.0189, |
| "step": 4200 |
| }, |
| { |
| "epoch": 4.551351351351351, |
| "grad_norm": 0.4097627103328705, |
| "learning_rate": 6.687710888227849e-07, |
| "loss": 0.0181, |
| "step": 4210 |
| }, |
| { |
| "epoch": 4.562162162162162, |
| "grad_norm": 0.4605877101421356, |
| "learning_rate": 6.523438713313656e-07, |
| "loss": 0.0219, |
| "step": 4220 |
| }, |
| { |
| "epoch": 4.572972972972973, |
| "grad_norm": 0.5829401016235352, |
| "learning_rate": 6.361068344034665e-07, |
| "loss": 0.023, |
| "step": 4230 |
| }, |
| { |
| "epoch": 4.583783783783784, |
| "grad_norm": 0.431805819272995, |
| "learning_rate": 6.200606882996846e-07, |
| "loss": 0.023, |
| "step": 4240 |
| }, |
| { |
| "epoch": 4.594594594594595, |
| "grad_norm": 0.3898727297782898, |
| "learning_rate": 6.042061349304313e-07, |
| "loss": 0.0229, |
| "step": 4250 |
| }, |
| { |
| "epoch": 4.605405405405405, |
| "grad_norm": 0.7912748456001282, |
| "learning_rate": 5.885438678252342e-07, |
| "loss": 0.0284, |
| "step": 4260 |
| }, |
| { |
| "epoch": 4.616216216216216, |
| "grad_norm": 0.47734588384628296, |
| "learning_rate": 5.730745721023939e-07, |
| "loss": 0.0214, |
| "step": 4270 |
| }, |
| { |
| "epoch": 4.627027027027027, |
| "grad_norm": 0.750998318195343, |
| "learning_rate": 5.577989244390192e-07, |
| "loss": 0.0237, |
| "step": 4280 |
| }, |
| { |
| "epoch": 4.6378378378378375, |
| "grad_norm": 0.3779522776603699, |
| "learning_rate": 5.427175930414264e-07, |
| "loss": 0.0225, |
| "step": 4290 |
| }, |
| { |
| "epoch": 4.648648648648649, |
| "grad_norm": 0.44585949182510376, |
| "learning_rate": 5.27831237615905e-07, |
| "loss": 0.0236, |
| "step": 4300 |
| }, |
| { |
| "epoch": 4.65945945945946, |
| "grad_norm": 0.38612884283065796, |
| "learning_rate": 5.131405093398694e-07, |
| "loss": 0.0204, |
| "step": 4310 |
| }, |
| { |
| "epoch": 4.6702702702702705, |
| "grad_norm": 0.4791419804096222, |
| "learning_rate": 4.986460508333635e-07, |
| "loss": 0.0203, |
| "step": 4320 |
| }, |
| { |
| "epoch": 4.681081081081081, |
| "grad_norm": 0.28379178047180176, |
| "learning_rate": 4.843484961309597e-07, |
| "loss": 0.0213, |
| "step": 4330 |
| }, |
| { |
| "epoch": 4.691891891891892, |
| "grad_norm": 0.628464937210083, |
| "learning_rate": 4.7024847065401614e-07, |
| "loss": 0.0235, |
| "step": 4340 |
| }, |
| { |
| "epoch": 4.702702702702703, |
| "grad_norm": 0.3909755349159241, |
| "learning_rate": 4.5634659118332594e-07, |
| "loss": 0.0221, |
| "step": 4350 |
| }, |
| { |
| "epoch": 4.713513513513513, |
| "grad_norm": 0.5916475653648376, |
| "learning_rate": 4.4264346583213435e-07, |
| "loss": 0.0196, |
| "step": 4360 |
| }, |
| { |
| "epoch": 4.724324324324324, |
| "grad_norm": 0.6053293943405151, |
| "learning_rate": 4.291396940195347e-07, |
| "loss": 0.0237, |
| "step": 4370 |
| }, |
| { |
| "epoch": 4.735135135135135, |
| "grad_norm": 0.5997756123542786, |
| "learning_rate": 4.15835866444253e-07, |
| "loss": 0.0214, |
| "step": 4380 |
| }, |
| { |
| "epoch": 4.745945945945946, |
| "grad_norm": 0.2547402083873749, |
| "learning_rate": 4.027325650588043e-07, |
| "loss": 0.024, |
| "step": 4390 |
| }, |
| { |
| "epoch": 4.756756756756757, |
| "grad_norm": 0.5118744969367981, |
| "learning_rate": 3.89830363044042e-07, |
| "loss": 0.0284, |
| "step": 4400 |
| }, |
| { |
| "epoch": 4.767567567567568, |
| "grad_norm": 0.5891205072402954, |
| "learning_rate": 3.771298247840788e-07, |
| "loss": 0.0265, |
| "step": 4410 |
| }, |
| { |
| "epoch": 4.778378378378378, |
| "grad_norm": 0.5021476149559021, |
| "learning_rate": 3.6463150584160056e-07, |
| "loss": 0.0241, |
| "step": 4420 |
| }, |
| { |
| "epoch": 4.789189189189189, |
| "grad_norm": 0.589209794998169, |
| "learning_rate": 3.523359529335696e-07, |
| "loss": 0.0271, |
| "step": 4430 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.52953040599823, |
| "learning_rate": 3.402437039073003e-07, |
| "loss": 0.0274, |
| "step": 4440 |
| }, |
| { |
| "epoch": 4.8108108108108105, |
| "grad_norm": 0.32550954818725586, |
| "learning_rate": 3.283552877169399e-07, |
| "loss": 0.0212, |
| "step": 4450 |
| }, |
| { |
| "epoch": 4.821621621621622, |
| "grad_norm": 0.7259662747383118, |
| "learning_rate": 3.1667122440032506e-07, |
| "loss": 0.028, |
| "step": 4460 |
| }, |
| { |
| "epoch": 4.832432432432433, |
| "grad_norm": 0.39751487970352173, |
| "learning_rate": 3.0519202505623513e-07, |
| "loss": 0.0211, |
| "step": 4470 |
| }, |
| { |
| "epoch": 4.8432432432432435, |
| "grad_norm": 0.39766958355903625, |
| "learning_rate": 2.939181918220385e-07, |
| "loss": 0.0236, |
| "step": 4480 |
| }, |
| { |
| "epoch": 4.854054054054054, |
| "grad_norm": 0.40745213627815247, |
| "learning_rate": 2.828502178517223e-07, |
| "loss": 0.026, |
| "step": 4490 |
| }, |
| { |
| "epoch": 4.864864864864865, |
| "grad_norm": 0.6851739287376404, |
| "learning_rate": 2.719885872943229e-07, |
| "loss": 0.022, |
| "step": 4500 |
| }, |
| { |
| "epoch": 4.875675675675676, |
| "grad_norm": 0.8467708230018616, |
| "learning_rate": 2.6133377527274906e-07, |
| "loss": 0.0293, |
| "step": 4510 |
| }, |
| { |
| "epoch": 4.886486486486486, |
| "grad_norm": 0.5121775269508362, |
| "learning_rate": 2.508862478629936e-07, |
| "loss": 0.0249, |
| "step": 4520 |
| }, |
| { |
| "epoch": 4.897297297297297, |
| "grad_norm": 0.46212148666381836, |
| "learning_rate": 2.4064646207375307e-07, |
| "loss": 0.0259, |
| "step": 4530 |
| }, |
| { |
| "epoch": 4.908108108108108, |
| "grad_norm": 0.41044148802757263, |
| "learning_rate": 2.3061486582642733e-07, |
| "loss": 0.0226, |
| "step": 4540 |
| }, |
| { |
| "epoch": 4.918918918918919, |
| "grad_norm": 0.3751397132873535, |
| "learning_rate": 2.2079189793553667e-07, |
| "loss": 0.0178, |
| "step": 4550 |
| }, |
| { |
| "epoch": 4.92972972972973, |
| "grad_norm": 0.6501022577285767, |
| "learning_rate": 2.111779880895165e-07, |
| "loss": 0.0194, |
| "step": 4560 |
| }, |
| { |
| "epoch": 4.940540540540541, |
| "grad_norm": 0.4015115201473236, |
| "learning_rate": 2.01773556831929e-07, |
| "loss": 0.0232, |
| "step": 4570 |
| }, |
| { |
| "epoch": 4.951351351351351, |
| "grad_norm": 0.40367844700813293, |
| "learning_rate": 1.9257901554306514e-07, |
| "loss": 0.0229, |
| "step": 4580 |
| }, |
| { |
| "epoch": 4.962162162162162, |
| "grad_norm": 0.39636364579200745, |
| "learning_rate": 1.835947664219445e-07, |
| "loss": 0.0232, |
| "step": 4590 |
| }, |
| { |
| "epoch": 4.972972972972973, |
| "grad_norm": 0.49914997816085815, |
| "learning_rate": 1.748212024687307e-07, |
| "loss": 0.0188, |
| "step": 4600 |
| }, |
| { |
| "epoch": 4.9837837837837835, |
| "grad_norm": 0.32945120334625244, |
| "learning_rate": 1.6625870746753148e-07, |
| "loss": 0.0261, |
| "step": 4610 |
| }, |
| { |
| "epoch": 4.994594594594595, |
| "grad_norm": 0.5340284705162048, |
| "learning_rate": 1.5790765596961855e-07, |
| "loss": 0.0264, |
| "step": 4620 |
| }, |
| { |
| "epoch": 5.005405405405406, |
| "grad_norm": 0.5107668042182922, |
| "learning_rate": 1.4976841327703717e-07, |
| "loss": 0.02, |
| "step": 4630 |
| }, |
| { |
| "epoch": 5.0162162162162165, |
| "grad_norm": 0.5458788275718689, |
| "learning_rate": 1.4184133542663014e-07, |
| "loss": 0.0245, |
| "step": 4640 |
| }, |
| { |
| "epoch": 5.027027027027027, |
| "grad_norm": 0.4814177453517914, |
| "learning_rate": 1.341267691744641e-07, |
| "loss": 0.0225, |
| "step": 4650 |
| }, |
| { |
| "epoch": 5.037837837837838, |
| "grad_norm": 0.48016414046287537, |
| "learning_rate": 1.2662505198065667e-07, |
| "loss": 0.0281, |
| "step": 4660 |
| }, |
| { |
| "epoch": 5.048648648648649, |
| "grad_norm": 0.3474937081336975, |
| "learning_rate": 1.193365119946216e-07, |
| "loss": 0.0226, |
| "step": 4670 |
| }, |
| { |
| "epoch": 5.059459459459459, |
| "grad_norm": 0.46155011653900146, |
| "learning_rate": 1.122614680407086e-07, |
| "loss": 0.0169, |
| "step": 4680 |
| }, |
| { |
| "epoch": 5.07027027027027, |
| "grad_norm": 0.4519842863082886, |
| "learning_rate": 1.054002296042611e-07, |
| "loss": 0.0253, |
| "step": 4690 |
| }, |
| { |
| "epoch": 5.081081081081081, |
| "grad_norm": 0.43510937690734863, |
| "learning_rate": 9.875309681807443e-08, |
| "loss": 0.0273, |
| "step": 4700 |
| }, |
| { |
| "epoch": 5.091891891891892, |
| "grad_norm": 0.5046071410179138, |
| "learning_rate": 9.232036044927062e-08, |
| "loss": 0.023, |
| "step": 4710 |
| }, |
| { |
| "epoch": 5.102702702702703, |
| "grad_norm": 0.5468009114265442, |
| "learning_rate": 8.61023018865792e-08, |
| "loss": 0.026, |
| "step": 4720 |
| }, |
| { |
| "epoch": 5.113513513513514, |
| "grad_norm": 0.5225476026535034, |
| "learning_rate": 8.009919312802372e-08, |
| "loss": 0.0234, |
| "step": 4730 |
| }, |
| { |
| "epoch": 5.124324324324324, |
| "grad_norm": 0.49468499422073364, |
| "learning_rate": 7.431129676902905e-08, |
| "loss": 0.0185, |
| "step": 4740 |
| }, |
| { |
| "epoch": 5.135135135135135, |
| "grad_norm": 0.5294644236564636, |
| "learning_rate": 6.873886599093216e-08, |
| "loss": 0.0244, |
| "step": 4750 |
| }, |
| { |
| "epoch": 5.145945945945946, |
| "grad_norm": 0.4790719151496887, |
| "learning_rate": 6.338214454990776e-08, |
| "loss": 0.0225, |
| "step": 4760 |
| }, |
| { |
| "epoch": 5.1567567567567565, |
| "grad_norm": 0.44608423113822937, |
| "learning_rate": 5.82413667663051e-08, |
| "loss": 0.0224, |
| "step": 4770 |
| }, |
| { |
| "epoch": 5.167567567567567, |
| "grad_norm": 0.444612592458725, |
| "learning_rate": 5.3316757514397245e-08, |
| "loss": 0.025, |
| "step": 4780 |
| }, |
| { |
| "epoch": 5.178378378378379, |
| "grad_norm": 0.5216858386993408, |
| "learning_rate": 4.860853221254791e-08, |
| "loss": 0.0202, |
| "step": 4790 |
| }, |
| { |
| "epoch": 5.1891891891891895, |
| "grad_norm": 0.510924756526947, |
| "learning_rate": 4.411689681378284e-08, |
| "loss": 0.0255, |
| "step": 4800 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 0.36953872442245483, |
| "learning_rate": 3.984204779678646e-08, |
| "loss": 0.0197, |
| "step": 4810 |
| }, |
| { |
| "epoch": 5.210810810810811, |
| "grad_norm": 0.6091222763061523, |
| "learning_rate": 3.578417215730323e-08, |
| "loss": 0.0188, |
| "step": 4820 |
| }, |
| { |
| "epoch": 5.221621621621622, |
| "grad_norm": 0.4898870289325714, |
| "learning_rate": 3.194344739995803e-08, |
| "loss": 0.0239, |
| "step": 4830 |
| }, |
| { |
| "epoch": 5.232432432432432, |
| "grad_norm": 0.4123137593269348, |
| "learning_rate": 2.8320041530495724e-08, |
| "loss": 0.0317, |
| "step": 4840 |
| }, |
| { |
| "epoch": 5.243243243243243, |
| "grad_norm": 0.5520591735839844, |
| "learning_rate": 2.4914113048425393e-08, |
| "loss": 0.0225, |
| "step": 4850 |
| }, |
| { |
| "epoch": 5.254054054054054, |
| "grad_norm": 0.4832994043827057, |
| "learning_rate": 2.1725810940094182e-08, |
| "loss": 0.0259, |
| "step": 4860 |
| }, |
| { |
| "epoch": 5.264864864864865, |
| "grad_norm": 0.4356898069381714, |
| "learning_rate": 1.8755274672164202e-08, |
| "loss": 0.0237, |
| "step": 4870 |
| }, |
| { |
| "epoch": 5.275675675675676, |
| "grad_norm": 0.41619211435317993, |
| "learning_rate": 1.600263418551573e-08, |
| "loss": 0.0241, |
| "step": 4880 |
| }, |
| { |
| "epoch": 5.286486486486487, |
| "grad_norm": 0.6041860580444336, |
| "learning_rate": 1.3468009889559541e-08, |
| "loss": 0.0242, |
| "step": 4890 |
| }, |
| { |
| "epoch": 5.297297297297297, |
| "grad_norm": 0.5045064687728882, |
| "learning_rate": 1.1151512656975006e-08, |
| "loss": 0.0267, |
| "step": 4900 |
| }, |
| { |
| "epoch": 5.308108108108108, |
| "grad_norm": 0.270536333322525, |
| "learning_rate": 9.053243818853974e-09, |
| "loss": 0.0214, |
| "step": 4910 |
| }, |
| { |
| "epoch": 5.318918918918919, |
| "grad_norm": 0.643278181552887, |
| "learning_rate": 7.173295160273763e-09, |
| "loss": 0.0222, |
| "step": 4920 |
| }, |
| { |
| "epoch": 5.3297297297297295, |
| "grad_norm": 0.44826021790504456, |
| "learning_rate": 5.511748916279258e-09, |
| "loss": 0.0253, |
| "step": 4930 |
| }, |
| { |
| "epoch": 5.34054054054054, |
| "grad_norm": 0.4237731993198395, |
| "learning_rate": 4.068677768285234e-09, |
| "loss": 0.0219, |
| "step": 4940 |
| }, |
| { |
| "epoch": 5.351351351351352, |
| "grad_norm": 0.4591931700706482, |
| "learning_rate": 2.844144840898344e-09, |
| "loss": 0.0239, |
| "step": 4950 |
| }, |
| { |
| "epoch": 5.3621621621621625, |
| "grad_norm": 0.4851166009902954, |
| "learning_rate": 1.8382036991559938e-09, |
| "loss": 0.0193, |
| "step": 4960 |
| }, |
| { |
| "epoch": 5.372972972972973, |
| "grad_norm": 0.5183833241462708, |
| "learning_rate": 1.0508983461832156e-09, |
| "loss": 0.0232, |
| "step": 4970 |
| }, |
| { |
| "epoch": 5.383783783783784, |
| "grad_norm": 0.4397503435611725, |
| "learning_rate": 4.822632212653222e-10, |
| "loss": 0.0255, |
| "step": 4980 |
| }, |
| { |
| "epoch": 5.394594594594595, |
| "grad_norm": 0.5476964712142944, |
| "learning_rate": 1.323231983463291e-10, |
| "loss": 0.0207, |
| "step": 4990 |
| }, |
| { |
| "epoch": 5.405405405405405, |
| "grad_norm": 0.46734490990638733, |
| "learning_rate": 1.0935849353854722e-12, |
| "loss": 0.0245, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|