| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.04182968498155174, |
| "eval_steps": 500, |
| "global_step": 23000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.818681955719641e-05, |
| "grad_norm": 2.1063554286956787, |
| "learning_rate": 0.0002, |
| "loss": 1.9357, |
| "step": 10 |
| }, |
| { |
| "epoch": 3.637363911439282e-05, |
| "grad_norm": 0.9359453320503235, |
| "learning_rate": 0.0002, |
| "loss": 0.2208, |
| "step": 20 |
| }, |
| { |
| "epoch": 5.4560458671589234e-05, |
| "grad_norm": 0.5420117378234863, |
| "learning_rate": 0.0002, |
| "loss": 0.1459, |
| "step": 30 |
| }, |
| { |
| "epoch": 7.274727822878565e-05, |
| "grad_norm": 0.05442357063293457, |
| "learning_rate": 0.0002, |
| "loss": 0.0823, |
| "step": 40 |
| }, |
| { |
| "epoch": 9.093409778598205e-05, |
| "grad_norm": 0.0005907653248868883, |
| "learning_rate": 0.0002, |
| "loss": 0.0005, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00010912091734317847, |
| "grad_norm": 0.26516178250312805, |
| "learning_rate": 0.0002, |
| "loss": 1.0686, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00012730773690037487, |
| "grad_norm": 0.44067099690437317, |
| "learning_rate": 0.0002, |
| "loss": 0.2613, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0001454945564575713, |
| "grad_norm": 0.09356075525283813, |
| "learning_rate": 0.0002, |
| "loss": 0.1415, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0001636813760147677, |
| "grad_norm": 0.017799921333789825, |
| "learning_rate": 0.0002, |
| "loss": 0.1013, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0001818681955719641, |
| "grad_norm": 0.0018534553237259388, |
| "learning_rate": 0.0002, |
| "loss": 0.0001, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00020005501512916052, |
| "grad_norm": 0.35472020506858826, |
| "learning_rate": 0.0002, |
| "loss": 0.73, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00021824183468635694, |
| "grad_norm": 0.3880878686904907, |
| "learning_rate": 0.0002, |
| "loss": 0.1424, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00023642865424355333, |
| "grad_norm": 0.19027432799339294, |
| "learning_rate": 0.0002, |
| "loss": 0.1173, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00025461547380074975, |
| "grad_norm": 0.019047321751713753, |
| "learning_rate": 0.0002, |
| "loss": 0.0977, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.00027280229335794617, |
| "grad_norm": 0.0003795044613070786, |
| "learning_rate": 0.0002, |
| "loss": 0.0007, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0002909891129151426, |
| "grad_norm": 0.08740618824958801, |
| "learning_rate": 0.0002, |
| "loss": 0.801, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.000309175932472339, |
| "grad_norm": 0.2661634087562561, |
| "learning_rate": 0.0002, |
| "loss": 0.1274, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0003273627520295354, |
| "grad_norm": 0.05828547850251198, |
| "learning_rate": 0.0002, |
| "loss": 0.1184, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0003455495715867318, |
| "grad_norm": 0.02175055630505085, |
| "learning_rate": 0.0002, |
| "loss": 0.0752, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0003637363911439282, |
| "grad_norm": 0.0009504792396910489, |
| "learning_rate": 0.0002, |
| "loss": 0.0005, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0003819232107011246, |
| "grad_norm": 0.25059741735458374, |
| "learning_rate": 0.0002, |
| "loss": 0.5125, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.00040011003025832104, |
| "grad_norm": 0.13256193697452545, |
| "learning_rate": 0.0002, |
| "loss": 0.1014, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.00041829684981551746, |
| "grad_norm": 0.09446375072002411, |
| "learning_rate": 0.0002, |
| "loss": 0.0896, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0004364836693727139, |
| "grad_norm": 0.019389621913433075, |
| "learning_rate": 0.0002, |
| "loss": 0.0726, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0004546704889299103, |
| "grad_norm": 0.0032304900232702494, |
| "learning_rate": 0.0002, |
| "loss": 0.0023, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.00047285730848710666, |
| "grad_norm": 2.5549609661102295, |
| "learning_rate": 0.0002, |
| "loss": 0.3884, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0004910441280443031, |
| "grad_norm": 0.44937047362327576, |
| "learning_rate": 0.0002, |
| "loss": 0.1071, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0005092309476014995, |
| "grad_norm": 0.1509999781847, |
| "learning_rate": 0.0002, |
| "loss": 0.0979, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0005274177671586959, |
| "grad_norm": 0.006468054372817278, |
| "learning_rate": 0.0002, |
| "loss": 0.0611, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0005456045867158923, |
| "grad_norm": 0.0002916739322245121, |
| "learning_rate": 0.0002, |
| "loss": 0.001, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0005637914062730887, |
| "grad_norm": 0.23081810772418976, |
| "learning_rate": 0.0002, |
| "loss": 0.5894, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0005819782258302852, |
| "grad_norm": 0.22755394876003265, |
| "learning_rate": 0.0002, |
| "loss": 0.114, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.0006001650453874816, |
| "grad_norm": 0.49973106384277344, |
| "learning_rate": 0.0002, |
| "loss": 0.093, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.000618351864944678, |
| "grad_norm": 0.08789435774087906, |
| "learning_rate": 0.0002, |
| "loss": 0.0745, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0006365386845018744, |
| "grad_norm": 0.0058497479185462, |
| "learning_rate": 0.0002, |
| "loss": 0.0007, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0006547255040590708, |
| "grad_norm": 0.30569636821746826, |
| "learning_rate": 0.0002, |
| "loss": 0.5169, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0006729123236162671, |
| "grad_norm": 0.2783024311065674, |
| "learning_rate": 0.0002, |
| "loss": 0.13, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0006910991431734636, |
| "grad_norm": 0.13052967190742493, |
| "learning_rate": 0.0002, |
| "loss": 0.0907, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.00070928596273066, |
| "grad_norm": 0.15066476166248322, |
| "learning_rate": 0.0002, |
| "loss": 0.0996, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0007274727822878564, |
| "grad_norm": 0.0005865198327228427, |
| "learning_rate": 0.0002, |
| "loss": 0.0021, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0007456596018450528, |
| "grad_norm": 0.31872233748435974, |
| "learning_rate": 0.0002, |
| "loss": 0.4507, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0007638464214022492, |
| "grad_norm": 0.08874880522489548, |
| "learning_rate": 0.0002, |
| "loss": 0.136, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.0007820332409594457, |
| "grad_norm": 0.10985178500413895, |
| "learning_rate": 0.0002, |
| "loss": 0.0992, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.0008002200605166421, |
| "grad_norm": 0.10776215046644211, |
| "learning_rate": 0.0002, |
| "loss": 0.0661, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.0008184068800738385, |
| "grad_norm": 0.006612936966121197, |
| "learning_rate": 0.0002, |
| "loss": 0.0009, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0008365936996310349, |
| "grad_norm": 0.2757071256637573, |
| "learning_rate": 0.0002, |
| "loss": 0.6376, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0008547805191882313, |
| "grad_norm": 0.24748466908931732, |
| "learning_rate": 0.0002, |
| "loss": 0.1241, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.0008729673387454278, |
| "grad_norm": 0.1035066694021225, |
| "learning_rate": 0.0002, |
| "loss": 0.1008, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.0008911541583026242, |
| "grad_norm": 0.06515783071517944, |
| "learning_rate": 0.0002, |
| "loss": 0.0711, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0009093409778598206, |
| "grad_norm": 0.011224807240068913, |
| "learning_rate": 0.0002, |
| "loss": 0.0004, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.000927527797417017, |
| "grad_norm": 0.2669332027435303, |
| "learning_rate": 0.0002, |
| "loss": 0.5618, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.0009457146169742133, |
| "grad_norm": 0.26048392057418823, |
| "learning_rate": 0.0002, |
| "loss": 0.1259, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.0009639014365314097, |
| "grad_norm": 0.22928836941719055, |
| "learning_rate": 0.0002, |
| "loss": 0.0956, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.0009820882560886062, |
| "grad_norm": 0.084063321352005, |
| "learning_rate": 0.0002, |
| "loss": 0.0708, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.0010002750756458027, |
| "grad_norm": 0.004612344317138195, |
| "learning_rate": 0.0002, |
| "loss": 0.0007, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.001018461895202999, |
| "grad_norm": 0.3866584599018097, |
| "learning_rate": 0.0002, |
| "loss": 0.5406, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.0010366487147601955, |
| "grad_norm": 0.32303065061569214, |
| "learning_rate": 0.0002, |
| "loss": 0.1001, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.0010548355343173918, |
| "grad_norm": 0.09439560770988464, |
| "learning_rate": 0.0002, |
| "loss": 0.1051, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.0010730223538745881, |
| "grad_norm": 0.028145521879196167, |
| "learning_rate": 0.0002, |
| "loss": 0.0638, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.0010912091734317847, |
| "grad_norm": 0.00048497263924218714, |
| "learning_rate": 0.0002, |
| "loss": 0.002, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.001109395992988981, |
| "grad_norm": 0.32391539216041565, |
| "learning_rate": 0.0002, |
| "loss": 0.5483, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0011275828125461775, |
| "grad_norm": 0.02977031283080578, |
| "learning_rate": 0.0002, |
| "loss": 0.1264, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.0011457696321033738, |
| "grad_norm": 0.07332426309585571, |
| "learning_rate": 0.0002, |
| "loss": 0.1018, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.0011639564516605703, |
| "grad_norm": 0.05653443560004234, |
| "learning_rate": 0.0002, |
| "loss": 0.0666, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.0011821432712177666, |
| "grad_norm": 0.0010635281214490533, |
| "learning_rate": 0.0002, |
| "loss": 0.0009, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0012003300907749632, |
| "grad_norm": 0.04933600872755051, |
| "learning_rate": 0.0002, |
| "loss": 0.3902, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0012185169103321595, |
| "grad_norm": 0.14713574945926666, |
| "learning_rate": 0.0002, |
| "loss": 0.0905, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.001236703729889356, |
| "grad_norm": 0.05463952198624611, |
| "learning_rate": 0.0002, |
| "loss": 0.0909, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.0012548905494465523, |
| "grad_norm": 0.10299955308437347, |
| "learning_rate": 0.0002, |
| "loss": 0.07, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.0012730773690037488, |
| "grad_norm": 0.022791124880313873, |
| "learning_rate": 0.0002, |
| "loss": 0.0027, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0012912641885609452, |
| "grad_norm": 0.27977490425109863, |
| "learning_rate": 0.0002, |
| "loss": 0.4421, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.0013094510081181417, |
| "grad_norm": 0.2346329241991043, |
| "learning_rate": 0.0002, |
| "loss": 0.1263, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.001327637827675338, |
| "grad_norm": 0.09294597059488297, |
| "learning_rate": 0.0002, |
| "loss": 0.096, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0013458246472325343, |
| "grad_norm": 0.10317150503396988, |
| "learning_rate": 0.0002, |
| "loss": 0.0727, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.0013640114667897308, |
| "grad_norm": 0.001372635131701827, |
| "learning_rate": 0.0002, |
| "loss": 0.001, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.0013821982863469271, |
| "grad_norm": 0.10563486814498901, |
| "learning_rate": 0.0002, |
| "loss": 0.596, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.0014003851059041237, |
| "grad_norm": 0.14429838955402374, |
| "learning_rate": 0.0002, |
| "loss": 0.1178, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.00141857192546132, |
| "grad_norm": 0.0848163515329361, |
| "learning_rate": 0.0002, |
| "loss": 0.1008, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.0014367587450185165, |
| "grad_norm": 0.07259710133075714, |
| "learning_rate": 0.0002, |
| "loss": 0.069, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.0014549455645757128, |
| "grad_norm": 0.0019098519114777446, |
| "learning_rate": 0.0002, |
| "loss": 0.0023, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0014731323841329093, |
| "grad_norm": 0.2433256059885025, |
| "learning_rate": 0.0002, |
| "loss": 0.2937, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0014913192036901056, |
| "grad_norm": 0.04093409329652786, |
| "learning_rate": 0.0002, |
| "loss": 0.1133, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.0015095060232473022, |
| "grad_norm": 0.0480966717004776, |
| "learning_rate": 0.0002, |
| "loss": 0.0969, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.0015276928428044985, |
| "grad_norm": 0.14327965676784515, |
| "learning_rate": 0.0002, |
| "loss": 0.0866, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.001545879662361695, |
| "grad_norm": 0.001585015095770359, |
| "learning_rate": 0.0002, |
| "loss": 0.0042, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.0015640664819188913, |
| "grad_norm": 0.1842886209487915, |
| "learning_rate": 0.0002, |
| "loss": 0.3273, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.0015822533014760878, |
| "grad_norm": 0.09671049565076828, |
| "learning_rate": 0.0002, |
| "loss": 0.1079, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.0016004401210332842, |
| "grad_norm": 0.2730088233947754, |
| "learning_rate": 0.0002, |
| "loss": 0.1018, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.0016186269405904805, |
| "grad_norm": 0.11702803522348404, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.001636813760147677, |
| "grad_norm": 0.004438066389411688, |
| "learning_rate": 0.0002, |
| "loss": 0.0033, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.0016550005797048733, |
| "grad_norm": 0.18424616754055023, |
| "learning_rate": 0.0002, |
| "loss": 0.4028, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0016731873992620698, |
| "grad_norm": 0.12502820789813995, |
| "learning_rate": 0.0002, |
| "loss": 0.0979, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.0016913742188192661, |
| "grad_norm": 0.05109328031539917, |
| "learning_rate": 0.0002, |
| "loss": 0.0889, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.0017095610383764627, |
| "grad_norm": 0.18566183745861053, |
| "learning_rate": 0.0002, |
| "loss": 0.0833, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.001727747857933659, |
| "grad_norm": 0.0012954511912539601, |
| "learning_rate": 0.0002, |
| "loss": 0.0029, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.0017459346774908555, |
| "grad_norm": 0.06683014333248138, |
| "learning_rate": 0.0002, |
| "loss": 0.4614, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.0017641214970480518, |
| "grad_norm": 0.27773013710975647, |
| "learning_rate": 0.0002, |
| "loss": 0.1131, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.0017823083166052483, |
| "grad_norm": 0.1999790072441101, |
| "learning_rate": 0.0002, |
| "loss": 0.089, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.0018004951361624446, |
| "grad_norm": 0.09625103324651718, |
| "learning_rate": 0.0002, |
| "loss": 0.0739, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.0018186819557196412, |
| "grad_norm": 0.005470380187034607, |
| "learning_rate": 0.0002, |
| "loss": 0.0012, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.0018368687752768375, |
| "grad_norm": 0.038832616060972214, |
| "learning_rate": 0.0002, |
| "loss": 0.5521, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.001855055594834034, |
| "grad_norm": 0.1903093159198761, |
| "learning_rate": 0.0002, |
| "loss": 0.1237, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.0018732424143912303, |
| "grad_norm": 0.031102774664759636, |
| "learning_rate": 0.0002, |
| "loss": 0.0866, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.0018914292339484266, |
| "grad_norm": 0.043983202427625656, |
| "learning_rate": 0.0002, |
| "loss": 0.0611, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.0019096160535056232, |
| "grad_norm": 0.0002974902163259685, |
| "learning_rate": 0.0002, |
| "loss": 0.0035, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.0019278028730628195, |
| "grad_norm": 0.1936149299144745, |
| "learning_rate": 0.0002, |
| "loss": 0.3019, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.001945989692620016, |
| "grad_norm": 0.15767355263233185, |
| "learning_rate": 0.0002, |
| "loss": 0.108, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.0019641765121772123, |
| "grad_norm": 0.08244495838880539, |
| "learning_rate": 0.0002, |
| "loss": 0.091, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.0019823633317344086, |
| "grad_norm": 0.15848897397518158, |
| "learning_rate": 0.0002, |
| "loss": 0.0655, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.0020005501512916054, |
| "grad_norm": 0.0011951205087825656, |
| "learning_rate": 0.0002, |
| "loss": 0.0052, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.0020187369708488017, |
| "grad_norm": 0.13027112185955048, |
| "learning_rate": 0.0002, |
| "loss": 0.2943, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.002036923790405998, |
| "grad_norm": 0.19413979351520538, |
| "learning_rate": 0.0002, |
| "loss": 0.1329, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.0020551106099631943, |
| "grad_norm": 0.08515465259552002, |
| "learning_rate": 0.0002, |
| "loss": 0.0921, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.002073297429520391, |
| "grad_norm": 0.1244177296757698, |
| "learning_rate": 0.0002, |
| "loss": 0.0678, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.0020914842490775873, |
| "grad_norm": 0.0016714326338842511, |
| "learning_rate": 0.0002, |
| "loss": 0.0035, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.0021096710686347836, |
| "grad_norm": 0.24979737401008606, |
| "learning_rate": 0.0002, |
| "loss": 0.2643, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.00212785788819198, |
| "grad_norm": 0.14143353700637817, |
| "learning_rate": 0.0002, |
| "loss": 0.1037, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.0021460447077491763, |
| "grad_norm": 0.033794257789850235, |
| "learning_rate": 0.0002, |
| "loss": 0.087, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.002164231527306373, |
| "grad_norm": 0.11503162235021591, |
| "learning_rate": 0.0002, |
| "loss": 0.0659, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.0021824183468635693, |
| "grad_norm": 0.0014654065016657114, |
| "learning_rate": 0.0002, |
| "loss": 0.0056, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0022006051664207656, |
| "grad_norm": 0.13292767107486725, |
| "learning_rate": 0.0002, |
| "loss": 0.2956, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.002218791985977962, |
| "grad_norm": 0.15238040685653687, |
| "learning_rate": 0.0002, |
| "loss": 0.1122, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.0022369788055351587, |
| "grad_norm": 0.045078523457050323, |
| "learning_rate": 0.0002, |
| "loss": 0.091, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.002255165625092355, |
| "grad_norm": 0.11438468098640442, |
| "learning_rate": 0.0002, |
| "loss": 0.0754, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.0022733524446495513, |
| "grad_norm": 0.001236733514815569, |
| "learning_rate": 0.0002, |
| "loss": 0.004, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.0022915392642067476, |
| "grad_norm": 0.23386552929878235, |
| "learning_rate": 0.0002, |
| "loss": 0.351, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.0023097260837639444, |
| "grad_norm": 0.030786139890551567, |
| "learning_rate": 0.0002, |
| "loss": 0.1074, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.0023279129033211407, |
| "grad_norm": 0.150347501039505, |
| "learning_rate": 0.0002, |
| "loss": 0.1064, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.002346099722878337, |
| "grad_norm": 0.1402382105588913, |
| "learning_rate": 0.0002, |
| "loss": 0.0675, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.0023642865424355333, |
| "grad_norm": 0.0006117303855717182, |
| "learning_rate": 0.0002, |
| "loss": 0.0031, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.00238247336199273, |
| "grad_norm": 0.16031372547149658, |
| "learning_rate": 0.0002, |
| "loss": 0.4344, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.0024006601815499263, |
| "grad_norm": 0.11017303168773651, |
| "learning_rate": 0.0002, |
| "loss": 0.1147, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.0024188470011071227, |
| "grad_norm": 0.055746905505657196, |
| "learning_rate": 0.0002, |
| "loss": 0.093, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.002437033820664319, |
| "grad_norm": 0.09806664288043976, |
| "learning_rate": 0.0002, |
| "loss": 0.0682, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.0024552206402215153, |
| "grad_norm": 0.000555588339921087, |
| "learning_rate": 0.0002, |
| "loss": 0.0045, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.002473407459778712, |
| "grad_norm": 0.04899182915687561, |
| "learning_rate": 0.0002, |
| "loss": 0.3454, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.0024915942793359083, |
| "grad_norm": 0.02870030514895916, |
| "learning_rate": 0.0002, |
| "loss": 0.1036, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.0025097810988931046, |
| "grad_norm": 0.08591730147600174, |
| "learning_rate": 0.0002, |
| "loss": 0.0962, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.002527967918450301, |
| "grad_norm": 0.1169242337346077, |
| "learning_rate": 0.0002, |
| "loss": 0.0627, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.0025461547380074977, |
| "grad_norm": 0.0008637752034701407, |
| "learning_rate": 0.0002, |
| "loss": 0.0025, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.002564341557564694, |
| "grad_norm": 0.11741841584444046, |
| "learning_rate": 0.0002, |
| "loss": 0.3703, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.0025825283771218903, |
| "grad_norm": 0.05232485383749008, |
| "learning_rate": 0.0002, |
| "loss": 0.1072, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.0026007151966790866, |
| "grad_norm": 0.025201110169291496, |
| "learning_rate": 0.0002, |
| "loss": 0.0893, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.0026189020162362834, |
| "grad_norm": 0.11462239921092987, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.0026370888357934797, |
| "grad_norm": 0.002194227883592248, |
| "learning_rate": 0.0002, |
| "loss": 0.0049, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.002655275655350676, |
| "grad_norm": 0.05786404758691788, |
| "learning_rate": 0.0002, |
| "loss": 0.3187, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.0026734624749078723, |
| "grad_norm": 0.03776915743947029, |
| "learning_rate": 0.0002, |
| "loss": 0.1002, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.0026916492944650686, |
| "grad_norm": 0.08628734946250916, |
| "learning_rate": 0.0002, |
| "loss": 0.0933, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.0027098361140222653, |
| "grad_norm": 0.0933455228805542, |
| "learning_rate": 0.0002, |
| "loss": 0.0712, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.0027280229335794617, |
| "grad_norm": 0.0007446192903444171, |
| "learning_rate": 0.0002, |
| "loss": 0.003, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.002746209753136658, |
| "grad_norm": 0.04412281885743141, |
| "learning_rate": 0.0002, |
| "loss": 0.3738, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.0027643965726938543, |
| "grad_norm": 0.04729326814413071, |
| "learning_rate": 0.0002, |
| "loss": 0.1015, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.002782583392251051, |
| "grad_norm": 0.04822024703025818, |
| "learning_rate": 0.0002, |
| "loss": 0.0913, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.0028007702118082473, |
| "grad_norm": 0.15468090772628784, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.0028189570313654436, |
| "grad_norm": 0.0011828596470877528, |
| "learning_rate": 0.0002, |
| "loss": 0.0089, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.00283714385092264, |
| "grad_norm": 0.030639037489891052, |
| "learning_rate": 0.0002, |
| "loss": 0.3382, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.0028553306704798367, |
| "grad_norm": 0.08429472148418427, |
| "learning_rate": 0.0002, |
| "loss": 0.1075, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.002873517490037033, |
| "grad_norm": 0.056431323289871216, |
| "learning_rate": 0.0002, |
| "loss": 0.0946, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.0028917043095942293, |
| "grad_norm": 0.1799512803554535, |
| "learning_rate": 0.0002, |
| "loss": 0.0795, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.0029098911291514256, |
| "grad_norm": 0.0018818675307556987, |
| "learning_rate": 0.0002, |
| "loss": 0.0082, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.002928077948708622, |
| "grad_norm": 0.061398155987262726, |
| "learning_rate": 0.0002, |
| "loss": 0.3414, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.0029462647682658187, |
| "grad_norm": 0.0657019093632698, |
| "learning_rate": 0.0002, |
| "loss": 0.1082, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.002964451587823015, |
| "grad_norm": 0.04701487720012665, |
| "learning_rate": 0.0002, |
| "loss": 0.0918, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.0029826384073802113, |
| "grad_norm": 0.1834430694580078, |
| "learning_rate": 0.0002, |
| "loss": 0.081, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.0030008252269374076, |
| "grad_norm": 0.004841644782572985, |
| "learning_rate": 0.0002, |
| "loss": 0.0138, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.0030190120464946043, |
| "grad_norm": 0.05793444439768791, |
| "learning_rate": 0.0002, |
| "loss": 0.2981, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.0030371988660518007, |
| "grad_norm": 0.049123138189315796, |
| "learning_rate": 0.0002, |
| "loss": 0.1072, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.003055385685608997, |
| "grad_norm": 0.033852141350507736, |
| "learning_rate": 0.0002, |
| "loss": 0.093, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.0030735725051661933, |
| "grad_norm": 0.16161279380321503, |
| "learning_rate": 0.0002, |
| "loss": 0.084, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.00309175932472339, |
| "grad_norm": 0.0011225020280107856, |
| "learning_rate": 0.0002, |
| "loss": 0.0059, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.0031099461442805863, |
| "grad_norm": 0.05849582701921463, |
| "learning_rate": 0.0002, |
| "loss": 0.3878, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.0031281329638377826, |
| "grad_norm": 0.033466637134552, |
| "learning_rate": 0.0002, |
| "loss": 0.1096, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.003146319783394979, |
| "grad_norm": 0.03488466143608093, |
| "learning_rate": 0.0002, |
| "loss": 0.0895, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.0031645066029521757, |
| "grad_norm": 0.15636079013347626, |
| "learning_rate": 0.0002, |
| "loss": 0.0716, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.003182693422509372, |
| "grad_norm": 0.001519509358331561, |
| "learning_rate": 0.0002, |
| "loss": 0.0062, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.0032008802420665683, |
| "grad_norm": 0.04979783296585083, |
| "learning_rate": 0.0002, |
| "loss": 0.3409, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.0032190670616237646, |
| "grad_norm": 0.09706272929906845, |
| "learning_rate": 0.0002, |
| "loss": 0.1052, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.003237253881180961, |
| "grad_norm": 0.08768483251333237, |
| "learning_rate": 0.0002, |
| "loss": 0.0938, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.0032554407007381577, |
| "grad_norm": 0.20421457290649414, |
| "learning_rate": 0.0002, |
| "loss": 0.085, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.003273627520295354, |
| "grad_norm": 0.0024727964773774147, |
| "learning_rate": 0.0002, |
| "loss": 0.0147, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.0032918143398525503, |
| "grad_norm": 0.04270516335964203, |
| "learning_rate": 0.0002, |
| "loss": 0.2872, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.0033100011594097466, |
| "grad_norm": 0.08055799454450607, |
| "learning_rate": 0.0002, |
| "loss": 0.0992, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.0033281879789669433, |
| "grad_norm": 0.02607434056699276, |
| "learning_rate": 0.0002, |
| "loss": 0.0803, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.0033463747985241397, |
| "grad_norm": 0.16260816156864166, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.003364561618081336, |
| "grad_norm": 0.004690333269536495, |
| "learning_rate": 0.0002, |
| "loss": 0.012, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.0033827484376385323, |
| "grad_norm": 0.041513338685035706, |
| "learning_rate": 0.0002, |
| "loss": 0.2491, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.003400935257195729, |
| "grad_norm": 0.08935420960187912, |
| "learning_rate": 0.0002, |
| "loss": 0.1001, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.0034191220767529253, |
| "grad_norm": 0.03826737776398659, |
| "learning_rate": 0.0002, |
| "loss": 0.0877, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.0034373088963101216, |
| "grad_norm": 0.19423778355121613, |
| "learning_rate": 0.0002, |
| "loss": 0.0797, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.003455495715867318, |
| "grad_norm": 0.003520288970321417, |
| "learning_rate": 0.0002, |
| "loss": 0.013, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.0034736825354245143, |
| "grad_norm": 0.14648132026195526, |
| "learning_rate": 0.0002, |
| "loss": 0.3209, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.003491869354981711, |
| "grad_norm": 0.03780071437358856, |
| "learning_rate": 0.0002, |
| "loss": 0.0934, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.0035100561745389073, |
| "grad_norm": 0.05014612153172493, |
| "learning_rate": 0.0002, |
| "loss": 0.082, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.0035282429940961036, |
| "grad_norm": 0.12917590141296387, |
| "learning_rate": 0.0002, |
| "loss": 0.0733, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.0035464298136533, |
| "grad_norm": 0.0030132795218378305, |
| "learning_rate": 0.0002, |
| "loss": 0.0111, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.0035646166332104967, |
| "grad_norm": 0.03008626028895378, |
| "learning_rate": 0.0002, |
| "loss": 0.2126, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.003582803452767693, |
| "grad_norm": 0.0915503203868866, |
| "learning_rate": 0.0002, |
| "loss": 0.1097, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.0036009902723248893, |
| "grad_norm": 0.06607015430927277, |
| "learning_rate": 0.0002, |
| "loss": 0.0932, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.0036191770918820856, |
| "grad_norm": 0.18796613812446594, |
| "learning_rate": 0.0002, |
| "loss": 0.083, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.0036373639114392823, |
| "grad_norm": 0.0022257096134126186, |
| "learning_rate": 0.0002, |
| "loss": 0.0147, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.0036555507309964787, |
| "grad_norm": 0.0687415823340416, |
| "learning_rate": 0.0002, |
| "loss": 0.2604, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.003673737550553675, |
| "grad_norm": 0.025175679475069046, |
| "learning_rate": 0.0002, |
| "loss": 0.0998, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.0036919243701108713, |
| "grad_norm": 0.04275168478488922, |
| "learning_rate": 0.0002, |
| "loss": 0.0898, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.003710111189668068, |
| "grad_norm": 0.17306455969810486, |
| "learning_rate": 0.0002, |
| "loss": 0.0793, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.0037282980092252643, |
| "grad_norm": 0.007826454006135464, |
| "learning_rate": 0.0002, |
| "loss": 0.011, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.0037464848287824606, |
| "grad_norm": 0.06461178511381149, |
| "learning_rate": 0.0002, |
| "loss": 0.2597, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.003764671648339657, |
| "grad_norm": 0.061357177793979645, |
| "learning_rate": 0.0002, |
| "loss": 0.1001, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.0037828584678968533, |
| "grad_norm": 0.029154235497117043, |
| "learning_rate": 0.0002, |
| "loss": 0.0859, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.00380104528745405, |
| "grad_norm": 0.1350340098142624, |
| "learning_rate": 0.0002, |
| "loss": 0.0756, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.0038192321070112463, |
| "grad_norm": 0.0017614173702895641, |
| "learning_rate": 0.0002, |
| "loss": 0.0058, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.0038374189265684426, |
| "grad_norm": 0.024254316464066505, |
| "learning_rate": 0.0002, |
| "loss": 0.3349, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.003855605746125639, |
| "grad_norm": 0.07142530381679535, |
| "learning_rate": 0.0002, |
| "loss": 0.0953, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.0038737925656828357, |
| "grad_norm": 0.05570175498723984, |
| "learning_rate": 0.0002, |
| "loss": 0.0796, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.003891979385240032, |
| "grad_norm": 0.16996875405311584, |
| "learning_rate": 0.0002, |
| "loss": 0.0782, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.003910166204797228, |
| "grad_norm": 0.0058751595206558704, |
| "learning_rate": 0.0002, |
| "loss": 0.0206, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.003928353024354425, |
| "grad_norm": 0.029807811602950096, |
| "learning_rate": 0.0002, |
| "loss": 0.1926, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.003946539843911621, |
| "grad_norm": 0.11123469471931458, |
| "learning_rate": 0.0002, |
| "loss": 0.1082, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.003964726663468817, |
| "grad_norm": 0.074626125395298, |
| "learning_rate": 0.0002, |
| "loss": 0.081, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.003982913483026014, |
| "grad_norm": 0.17397737503051758, |
| "learning_rate": 0.0002, |
| "loss": 0.0729, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.004001100302583211, |
| "grad_norm": 0.007995887659490108, |
| "learning_rate": 0.0002, |
| "loss": 0.022, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.004019287122140407, |
| "grad_norm": 0.039921898394823074, |
| "learning_rate": 0.0002, |
| "loss": 0.1883, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.004037473941697603, |
| "grad_norm": 0.07736324518918991, |
| "learning_rate": 0.0002, |
| "loss": 0.0941, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.0040556607612548, |
| "grad_norm": 0.0867881178855896, |
| "learning_rate": 0.0002, |
| "loss": 0.0873, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.004073847580811996, |
| "grad_norm": 0.1497400403022766, |
| "learning_rate": 0.0002, |
| "loss": 0.0829, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.004092034400369192, |
| "grad_norm": 0.007458314299583435, |
| "learning_rate": 0.0002, |
| "loss": 0.02, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.004110221219926389, |
| "grad_norm": 0.04168029874563217, |
| "learning_rate": 0.0002, |
| "loss": 0.2176, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.004128408039483585, |
| "grad_norm": 0.10017130523920059, |
| "learning_rate": 0.0002, |
| "loss": 0.0958, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.004146594859040782, |
| "grad_norm": 0.02727416157722473, |
| "learning_rate": 0.0002, |
| "loss": 0.088, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.004164781678597978, |
| "grad_norm": 0.15034393966197968, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.004182968498155175, |
| "grad_norm": 0.0023451410233974457, |
| "learning_rate": 0.0002, |
| "loss": 0.0102, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.004201155317712371, |
| "grad_norm": 0.03462455794215202, |
| "learning_rate": 0.0002, |
| "loss": 0.3404, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.004219342137269567, |
| "grad_norm": 0.02866148017346859, |
| "learning_rate": 0.0002, |
| "loss": 0.0932, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.004237528956826764, |
| "grad_norm": 0.0685456171631813, |
| "learning_rate": 0.0002, |
| "loss": 0.0806, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.00425571577638396, |
| "grad_norm": 0.17208056151866913, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.004273902595941156, |
| "grad_norm": 0.008708455599844456, |
| "learning_rate": 0.0002, |
| "loss": 0.0171, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.0042920894154983525, |
| "grad_norm": 0.044025715440511703, |
| "learning_rate": 0.0002, |
| "loss": 0.212, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.00431027623505555, |
| "grad_norm": 0.050246164202690125, |
| "learning_rate": 0.0002, |
| "loss": 0.107, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.004328463054612746, |
| "grad_norm": 0.05257886275649071, |
| "learning_rate": 0.0002, |
| "loss": 0.0868, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.004346649874169942, |
| "grad_norm": 0.16567641496658325, |
| "learning_rate": 0.0002, |
| "loss": 0.0819, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.004364836693727139, |
| "grad_norm": 0.0062621901743113995, |
| "learning_rate": 0.0002, |
| "loss": 0.0171, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.004383023513284335, |
| "grad_norm": 0.03025338612496853, |
| "learning_rate": 0.0002, |
| "loss": 0.2141, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.004401210332841531, |
| "grad_norm": 0.06401577591896057, |
| "learning_rate": 0.0002, |
| "loss": 0.0982, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.004419397152398728, |
| "grad_norm": 0.12474781274795532, |
| "learning_rate": 0.0002, |
| "loss": 0.0834, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.004437583971955924, |
| "grad_norm": 0.18607665598392487, |
| "learning_rate": 0.0002, |
| "loss": 0.0801, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.004455770791513121, |
| "grad_norm": 0.0017643098253756762, |
| "learning_rate": 0.0002, |
| "loss": 0.0129, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.004473957611070317, |
| "grad_norm": 0.03936386480927467, |
| "learning_rate": 0.0002, |
| "loss": 0.2541, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.004492144430627514, |
| "grad_norm": 0.08961635082960129, |
| "learning_rate": 0.0002, |
| "loss": 0.0961, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.00451033125018471, |
| "grad_norm": 0.07525113970041275, |
| "learning_rate": 0.0002, |
| "loss": 0.0844, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.004528518069741906, |
| "grad_norm": 0.16746751964092255, |
| "learning_rate": 0.0002, |
| "loss": 0.071, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.004546704889299103, |
| "grad_norm": 0.0027625334914773703, |
| "learning_rate": 0.0002, |
| "loss": 0.0151, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.004564891708856299, |
| "grad_norm": 0.049662694334983826, |
| "learning_rate": 0.0002, |
| "loss": 0.253, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.004583078528413495, |
| "grad_norm": 0.08312079310417175, |
| "learning_rate": 0.0002, |
| "loss": 0.0922, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.0046012653479706915, |
| "grad_norm": 0.0646345317363739, |
| "learning_rate": 0.0002, |
| "loss": 0.0889, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.004619452167527889, |
| "grad_norm": 0.20036271214485168, |
| "learning_rate": 0.0002, |
| "loss": 0.081, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.004637638987085085, |
| "grad_norm": 0.010091719217598438, |
| "learning_rate": 0.0002, |
| "loss": 0.024, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.004655825806642281, |
| "grad_norm": 0.048885516822338104, |
| "learning_rate": 0.0002, |
| "loss": 0.184, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.004674012626199478, |
| "grad_norm": 0.09142889827489853, |
| "learning_rate": 0.0002, |
| "loss": 0.0935, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.004692199445756674, |
| "grad_norm": 0.049207963049411774, |
| "learning_rate": 0.0002, |
| "loss": 0.0816, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.00471038626531387, |
| "grad_norm": 0.1498396098613739, |
| "learning_rate": 0.0002, |
| "loss": 0.0698, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.004728573084871067, |
| "grad_norm": 0.00522881094366312, |
| "learning_rate": 0.0002, |
| "loss": 0.0189, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.004746759904428263, |
| "grad_norm": 0.07461311668157578, |
| "learning_rate": 0.0002, |
| "loss": 0.1944, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.00476494672398546, |
| "grad_norm": 0.048005711287260056, |
| "learning_rate": 0.0002, |
| "loss": 0.0883, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.004783133543542656, |
| "grad_norm": 0.10151612013578415, |
| "learning_rate": 0.0002, |
| "loss": 0.0827, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.004801320363099853, |
| "grad_norm": 0.1504422426223755, |
| "learning_rate": 0.0002, |
| "loss": 0.078, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.004819507182657049, |
| "grad_norm": 0.004988422151654959, |
| "learning_rate": 0.0002, |
| "loss": 0.0229, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.004837694002214245, |
| "grad_norm": 0.025008924305438995, |
| "learning_rate": 0.0002, |
| "loss": 0.1818, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.004855880821771442, |
| "grad_norm": 0.027460169047117233, |
| "learning_rate": 0.0002, |
| "loss": 0.0966, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.004874067641328638, |
| "grad_norm": 0.09704197943210602, |
| "learning_rate": 0.0002, |
| "loss": 0.0824, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.004892254460885834, |
| "grad_norm": 0.138654425740242, |
| "learning_rate": 0.0002, |
| "loss": 0.0746, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.0049104412804430305, |
| "grad_norm": 0.00859556533396244, |
| "learning_rate": 0.0002, |
| "loss": 0.0187, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.004928628100000228, |
| "grad_norm": 0.05207522585988045, |
| "learning_rate": 0.0002, |
| "loss": 0.1985, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.004946814919557424, |
| "grad_norm": 0.07787417620420456, |
| "learning_rate": 0.0002, |
| "loss": 0.101, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.00496500173911462, |
| "grad_norm": 0.02819981426000595, |
| "learning_rate": 0.0002, |
| "loss": 0.0845, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.004983188558671817, |
| "grad_norm": 0.13569314777851105, |
| "learning_rate": 0.0002, |
| "loss": 0.0756, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.005001375378229013, |
| "grad_norm": 0.05175986513495445, |
| "learning_rate": 0.0002, |
| "loss": 0.024, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.005019562197786209, |
| "grad_norm": 0.037230249494314194, |
| "learning_rate": 0.0002, |
| "loss": 0.2056, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.005037749017343406, |
| "grad_norm": 0.05532974749803543, |
| "learning_rate": 0.0002, |
| "loss": 0.0939, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.005055935836900602, |
| "grad_norm": 0.06930708140134811, |
| "learning_rate": 0.0002, |
| "loss": 0.0853, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.005074122656457798, |
| "grad_norm": 0.16405801475048065, |
| "learning_rate": 0.0002, |
| "loss": 0.0766, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.005092309476014995, |
| "grad_norm": 0.006398684345185757, |
| "learning_rate": 0.0002, |
| "loss": 0.0124, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.005110496295572192, |
| "grad_norm": 0.06269315630197525, |
| "learning_rate": 0.0002, |
| "loss": 0.2703, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.005128683115129388, |
| "grad_norm": 0.049293261021375656, |
| "learning_rate": 0.0002, |
| "loss": 0.0943, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.005146869934686584, |
| "grad_norm": 0.08814405649900436, |
| "learning_rate": 0.0002, |
| "loss": 0.0855, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.005165056754243781, |
| "grad_norm": 0.17452259361743927, |
| "learning_rate": 0.0002, |
| "loss": 0.0822, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.005183243573800977, |
| "grad_norm": 0.005008229520171881, |
| "learning_rate": 0.0002, |
| "loss": 0.0136, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.005201430393358173, |
| "grad_norm": 0.04459540545940399, |
| "learning_rate": 0.0002, |
| "loss": 0.2623, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.0052196172129153695, |
| "grad_norm": 0.042845603078603745, |
| "learning_rate": 0.0002, |
| "loss": 0.0929, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.005237804032472567, |
| "grad_norm": 0.03079635463654995, |
| "learning_rate": 0.0002, |
| "loss": 0.0844, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.005255990852029763, |
| "grad_norm": 0.14457851648330688, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.005274177671586959, |
| "grad_norm": 0.0009016963304020464, |
| "learning_rate": 0.0002, |
| "loss": 0.0037, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.005292364491144156, |
| "grad_norm": 0.0983906164765358, |
| "learning_rate": 0.0002, |
| "loss": 0.3661, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.005310551310701352, |
| "grad_norm": 0.08794154971837997, |
| "learning_rate": 0.0002, |
| "loss": 0.0894, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.005328738130258548, |
| "grad_norm": 0.026981573551893234, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.005346924949815745, |
| "grad_norm": 0.15572553873062134, |
| "learning_rate": 0.0002, |
| "loss": 0.077, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.005365111769372941, |
| "grad_norm": 0.005491070915013552, |
| "learning_rate": 0.0002, |
| "loss": 0.0092, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.005383298588930137, |
| "grad_norm": 0.07383686304092407, |
| "learning_rate": 0.0002, |
| "loss": 0.2574, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.005401485408487334, |
| "grad_norm": 0.05919960141181946, |
| "learning_rate": 0.0002, |
| "loss": 0.1045, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.005419672228044531, |
| "grad_norm": 0.06027739867568016, |
| "learning_rate": 0.0002, |
| "loss": 0.0822, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.005437859047601727, |
| "grad_norm": 0.1288602501153946, |
| "learning_rate": 0.0002, |
| "loss": 0.0688, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.005456045867158923, |
| "grad_norm": 0.007565880194306374, |
| "learning_rate": 0.0002, |
| "loss": 0.0192, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.00547423268671612, |
| "grad_norm": 0.024412864819169044, |
| "learning_rate": 0.0002, |
| "loss": 0.1782, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.005492419506273316, |
| "grad_norm": 0.05559355765581131, |
| "learning_rate": 0.0002, |
| "loss": 0.1072, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.005510606325830512, |
| "grad_norm": 0.07073906064033508, |
| "learning_rate": 0.0002, |
| "loss": 0.0863, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.0055287931453877085, |
| "grad_norm": 0.14979414641857147, |
| "learning_rate": 0.0002, |
| "loss": 0.0788, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.005546979964944906, |
| "grad_norm": 0.0057297456078231335, |
| "learning_rate": 0.0002, |
| "loss": 0.0192, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.005565166784502102, |
| "grad_norm": 0.03195042535662651, |
| "learning_rate": 0.0002, |
| "loss": 0.1879, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.005583353604059298, |
| "grad_norm": 0.05925082787871361, |
| "learning_rate": 0.0002, |
| "loss": 0.0992, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.005601540423616495, |
| "grad_norm": 0.052063606679439545, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.005619727243173691, |
| "grad_norm": 0.16005952656269073, |
| "learning_rate": 0.0002, |
| "loss": 0.0743, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.005637914062730887, |
| "grad_norm": 0.005742133595049381, |
| "learning_rate": 0.0002, |
| "loss": 0.0137, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.005656100882288084, |
| "grad_norm": 0.07523638010025024, |
| "learning_rate": 0.0002, |
| "loss": 0.2072, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.00567428770184528, |
| "grad_norm": 0.23799611628055573, |
| "learning_rate": 0.0002, |
| "loss": 0.0906, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.005692474521402476, |
| "grad_norm": 0.06176261603832245, |
| "learning_rate": 0.0002, |
| "loss": 0.088, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.005710661340959673, |
| "grad_norm": 0.13692723214626312, |
| "learning_rate": 0.0002, |
| "loss": 0.0733, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.00572884816051687, |
| "grad_norm": 0.007059803698211908, |
| "learning_rate": 0.0002, |
| "loss": 0.0194, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.005747034980074066, |
| "grad_norm": 0.08868405222892761, |
| "learning_rate": 0.0002, |
| "loss": 0.1745, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.005765221799631262, |
| "grad_norm": 0.05126733332872391, |
| "learning_rate": 0.0002, |
| "loss": 0.1024, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.005783408619188459, |
| "grad_norm": 0.06377821415662766, |
| "learning_rate": 0.0002, |
| "loss": 0.0846, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.005801595438745655, |
| "grad_norm": 0.10748566687107086, |
| "learning_rate": 0.0002, |
| "loss": 0.0769, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.005819782258302851, |
| "grad_norm": 0.004992443602532148, |
| "learning_rate": 0.0002, |
| "loss": 0.0114, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.0058379690778600475, |
| "grad_norm": 0.0420277863740921, |
| "learning_rate": 0.0002, |
| "loss": 0.2159, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.005856155897417244, |
| "grad_norm": 0.02828531712293625, |
| "learning_rate": 0.0002, |
| "loss": 0.0923, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.005874342716974441, |
| "grad_norm": 0.028216248378157616, |
| "learning_rate": 0.0002, |
| "loss": 0.0789, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.005892529536531637, |
| "grad_norm": 0.11420746147632599, |
| "learning_rate": 0.0002, |
| "loss": 0.0696, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.005910716356088834, |
| "grad_norm": 0.0019631448667496443, |
| "learning_rate": 0.0002, |
| "loss": 0.0128, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.00592890317564603, |
| "grad_norm": 0.05514012649655342, |
| "learning_rate": 0.0002, |
| "loss": 0.2609, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.005947089995203226, |
| "grad_norm": 0.0917636826634407, |
| "learning_rate": 0.0002, |
| "loss": 0.0996, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.005965276814760423, |
| "grad_norm": 0.03648284077644348, |
| "learning_rate": 0.0002, |
| "loss": 0.084, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.005983463634317619, |
| "grad_norm": 0.13859149813652039, |
| "learning_rate": 0.0002, |
| "loss": 0.0807, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.006001650453874815, |
| "grad_norm": 0.013779910281300545, |
| "learning_rate": 0.0002, |
| "loss": 0.0181, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.006019837273432012, |
| "grad_norm": 0.02654041163623333, |
| "learning_rate": 0.0002, |
| "loss": 0.1636, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.006038024092989209, |
| "grad_norm": 0.062298137694597244, |
| "learning_rate": 0.0002, |
| "loss": 0.0872, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.006056210912546405, |
| "grad_norm": 0.0351388119161129, |
| "learning_rate": 0.0002, |
| "loss": 0.0802, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.006074397732103601, |
| "grad_norm": 0.16063807904720306, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.006092584551660798, |
| "grad_norm": 0.009991235099732876, |
| "learning_rate": 0.0002, |
| "loss": 0.016, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.006110771371217994, |
| "grad_norm": 0.052919622510671616, |
| "learning_rate": 0.0002, |
| "loss": 0.2027, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.00612895819077519, |
| "grad_norm": 0.03228602185845375, |
| "learning_rate": 0.0002, |
| "loss": 0.0985, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.0061471450103323865, |
| "grad_norm": 0.11311203986406326, |
| "learning_rate": 0.0002, |
| "loss": 0.0797, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.006165331829889583, |
| "grad_norm": 0.1674620360136032, |
| "learning_rate": 0.0002, |
| "loss": 0.072, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.00618351864944678, |
| "grad_norm": 0.015154430642724037, |
| "learning_rate": 0.0002, |
| "loss": 0.0186, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.006201705469003976, |
| "grad_norm": 0.043151434510946274, |
| "learning_rate": 0.0002, |
| "loss": 0.1892, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.006219892288561173, |
| "grad_norm": 0.12342707067728043, |
| "learning_rate": 0.0002, |
| "loss": 0.0907, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.006238079108118369, |
| "grad_norm": 0.08350827544927597, |
| "learning_rate": 0.0002, |
| "loss": 0.0783, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.006256265927675565, |
| "grad_norm": 0.11938697844743729, |
| "learning_rate": 0.0002, |
| "loss": 0.0666, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.006274452747232762, |
| "grad_norm": 0.015424132347106934, |
| "learning_rate": 0.0002, |
| "loss": 0.0173, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.006292639566789958, |
| "grad_norm": 0.04220043867826462, |
| "learning_rate": 0.0002, |
| "loss": 0.1805, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.006310826386347154, |
| "grad_norm": 0.08813903480768204, |
| "learning_rate": 0.0002, |
| "loss": 0.096, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.006329013205904351, |
| "grad_norm": 0.07647278904914856, |
| "learning_rate": 0.0002, |
| "loss": 0.0821, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.006347200025461548, |
| "grad_norm": 0.14242641627788544, |
| "learning_rate": 0.0002, |
| "loss": 0.0769, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.006365386845018744, |
| "grad_norm": 0.011115231551229954, |
| "learning_rate": 0.0002, |
| "loss": 0.0221, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.00638357366457594, |
| "grad_norm": 0.036351826041936874, |
| "learning_rate": 0.0002, |
| "loss": 0.1557, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.006401760484133137, |
| "grad_norm": 0.08549819141626358, |
| "learning_rate": 0.0002, |
| "loss": 0.0864, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.006419947303690333, |
| "grad_norm": 0.047141823917627335, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.006438134123247529, |
| "grad_norm": 0.13143447041511536, |
| "learning_rate": 0.0002, |
| "loss": 0.0717, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.0064563209428047256, |
| "grad_norm": 0.013524871319532394, |
| "learning_rate": 0.0002, |
| "loss": 0.0149, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.006474507762361922, |
| "grad_norm": 0.03367459774017334, |
| "learning_rate": 0.0002, |
| "loss": 0.1715, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.006492694581919119, |
| "grad_norm": 0.045889757573604584, |
| "learning_rate": 0.0002, |
| "loss": 0.0949, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.006510881401476315, |
| "grad_norm": 0.04099202901124954, |
| "learning_rate": 0.0002, |
| "loss": 0.0813, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.006529068221033512, |
| "grad_norm": 0.133371040225029, |
| "learning_rate": 0.0002, |
| "loss": 0.0767, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.006547255040590708, |
| "grad_norm": 0.00645647756755352, |
| "learning_rate": 0.0002, |
| "loss": 0.0186, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.006565441860147904, |
| "grad_norm": 0.050674330443143845, |
| "learning_rate": 0.0002, |
| "loss": 0.2179, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.006583628679705101, |
| "grad_norm": 0.07087302207946777, |
| "learning_rate": 0.0002, |
| "loss": 0.0882, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.006601815499262297, |
| "grad_norm": 0.02759486250579357, |
| "learning_rate": 0.0002, |
| "loss": 0.0789, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.006620002318819493, |
| "grad_norm": 0.12163479626178741, |
| "learning_rate": 0.0002, |
| "loss": 0.0689, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.00663818913837669, |
| "grad_norm": 0.00969718024134636, |
| "learning_rate": 0.0002, |
| "loss": 0.0112, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.006656375957933887, |
| "grad_norm": 0.07106204330921173, |
| "learning_rate": 0.0002, |
| "loss": 0.199, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.006674562777491083, |
| "grad_norm": 0.08954132348299026, |
| "learning_rate": 0.0002, |
| "loss": 0.0985, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.006692749597048279, |
| "grad_norm": 0.09899396449327469, |
| "learning_rate": 0.0002, |
| "loss": 0.0811, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.006710936416605476, |
| "grad_norm": 0.12119311839342117, |
| "learning_rate": 0.0002, |
| "loss": 0.0698, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.006729123236162672, |
| "grad_norm": 0.013957214541733265, |
| "learning_rate": 0.0002, |
| "loss": 0.018, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.006747310055719868, |
| "grad_norm": 0.03089285083115101, |
| "learning_rate": 0.0002, |
| "loss": 0.1434, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.0067654968752770646, |
| "grad_norm": 0.025650829076766968, |
| "learning_rate": 0.0002, |
| "loss": 0.0886, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.006783683694834261, |
| "grad_norm": 0.044103365391492844, |
| "learning_rate": 0.0002, |
| "loss": 0.0788, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.006801870514391458, |
| "grad_norm": 0.09726370871067047, |
| "learning_rate": 0.0002, |
| "loss": 0.0674, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.006820057333948654, |
| "grad_norm": 0.018105274066329002, |
| "learning_rate": 0.0002, |
| "loss": 0.0232, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.006838244153505851, |
| "grad_norm": 0.021543240174651146, |
| "learning_rate": 0.0002, |
| "loss": 0.1406, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.006856430973063047, |
| "grad_norm": 0.09367050975561142, |
| "learning_rate": 0.0002, |
| "loss": 0.0973, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.006874617792620243, |
| "grad_norm": 0.06836032122373581, |
| "learning_rate": 0.0002, |
| "loss": 0.0848, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.00689280461217744, |
| "grad_norm": 0.11758081614971161, |
| "learning_rate": 0.0002, |
| "loss": 0.0693, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.006910991431734636, |
| "grad_norm": 0.008669364266097546, |
| "learning_rate": 0.0002, |
| "loss": 0.0223, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.006929178251291832, |
| "grad_norm": 0.03903719782829285, |
| "learning_rate": 0.0002, |
| "loss": 0.1519, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.0069473650708490285, |
| "grad_norm": 0.030682874843478203, |
| "learning_rate": 0.0002, |
| "loss": 0.0931, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.006965551890406226, |
| "grad_norm": 0.02693006955087185, |
| "learning_rate": 0.0002, |
| "loss": 0.0784, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.006983738709963422, |
| "grad_norm": 0.09535166621208191, |
| "learning_rate": 0.0002, |
| "loss": 0.0696, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.007001925529520618, |
| "grad_norm": 0.014680403284728527, |
| "learning_rate": 0.0002, |
| "loss": 0.0176, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.007020112349077815, |
| "grad_norm": 0.031090212985873222, |
| "learning_rate": 0.0002, |
| "loss": 0.1544, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.007038299168635011, |
| "grad_norm": 0.05870644003152847, |
| "learning_rate": 0.0002, |
| "loss": 0.0898, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.007056485988192207, |
| "grad_norm": 0.03480982780456543, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.0070746728077494036, |
| "grad_norm": 0.09751418977975845, |
| "learning_rate": 0.0002, |
| "loss": 0.0724, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.0070928596273066, |
| "grad_norm": 0.022084850817918777, |
| "learning_rate": 0.0002, |
| "loss": 0.019, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.007111046446863797, |
| "grad_norm": 0.06994971632957458, |
| "learning_rate": 0.0002, |
| "loss": 0.1478, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.007129233266420993, |
| "grad_norm": 0.05761263892054558, |
| "learning_rate": 0.0002, |
| "loss": 0.0932, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.00714742008597819, |
| "grad_norm": 0.029772033914923668, |
| "learning_rate": 0.0002, |
| "loss": 0.0855, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.007165606905535386, |
| "grad_norm": 0.11868726462125778, |
| "learning_rate": 0.0002, |
| "loss": 0.0727, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.007183793725092582, |
| "grad_norm": 0.0065403408370912075, |
| "learning_rate": 0.0002, |
| "loss": 0.0174, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.007201980544649779, |
| "grad_norm": 0.031544361263513565, |
| "learning_rate": 0.0002, |
| "loss": 0.1827, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.007220167364206975, |
| "grad_norm": 0.031641531735658646, |
| "learning_rate": 0.0002, |
| "loss": 0.0867, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.007238354183764171, |
| "grad_norm": 0.028574040159583092, |
| "learning_rate": 0.0002, |
| "loss": 0.0798, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.0072565410033213675, |
| "grad_norm": 0.12866555154323578, |
| "learning_rate": 0.0002, |
| "loss": 0.0708, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.007274727822878565, |
| "grad_norm": 0.00843430683016777, |
| "learning_rate": 0.0002, |
| "loss": 0.0127, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.007292914642435761, |
| "grad_norm": 0.03737691789865494, |
| "learning_rate": 0.0002, |
| "loss": 0.2201, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.007311101461992957, |
| "grad_norm": 0.05326579511165619, |
| "learning_rate": 0.0002, |
| "loss": 0.0838, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.007329288281550154, |
| "grad_norm": 0.031934209167957306, |
| "learning_rate": 0.0002, |
| "loss": 0.0778, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.00734747510110735, |
| "grad_norm": 0.17401957511901855, |
| "learning_rate": 0.0002, |
| "loss": 0.073, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.007365661920664546, |
| "grad_norm": 0.005256639327853918, |
| "learning_rate": 0.0002, |
| "loss": 0.0122, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.0073838487402217426, |
| "grad_norm": 0.05043623968958855, |
| "learning_rate": 0.0002, |
| "loss": 0.2524, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.007402035559778939, |
| "grad_norm": 0.06662425398826599, |
| "learning_rate": 0.0002, |
| "loss": 0.0976, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.007420222379336136, |
| "grad_norm": 0.13419686257839203, |
| "learning_rate": 0.0002, |
| "loss": 0.0833, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.007438409198893332, |
| "grad_norm": 0.176285520195961, |
| "learning_rate": 0.0002, |
| "loss": 0.0761, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.007456596018450529, |
| "grad_norm": 0.008489354513585567, |
| "learning_rate": 0.0002, |
| "loss": 0.0182, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.007474782838007725, |
| "grad_norm": 0.06247509643435478, |
| "learning_rate": 0.0002, |
| "loss": 0.2232, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.007492969657564921, |
| "grad_norm": 0.05744702368974686, |
| "learning_rate": 0.0002, |
| "loss": 0.0875, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.007511156477122118, |
| "grad_norm": 0.053026407957077026, |
| "learning_rate": 0.0002, |
| "loss": 0.0807, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.007529343296679314, |
| "grad_norm": 0.11734003573656082, |
| "learning_rate": 0.0002, |
| "loss": 0.0724, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.00754753011623651, |
| "grad_norm": 0.005216363817453384, |
| "learning_rate": 0.0002, |
| "loss": 0.0129, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.0075657169357937065, |
| "grad_norm": 0.08154789358377457, |
| "learning_rate": 0.0002, |
| "loss": 0.2221, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.007583903755350904, |
| "grad_norm": 0.03619784861803055, |
| "learning_rate": 0.0002, |
| "loss": 0.0993, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.0076020905749081, |
| "grad_norm": 0.08239256590604782, |
| "learning_rate": 0.0002, |
| "loss": 0.0811, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.007620277394465296, |
| "grad_norm": 0.11934535950422287, |
| "learning_rate": 0.0002, |
| "loss": 0.0726, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.007638464214022493, |
| "grad_norm": 0.006965799257159233, |
| "learning_rate": 0.0002, |
| "loss": 0.0181, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.007656651033579689, |
| "grad_norm": 0.04328077286481857, |
| "learning_rate": 0.0002, |
| "loss": 0.1983, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.007674837853136885, |
| "grad_norm": 0.08253510296344757, |
| "learning_rate": 0.0002, |
| "loss": 0.0954, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.0076930246726940816, |
| "grad_norm": 0.06146657094359398, |
| "learning_rate": 0.0002, |
| "loss": 0.0843, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.007711211492251278, |
| "grad_norm": 0.13579218089580536, |
| "learning_rate": 0.0002, |
| "loss": 0.0672, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.007729398311808474, |
| "grad_norm": 0.0038396338932216167, |
| "learning_rate": 0.0002, |
| "loss": 0.0131, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.007747585131365671, |
| "grad_norm": 0.03109130822122097, |
| "learning_rate": 0.0002, |
| "loss": 0.2102, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.007765771950922868, |
| "grad_norm": 0.04971664398908615, |
| "learning_rate": 0.0002, |
| "loss": 0.0903, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.007783958770480064, |
| "grad_norm": 0.06476306915283203, |
| "learning_rate": 0.0002, |
| "loss": 0.0859, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.00780214559003726, |
| "grad_norm": 0.15377041697502136, |
| "learning_rate": 0.0002, |
| "loss": 0.0828, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.007820332409594457, |
| "grad_norm": 0.005592274013906717, |
| "learning_rate": 0.0002, |
| "loss": 0.014, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.007838519229151653, |
| "grad_norm": 0.04387212172150612, |
| "learning_rate": 0.0002, |
| "loss": 0.1907, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.00785670604870885, |
| "grad_norm": 0.06001356989145279, |
| "learning_rate": 0.0002, |
| "loss": 0.0864, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.007874892868266046, |
| "grad_norm": 0.030866140499711037, |
| "learning_rate": 0.0002, |
| "loss": 0.0748, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.007893079687823242, |
| "grad_norm": 0.13280808925628662, |
| "learning_rate": 0.0002, |
| "loss": 0.0686, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.007911266507380438, |
| "grad_norm": 0.015559020452201366, |
| "learning_rate": 0.0002, |
| "loss": 0.016, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.007929453326937634, |
| "grad_norm": 0.0669974684715271, |
| "learning_rate": 0.0002, |
| "loss": 0.1916, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.00794764014649483, |
| "grad_norm": 0.0759076252579689, |
| "learning_rate": 0.0002, |
| "loss": 0.0925, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.007965826966052029, |
| "grad_norm": 0.029388410970568657, |
| "learning_rate": 0.0002, |
| "loss": 0.086, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.007984013785609225, |
| "grad_norm": 0.17637981474399567, |
| "learning_rate": 0.0002, |
| "loss": 0.0697, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.008002200605166421, |
| "grad_norm": 0.008022189140319824, |
| "learning_rate": 0.0002, |
| "loss": 0.0196, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.008020387424723618, |
| "grad_norm": 0.04126167669892311, |
| "learning_rate": 0.0002, |
| "loss": 0.192, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.008038574244280814, |
| "grad_norm": 0.08132971078157425, |
| "learning_rate": 0.0002, |
| "loss": 0.093, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.00805676106383801, |
| "grad_norm": 0.07568484544754028, |
| "learning_rate": 0.0002, |
| "loss": 0.0823, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.008074947883395207, |
| "grad_norm": 0.1259222775697708, |
| "learning_rate": 0.0002, |
| "loss": 0.0696, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.008093134702952403, |
| "grad_norm": 0.009711826220154762, |
| "learning_rate": 0.0002, |
| "loss": 0.0232, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.0081113215225096, |
| "grad_norm": 0.029734279960393906, |
| "learning_rate": 0.0002, |
| "loss": 0.1595, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.008129508342066796, |
| "grad_norm": 0.04886960610747337, |
| "learning_rate": 0.0002, |
| "loss": 0.0919, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.008147695161623992, |
| "grad_norm": 0.07031470537185669, |
| "learning_rate": 0.0002, |
| "loss": 0.0813, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.008165881981181188, |
| "grad_norm": 0.12099859863519669, |
| "learning_rate": 0.0002, |
| "loss": 0.0731, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.008184068800738385, |
| "grad_norm": 0.02181529812514782, |
| "learning_rate": 0.0002, |
| "loss": 0.021, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.00820225562029558, |
| "grad_norm": 0.035477787256240845, |
| "learning_rate": 0.0002, |
| "loss": 0.1429, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.008220442439852777, |
| "grad_norm": 0.07788772135972977, |
| "learning_rate": 0.0002, |
| "loss": 0.0842, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.008238629259409973, |
| "grad_norm": 0.045833125710487366, |
| "learning_rate": 0.0002, |
| "loss": 0.0829, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.00825681607896717, |
| "grad_norm": 0.12271951884031296, |
| "learning_rate": 0.0002, |
| "loss": 0.0707, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.008275002898524366, |
| "grad_norm": 0.01919553242623806, |
| "learning_rate": 0.0002, |
| "loss": 0.0213, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.008293189718081564, |
| "grad_norm": 0.032527096569538116, |
| "learning_rate": 0.0002, |
| "loss": 0.1397, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.00831137653763876, |
| "grad_norm": 0.045243579894304276, |
| "learning_rate": 0.0002, |
| "loss": 0.0854, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.008329563357195957, |
| "grad_norm": 0.04226524010300636, |
| "learning_rate": 0.0002, |
| "loss": 0.0728, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.008347750176753153, |
| "grad_norm": 0.09887039661407471, |
| "learning_rate": 0.0002, |
| "loss": 0.0661, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.00836593699631035, |
| "grad_norm": 0.01822318509221077, |
| "learning_rate": 0.0002, |
| "loss": 0.0169, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.008384123815867546, |
| "grad_norm": 0.05729951336979866, |
| "learning_rate": 0.0002, |
| "loss": 0.137, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.008402310635424742, |
| "grad_norm": 0.041520439088344574, |
| "learning_rate": 0.0002, |
| "loss": 0.0825, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.008420497454981938, |
| "grad_norm": 0.051164623349905014, |
| "learning_rate": 0.0002, |
| "loss": 0.0818, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.008438684274539135, |
| "grad_norm": 0.1289409101009369, |
| "learning_rate": 0.0002, |
| "loss": 0.0664, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.008456871094096331, |
| "grad_norm": 0.0085114361718297, |
| "learning_rate": 0.0002, |
| "loss": 0.0229, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.008475057913653527, |
| "grad_norm": 0.03594676032662392, |
| "learning_rate": 0.0002, |
| "loss": 0.1401, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.008493244733210724, |
| "grad_norm": 0.0316978394985199, |
| "learning_rate": 0.0002, |
| "loss": 0.0877, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.00851143155276792, |
| "grad_norm": 0.023302162066102028, |
| "learning_rate": 0.0002, |
| "loss": 0.0764, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.008529618372325116, |
| "grad_norm": 0.1329929083585739, |
| "learning_rate": 0.0002, |
| "loss": 0.0788, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.008547805191882312, |
| "grad_norm": 0.01048013661056757, |
| "learning_rate": 0.0002, |
| "loss": 0.0234, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.008565992011439509, |
| "grad_norm": 0.03505022078752518, |
| "learning_rate": 0.0002, |
| "loss": 0.1509, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.008584178830996705, |
| "grad_norm": 0.03877585008740425, |
| "learning_rate": 0.0002, |
| "loss": 0.0802, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.008602365650553903, |
| "grad_norm": 0.041193027049303055, |
| "learning_rate": 0.0002, |
| "loss": 0.0695, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.0086205524701111, |
| "grad_norm": 0.17310455441474915, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.008638739289668296, |
| "grad_norm": 0.0061012376099824905, |
| "learning_rate": 0.0002, |
| "loss": 0.0158, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.008656926109225492, |
| "grad_norm": 0.04843207076191902, |
| "learning_rate": 0.0002, |
| "loss": 0.2103, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.008675112928782688, |
| "grad_norm": 0.04483436048030853, |
| "learning_rate": 0.0002, |
| "loss": 0.0878, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.008693299748339885, |
| "grad_norm": 0.056655965745449066, |
| "learning_rate": 0.0002, |
| "loss": 0.0752, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.008711486567897081, |
| "grad_norm": 0.11626063287258148, |
| "learning_rate": 0.0002, |
| "loss": 0.0685, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.008729673387454277, |
| "grad_norm": 0.013872025534510612, |
| "learning_rate": 0.0002, |
| "loss": 0.0198, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.008747860207011474, |
| "grad_norm": 0.06217370182275772, |
| "learning_rate": 0.0002, |
| "loss": 0.1371, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.00876604702656867, |
| "grad_norm": 0.027149083092808723, |
| "learning_rate": 0.0002, |
| "loss": 0.0849, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.008784233846125866, |
| "grad_norm": 0.043290987610816956, |
| "learning_rate": 0.0002, |
| "loss": 0.0739, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.008802420665683063, |
| "grad_norm": 0.10664638131856918, |
| "learning_rate": 0.0002, |
| "loss": 0.0722, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.008820607485240259, |
| "grad_norm": 0.033459801226854324, |
| "learning_rate": 0.0002, |
| "loss": 0.0234, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.008838794304797455, |
| "grad_norm": 0.049193184822797775, |
| "learning_rate": 0.0002, |
| "loss": 0.1173, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.008856981124354651, |
| "grad_norm": 0.05060647428035736, |
| "learning_rate": 0.0002, |
| "loss": 0.0883, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.008875167943911848, |
| "grad_norm": 0.028496885672211647, |
| "learning_rate": 0.0002, |
| "loss": 0.0747, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.008893354763469044, |
| "grad_norm": 0.10652820765972137, |
| "learning_rate": 0.0002, |
| "loss": 0.0707, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.008911541583026242, |
| "grad_norm": 0.007879966869950294, |
| "learning_rate": 0.0002, |
| "loss": 0.0178, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.008929728402583438, |
| "grad_norm": 0.05227983742952347, |
| "learning_rate": 0.0002, |
| "loss": 0.1379, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.008947915222140635, |
| "grad_norm": 0.06054231896996498, |
| "learning_rate": 0.0002, |
| "loss": 0.0934, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.008966102041697831, |
| "grad_norm": 0.029085835441946983, |
| "learning_rate": 0.0002, |
| "loss": 0.0816, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.008984288861255027, |
| "grad_norm": 0.09829402714967728, |
| "learning_rate": 0.0002, |
| "loss": 0.0672, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.009002475680812224, |
| "grad_norm": 0.005579107441008091, |
| "learning_rate": 0.0002, |
| "loss": 0.0172, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.00902066250036942, |
| "grad_norm": 0.027280857786536217, |
| "learning_rate": 0.0002, |
| "loss": 0.1659, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.009038849319926616, |
| "grad_norm": 0.10321583598852158, |
| "learning_rate": 0.0002, |
| "loss": 0.0947, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.009057036139483813, |
| "grad_norm": 0.03381946310400963, |
| "learning_rate": 0.0002, |
| "loss": 0.0837, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.009075222959041009, |
| "grad_norm": 0.14493779838085175, |
| "learning_rate": 0.0002, |
| "loss": 0.0736, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.009093409778598205, |
| "grad_norm": 0.009917684830725193, |
| "learning_rate": 0.0002, |
| "loss": 0.0188, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.009111596598155402, |
| "grad_norm": 1.003450632095337, |
| "learning_rate": 0.0002, |
| "loss": 0.218, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.009129783417712598, |
| "grad_norm": 0.09081514924764633, |
| "learning_rate": 0.0002, |
| "loss": 0.1714, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.009147970237269794, |
| "grad_norm": 0.042343392968177795, |
| "learning_rate": 0.0002, |
| "loss": 0.0823, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.00916615705682699, |
| "grad_norm": 0.09944835305213928, |
| "learning_rate": 0.0002, |
| "loss": 0.0667, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.009184343876384187, |
| "grad_norm": 0.008264658972620964, |
| "learning_rate": 0.0002, |
| "loss": 0.0122, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.009202530695941383, |
| "grad_norm": 0.08990125358104706, |
| "learning_rate": 0.0002, |
| "loss": 0.1685, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.009220717515498581, |
| "grad_norm": 0.0331488698720932, |
| "learning_rate": 0.0002, |
| "loss": 0.0885, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.009238904335055777, |
| "grad_norm": 0.029458707198500633, |
| "learning_rate": 0.0002, |
| "loss": 0.078, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.009257091154612974, |
| "grad_norm": 0.10468839108943939, |
| "learning_rate": 0.0002, |
| "loss": 0.0683, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.00927527797417017, |
| "grad_norm": 0.002719841431826353, |
| "learning_rate": 0.0002, |
| "loss": 0.0117, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.009293464793727366, |
| "grad_norm": 0.0411439947783947, |
| "learning_rate": 0.0002, |
| "loss": 0.2025, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.009311651613284563, |
| "grad_norm": 0.03695548698306084, |
| "learning_rate": 0.0002, |
| "loss": 0.0831, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.009329838432841759, |
| "grad_norm": 0.06067590415477753, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.009348025252398955, |
| "grad_norm": 0.11754634976387024, |
| "learning_rate": 0.0002, |
| "loss": 0.0667, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.009366212071956152, |
| "grad_norm": 0.004248317331075668, |
| "learning_rate": 0.0002, |
| "loss": 0.0113, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.009384398891513348, |
| "grad_norm": 0.03073648177087307, |
| "learning_rate": 0.0002, |
| "loss": 0.2289, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.009402585711070544, |
| "grad_norm": 0.10287592560052872, |
| "learning_rate": 0.0002, |
| "loss": 0.0977, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.00942077253062774, |
| "grad_norm": 0.06832946836948395, |
| "learning_rate": 0.0002, |
| "loss": 0.0764, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.009438959350184937, |
| "grad_norm": 0.1760883778333664, |
| "learning_rate": 0.0002, |
| "loss": 0.0795, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.009457146169742133, |
| "grad_norm": 0.02968805655837059, |
| "learning_rate": 0.0002, |
| "loss": 0.0253, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.00947533298929933, |
| "grad_norm": 0.046602651476860046, |
| "learning_rate": 0.0002, |
| "loss": 0.1432, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.009493519808856526, |
| "grad_norm": 0.051989324390888214, |
| "learning_rate": 0.0002, |
| "loss": 0.0807, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.009511706628413722, |
| "grad_norm": 0.04583961144089699, |
| "learning_rate": 0.0002, |
| "loss": 0.0782, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.00952989344797092, |
| "grad_norm": 0.13195525109767914, |
| "learning_rate": 0.0002, |
| "loss": 0.0688, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.009548080267528116, |
| "grad_norm": 0.011369351297616959, |
| "learning_rate": 0.0002, |
| "loss": 0.0232, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.009566267087085313, |
| "grad_norm": 0.05092083290219307, |
| "learning_rate": 0.0002, |
| "loss": 0.145, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.009584453906642509, |
| "grad_norm": 0.05051489174365997, |
| "learning_rate": 0.0002, |
| "loss": 0.0803, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.009602640726199705, |
| "grad_norm": 0.05730990320444107, |
| "learning_rate": 0.0002, |
| "loss": 0.0717, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.009620827545756902, |
| "grad_norm": 0.11170202493667603, |
| "learning_rate": 0.0002, |
| "loss": 0.0711, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.009639014365314098, |
| "grad_norm": 0.011571788229048252, |
| "learning_rate": 0.0002, |
| "loss": 0.0204, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.009657201184871294, |
| "grad_norm": 0.04396244138479233, |
| "learning_rate": 0.0002, |
| "loss": 0.1764, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.00967538800442849, |
| "grad_norm": 0.047808658331632614, |
| "learning_rate": 0.0002, |
| "loss": 0.0855, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.009693574823985687, |
| "grad_norm": 0.09201673418283463, |
| "learning_rate": 0.0002, |
| "loss": 0.0737, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.009711761643542883, |
| "grad_norm": 0.12273146212100983, |
| "learning_rate": 0.0002, |
| "loss": 0.0658, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.00972994846310008, |
| "grad_norm": 0.014599839225411415, |
| "learning_rate": 0.0002, |
| "loss": 0.0254, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.009748135282657276, |
| "grad_norm": 0.049732692539691925, |
| "learning_rate": 0.0002, |
| "loss": 0.1432, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.009766322102214472, |
| "grad_norm": 0.07791377604007721, |
| "learning_rate": 0.0002, |
| "loss": 0.0865, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.009784508921771668, |
| "grad_norm": 0.06298892199993134, |
| "learning_rate": 0.0002, |
| "loss": 0.0816, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.009802695741328865, |
| "grad_norm": 0.08924435079097748, |
| "learning_rate": 0.0002, |
| "loss": 0.0709, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.009820882560886061, |
| "grad_norm": 0.02383723482489586, |
| "learning_rate": 0.0002, |
| "loss": 0.0208, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.009839069380443257, |
| "grad_norm": 0.042910825461149216, |
| "learning_rate": 0.0002, |
| "loss": 0.1383, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.009857256200000455, |
| "grad_norm": 0.05560186505317688, |
| "learning_rate": 0.0002, |
| "loss": 0.0827, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.009875443019557652, |
| "grad_norm": 0.08179624378681183, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.009893629839114848, |
| "grad_norm": 0.17111806571483612, |
| "learning_rate": 0.0002, |
| "loss": 0.0688, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.009911816658672044, |
| "grad_norm": 0.008684845641255379, |
| "learning_rate": 0.0002, |
| "loss": 0.0177, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.00993000347822924, |
| "grad_norm": 0.044370412826538086, |
| "learning_rate": 0.0002, |
| "loss": 0.2036, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.009948190297786437, |
| "grad_norm": 0.08403154462575912, |
| "learning_rate": 0.0002, |
| "loss": 0.0878, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.009966377117343633, |
| "grad_norm": 0.10712645202875137, |
| "learning_rate": 0.0002, |
| "loss": 0.0787, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.00998456393690083, |
| "grad_norm": 0.12575705349445343, |
| "learning_rate": 0.0002, |
| "loss": 0.0637, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.010002750756458026, |
| "grad_norm": 0.018583891913294792, |
| "learning_rate": 0.0002, |
| "loss": 0.0179, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.010020937576015222, |
| "grad_norm": 0.040852561593055725, |
| "learning_rate": 0.0002, |
| "loss": 0.1545, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.010039124395572419, |
| "grad_norm": 0.09006325900554657, |
| "learning_rate": 0.0002, |
| "loss": 0.0888, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.010057311215129615, |
| "grad_norm": 0.06323093175888062, |
| "learning_rate": 0.0002, |
| "loss": 0.0778, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.010075498034686811, |
| "grad_norm": 0.10159824043512344, |
| "learning_rate": 0.0002, |
| "loss": 0.0662, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.010093684854244007, |
| "grad_norm": 0.012086872011423111, |
| "learning_rate": 0.0002, |
| "loss": 0.0237, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.010111871673801204, |
| "grad_norm": 0.02518664114177227, |
| "learning_rate": 0.0002, |
| "loss": 0.1246, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.0101300584933584, |
| "grad_norm": 0.056161828339099884, |
| "learning_rate": 0.0002, |
| "loss": 0.086, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.010148245312915596, |
| "grad_norm": 0.03376586362719536, |
| "learning_rate": 0.0002, |
| "loss": 0.0842, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.010166432132472794, |
| "grad_norm": 0.09921032190322876, |
| "learning_rate": 0.0002, |
| "loss": 0.0667, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.01018461895202999, |
| "grad_norm": 0.009120604954659939, |
| "learning_rate": 0.0002, |
| "loss": 0.0209, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.010202805771587187, |
| "grad_norm": 0.037767425179481506, |
| "learning_rate": 0.0002, |
| "loss": 0.1248, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.010220992591144383, |
| "grad_norm": 0.05255524069070816, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.01023917941070158, |
| "grad_norm": 0.038734354078769684, |
| "learning_rate": 0.0002, |
| "loss": 0.0791, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.010257366230258776, |
| "grad_norm": 0.09293238073587418, |
| "learning_rate": 0.0002, |
| "loss": 0.064, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.010275553049815972, |
| "grad_norm": 0.013020232319831848, |
| "learning_rate": 0.0002, |
| "loss": 0.0174, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.010293739869373169, |
| "grad_norm": 0.030535893514752388, |
| "learning_rate": 0.0002, |
| "loss": 0.1615, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.010311926688930365, |
| "grad_norm": 0.08644227683544159, |
| "learning_rate": 0.0002, |
| "loss": 0.0856, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.010330113508487561, |
| "grad_norm": 0.04769067466259003, |
| "learning_rate": 0.0002, |
| "loss": 0.0777, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.010348300328044758, |
| "grad_norm": 0.1528550088405609, |
| "learning_rate": 0.0002, |
| "loss": 0.0757, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.010366487147601954, |
| "grad_norm": 0.012257793918251991, |
| "learning_rate": 0.0002, |
| "loss": 0.0163, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.01038467396715915, |
| "grad_norm": 0.5761304497718811, |
| "learning_rate": 0.0002, |
| "loss": 0.1787, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.010402860786716346, |
| "grad_norm": 0.07034485787153244, |
| "learning_rate": 0.0002, |
| "loss": 0.0964, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.010421047606273543, |
| "grad_norm": 0.04541708156466484, |
| "learning_rate": 0.0002, |
| "loss": 0.0793, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.010439234425830739, |
| "grad_norm": 0.12013612687587738, |
| "learning_rate": 0.0002, |
| "loss": 0.07, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.010457421245387935, |
| "grad_norm": 0.014152747578918934, |
| "learning_rate": 0.0002, |
| "loss": 0.0208, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.010475608064945133, |
| "grad_norm": 0.029470542445778847, |
| "learning_rate": 0.0002, |
| "loss": 0.1352, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.01049379488450233, |
| "grad_norm": 0.04889104515314102, |
| "learning_rate": 0.0002, |
| "loss": 0.0748, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.010511981704059526, |
| "grad_norm": 0.0311355609446764, |
| "learning_rate": 0.0002, |
| "loss": 0.0764, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.010530168523616722, |
| "grad_norm": 0.16830098628997803, |
| "learning_rate": 0.0002, |
| "loss": 0.0734, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.010548355343173919, |
| "grad_norm": 0.013224232010543346, |
| "learning_rate": 0.0002, |
| "loss": 0.0218, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.010566542162731115, |
| "grad_norm": 0.03710555657744408, |
| "learning_rate": 0.0002, |
| "loss": 0.1403, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.010584728982288311, |
| "grad_norm": 0.05788695067167282, |
| "learning_rate": 0.0002, |
| "loss": 0.0863, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.010602915801845508, |
| "grad_norm": 0.03398163616657257, |
| "learning_rate": 0.0002, |
| "loss": 0.0751, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.010621102621402704, |
| "grad_norm": 0.13862720131874084, |
| "learning_rate": 0.0002, |
| "loss": 0.07, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.0106392894409599, |
| "grad_norm": 0.016240287572145462, |
| "learning_rate": 0.0002, |
| "loss": 0.0209, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.010657476260517097, |
| "grad_norm": 0.030351752415299416, |
| "learning_rate": 0.0002, |
| "loss": 0.157, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.010675663080074293, |
| "grad_norm": 0.038465555757284164, |
| "learning_rate": 0.0002, |
| "loss": 0.072, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.01069384989963149, |
| "grad_norm": 0.07298482209444046, |
| "learning_rate": 0.0002, |
| "loss": 0.0796, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.010712036719188685, |
| "grad_norm": 0.13822157680988312, |
| "learning_rate": 0.0002, |
| "loss": 0.0687, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.010730223538745882, |
| "grad_norm": 0.014381729066371918, |
| "learning_rate": 0.0002, |
| "loss": 0.0192, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.010748410358303078, |
| "grad_norm": 0.040448348969221115, |
| "learning_rate": 0.0002, |
| "loss": 0.1714, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.010766597177860274, |
| "grad_norm": 0.06950225681066513, |
| "learning_rate": 0.0002, |
| "loss": 0.098, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.010784783997417472, |
| "grad_norm": 0.04581855982542038, |
| "learning_rate": 0.0002, |
| "loss": 0.0752, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.010802970816974669, |
| "grad_norm": 0.10498905926942825, |
| "learning_rate": 0.0002, |
| "loss": 0.0627, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.010821157636531865, |
| "grad_norm": 0.009345698170363903, |
| "learning_rate": 0.0002, |
| "loss": 0.0183, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.010839344456089061, |
| "grad_norm": 0.02440352365374565, |
| "learning_rate": 0.0002, |
| "loss": 0.1289, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.010857531275646258, |
| "grad_norm": 0.051523737609386444, |
| "learning_rate": 0.0002, |
| "loss": 0.0813, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.010875718095203454, |
| "grad_norm": 0.031664300709962845, |
| "learning_rate": 0.0002, |
| "loss": 0.0736, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.01089390491476065, |
| "grad_norm": 0.10166060924530029, |
| "learning_rate": 0.0002, |
| "loss": 0.0631, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.010912091734317847, |
| "grad_norm": 0.01642071269452572, |
| "learning_rate": 0.0002, |
| "loss": 0.0198, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.010930278553875043, |
| "grad_norm": 0.04028782621026039, |
| "learning_rate": 0.0002, |
| "loss": 0.1355, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.01094846537343224, |
| "grad_norm": 0.04289260134100914, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.010966652192989436, |
| "grad_norm": 0.03854202851653099, |
| "learning_rate": 0.0002, |
| "loss": 0.0765, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.010984839012546632, |
| "grad_norm": 0.07910823822021484, |
| "learning_rate": 0.0002, |
| "loss": 0.0618, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.011003025832103828, |
| "grad_norm": 0.009719946421682835, |
| "learning_rate": 0.0002, |
| "loss": 0.0145, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.011021212651661024, |
| "grad_norm": 0.06853003799915314, |
| "learning_rate": 0.0002, |
| "loss": 0.1563, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.01103939947121822, |
| "grad_norm": 0.02887076325714588, |
| "learning_rate": 0.0002, |
| "loss": 0.0803, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.011057586290775417, |
| "grad_norm": 0.060147739946842194, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.011075773110332613, |
| "grad_norm": 0.10197418928146362, |
| "learning_rate": 0.0002, |
| "loss": 0.0627, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.011093959929889811, |
| "grad_norm": 0.015125100500881672, |
| "learning_rate": 0.0002, |
| "loss": 0.0164, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.011112146749447008, |
| "grad_norm": 0.029526161029934883, |
| "learning_rate": 0.0002, |
| "loss": 0.1526, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.011130333569004204, |
| "grad_norm": 0.05942453444004059, |
| "learning_rate": 0.0002, |
| "loss": 0.0891, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.0111485203885614, |
| "grad_norm": 0.07344426214694977, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.011166707208118597, |
| "grad_norm": 0.1394059658050537, |
| "learning_rate": 0.0002, |
| "loss": 0.0776, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.011184894027675793, |
| "grad_norm": 0.00965851079672575, |
| "learning_rate": 0.0002, |
| "loss": 0.019, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.01120308084723299, |
| "grad_norm": 0.041846372187137604, |
| "learning_rate": 0.0002, |
| "loss": 0.1776, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.011221267666790186, |
| "grad_norm": 0.04657486826181412, |
| "learning_rate": 0.0002, |
| "loss": 0.0878, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.011239454486347382, |
| "grad_norm": 0.026520246639847755, |
| "learning_rate": 0.0002, |
| "loss": 0.0768, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.011257641305904578, |
| "grad_norm": 0.10318096727132797, |
| "learning_rate": 0.0002, |
| "loss": 0.0617, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.011275828125461775, |
| "grad_norm": 0.019912905991077423, |
| "learning_rate": 0.0002, |
| "loss": 0.0202, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.01129401494501897, |
| "grad_norm": 0.05316480994224548, |
| "learning_rate": 0.0002, |
| "loss": 0.1412, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.011312201764576167, |
| "grad_norm": 0.02944323979318142, |
| "learning_rate": 0.0002, |
| "loss": 0.0829, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.011330388584133363, |
| "grad_norm": 0.0285831056535244, |
| "learning_rate": 0.0002, |
| "loss": 0.074, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.01134857540369056, |
| "grad_norm": 0.0975700169801712, |
| "learning_rate": 0.0002, |
| "loss": 0.0681, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.011366762223247756, |
| "grad_norm": 0.025717545300722122, |
| "learning_rate": 0.0002, |
| "loss": 0.0221, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.011384949042804952, |
| "grad_norm": 0.02859714813530445, |
| "learning_rate": 0.0002, |
| "loss": 0.1142, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.01140313586236215, |
| "grad_norm": 0.04395005479454994, |
| "learning_rate": 0.0002, |
| "loss": 0.0777, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.011421322681919347, |
| "grad_norm": 0.05116860568523407, |
| "learning_rate": 0.0002, |
| "loss": 0.0763, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.011439509501476543, |
| "grad_norm": 0.06850302964448929, |
| "learning_rate": 0.0002, |
| "loss": 0.0632, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.01145769632103374, |
| "grad_norm": 0.016113542020320892, |
| "learning_rate": 0.0002, |
| "loss": 0.0178, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.011475883140590936, |
| "grad_norm": 0.032306116074323654, |
| "learning_rate": 0.0002, |
| "loss": 0.1306, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.011494069960148132, |
| "grad_norm": 0.055701326578855515, |
| "learning_rate": 0.0002, |
| "loss": 0.0834, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.011512256779705328, |
| "grad_norm": 0.022934190928936005, |
| "learning_rate": 0.0002, |
| "loss": 0.0737, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.011530443599262525, |
| "grad_norm": 0.08375566452741623, |
| "learning_rate": 0.0002, |
| "loss": 0.0661, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.011548630418819721, |
| "grad_norm": 0.013614729046821594, |
| "learning_rate": 0.0002, |
| "loss": 0.0187, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.011566817238376917, |
| "grad_norm": 0.028269700706005096, |
| "learning_rate": 0.0002, |
| "loss": 0.1245, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.011585004057934114, |
| "grad_norm": 0.03646335378289223, |
| "learning_rate": 0.0002, |
| "loss": 0.0866, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.01160319087749131, |
| "grad_norm": 0.0371277742087841, |
| "learning_rate": 0.0002, |
| "loss": 0.0737, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.011621377697048506, |
| "grad_norm": 0.13698458671569824, |
| "learning_rate": 0.0002, |
| "loss": 0.0679, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.011639564516605702, |
| "grad_norm": 0.009350700303912163, |
| "learning_rate": 0.0002, |
| "loss": 0.024, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.011657751336162899, |
| "grad_norm": 0.03187236189842224, |
| "learning_rate": 0.0002, |
| "loss": 0.1555, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.011675938155720095, |
| "grad_norm": 0.06672242283821106, |
| "learning_rate": 0.0002, |
| "loss": 0.0835, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.011694124975277291, |
| "grad_norm": 0.07821471244096756, |
| "learning_rate": 0.0002, |
| "loss": 0.0746, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.011712311794834488, |
| "grad_norm": 0.14781107008457184, |
| "learning_rate": 0.0002, |
| "loss": 0.0662, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.011730498614391686, |
| "grad_norm": 0.0057207453064620495, |
| "learning_rate": 0.0002, |
| "loss": 0.0169, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.011748685433948882, |
| "grad_norm": 0.04252105578780174, |
| "learning_rate": 0.0002, |
| "loss": 0.1868, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.011766872253506078, |
| "grad_norm": 0.05041474476456642, |
| "learning_rate": 0.0002, |
| "loss": 0.0842, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.011785059073063275, |
| "grad_norm": 0.06584125757217407, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.011803245892620471, |
| "grad_norm": 0.14610575139522552, |
| "learning_rate": 0.0002, |
| "loss": 0.063, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.011821432712177667, |
| "grad_norm": 0.01419675163924694, |
| "learning_rate": 0.0002, |
| "loss": 0.0152, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.011839619531734864, |
| "grad_norm": 0.03371060639619827, |
| "learning_rate": 0.0002, |
| "loss": 0.1725, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.01185780635129206, |
| "grad_norm": 0.028900766745209694, |
| "learning_rate": 0.0002, |
| "loss": 0.0815, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.011875993170849256, |
| "grad_norm": 0.059519629925489426, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.011894179990406453, |
| "grad_norm": 0.12085167318582535, |
| "learning_rate": 0.0002, |
| "loss": 0.0615, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.011912366809963649, |
| "grad_norm": 0.028604619204998016, |
| "learning_rate": 0.0002, |
| "loss": 0.0196, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.011930553629520845, |
| "grad_norm": 0.03659407049417496, |
| "learning_rate": 0.0002, |
| "loss": 0.1403, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.011948740449078041, |
| "grad_norm": 0.034444138407707214, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.011966927268635238, |
| "grad_norm": 0.029788263142108917, |
| "learning_rate": 0.0002, |
| "loss": 0.0713, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.011985114088192434, |
| "grad_norm": 0.1271272599697113, |
| "learning_rate": 0.0002, |
| "loss": 0.0672, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.01200330090774963, |
| "grad_norm": 0.018705012276768684, |
| "learning_rate": 0.0002, |
| "loss": 0.0212, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.012021487727306827, |
| "grad_norm": 0.02982541173696518, |
| "learning_rate": 0.0002, |
| "loss": 0.1152, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.012039674546864025, |
| "grad_norm": 0.06942040473222733, |
| "learning_rate": 0.0002, |
| "loss": 0.0963, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.012057861366421221, |
| "grad_norm": 0.06102292984724045, |
| "learning_rate": 0.0002, |
| "loss": 0.0775, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.012076048185978417, |
| "grad_norm": 0.10115987807512283, |
| "learning_rate": 0.0002, |
| "loss": 0.0729, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.012094235005535614, |
| "grad_norm": 0.011439867317676544, |
| "learning_rate": 0.0002, |
| "loss": 0.0253, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.01211242182509281, |
| "grad_norm": 0.062434904277324677, |
| "learning_rate": 0.0002, |
| "loss": 0.1166, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.012130608644650006, |
| "grad_norm": 0.055352553725242615, |
| "learning_rate": 0.0002, |
| "loss": 0.0802, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.012148795464207203, |
| "grad_norm": 0.031538888812065125, |
| "learning_rate": 0.0002, |
| "loss": 0.0786, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.012166982283764399, |
| "grad_norm": 0.10964162647724152, |
| "learning_rate": 0.0002, |
| "loss": 0.0626, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.012185169103321595, |
| "grad_norm": 0.011173764243721962, |
| "learning_rate": 0.0002, |
| "loss": 0.0205, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.012203355922878792, |
| "grad_norm": 0.035984206944704056, |
| "learning_rate": 0.0002, |
| "loss": 0.1412, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.012221542742435988, |
| "grad_norm": 0.07189827412366867, |
| "learning_rate": 0.0002, |
| "loss": 0.0818, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.012239729561993184, |
| "grad_norm": 0.0400136299431324, |
| "learning_rate": 0.0002, |
| "loss": 0.0676, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.01225791638155038, |
| "grad_norm": 0.14700625836849213, |
| "learning_rate": 0.0002, |
| "loss": 0.0663, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.012276103201107577, |
| "grad_norm": 0.007156179752200842, |
| "learning_rate": 0.0002, |
| "loss": 0.0171, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.012294290020664773, |
| "grad_norm": 0.04911777004599571, |
| "learning_rate": 0.0002, |
| "loss": 0.1657, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.01231247684022197, |
| "grad_norm": 0.03729144483804703, |
| "learning_rate": 0.0002, |
| "loss": 0.0806, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.012330663659779166, |
| "grad_norm": 0.037231944501399994, |
| "learning_rate": 0.0002, |
| "loss": 0.0736, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.012348850479336364, |
| "grad_norm": 0.09694401919841766, |
| "learning_rate": 0.0002, |
| "loss": 0.0642, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.01236703729889356, |
| "grad_norm": 0.025534989312291145, |
| "learning_rate": 0.0002, |
| "loss": 0.0208, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.012385224118450756, |
| "grad_norm": 0.033654361963272095, |
| "learning_rate": 0.0002, |
| "loss": 0.1295, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.012403410938007953, |
| "grad_norm": 0.04499521851539612, |
| "learning_rate": 0.0002, |
| "loss": 0.0902, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.012421597757565149, |
| "grad_norm": 0.0335836224257946, |
| "learning_rate": 0.0002, |
| "loss": 0.0781, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.012439784577122345, |
| "grad_norm": 0.1040850430727005, |
| "learning_rate": 0.0002, |
| "loss": 0.0679, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.012457971396679542, |
| "grad_norm": 0.015963764861226082, |
| "learning_rate": 0.0002, |
| "loss": 0.0226, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.012476158216236738, |
| "grad_norm": 0.05578307807445526, |
| "learning_rate": 0.0002, |
| "loss": 0.1119, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.012494345035793934, |
| "grad_norm": 0.0364505760371685, |
| "learning_rate": 0.0002, |
| "loss": 0.0805, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.01251253185535113, |
| "grad_norm": 0.027990469709038734, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.012530718674908327, |
| "grad_norm": 0.08282670378684998, |
| "learning_rate": 0.0002, |
| "loss": 0.0685, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.012548905494465523, |
| "grad_norm": 0.02172144502401352, |
| "learning_rate": 0.0002, |
| "loss": 0.0259, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.01256709231402272, |
| "grad_norm": 0.04074740409851074, |
| "learning_rate": 0.0002, |
| "loss": 0.1211, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.012585279133579916, |
| "grad_norm": 0.05433020740747452, |
| "learning_rate": 0.0002, |
| "loss": 0.0776, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.012603465953137112, |
| "grad_norm": 0.05479983240365982, |
| "learning_rate": 0.0002, |
| "loss": 0.0778, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.012621652772694308, |
| "grad_norm": 1.6031180620193481, |
| "learning_rate": 0.0002, |
| "loss": 0.2265, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.012639839592251505, |
| "grad_norm": 1.0940366983413696, |
| "learning_rate": 0.0002, |
| "loss": 0.4586, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.012658026411808703, |
| "grad_norm": 0.0412282720208168, |
| "learning_rate": 0.0002, |
| "loss": 0.1072, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.012676213231365899, |
| "grad_norm": 0.03705910965800285, |
| "learning_rate": 0.0002, |
| "loss": 0.1014, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.012694400050923095, |
| "grad_norm": 0.07444313168525696, |
| "learning_rate": 0.0002, |
| "loss": 0.0881, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.012712586870480292, |
| "grad_norm": 0.08558017760515213, |
| "learning_rate": 0.0002, |
| "loss": 0.0619, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.012730773690037488, |
| "grad_norm": 0.0004157133516855538, |
| "learning_rate": 0.0002, |
| "loss": 0.0045, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.012748960509594684, |
| "grad_norm": 0.07950109243392944, |
| "learning_rate": 0.0002, |
| "loss": 0.1801, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.01276714732915188, |
| "grad_norm": 0.08424151688814163, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.012785334148709077, |
| "grad_norm": 0.47635558247566223, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.012803520968266273, |
| "grad_norm": 0.0452958345413208, |
| "learning_rate": 0.0002, |
| "loss": 0.0731, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.01282170778782347, |
| "grad_norm": 0.007719043176621199, |
| "learning_rate": 0.0002, |
| "loss": 0.0193, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.012839894607380666, |
| "grad_norm": 0.2408572882413864, |
| "learning_rate": 0.0002, |
| "loss": 0.4117, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.012858081426937862, |
| "grad_norm": 0.7272363305091858, |
| "learning_rate": 0.0002, |
| "loss": 0.0852, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.012876268246495058, |
| "grad_norm": 0.5539261698722839, |
| "learning_rate": 0.0002, |
| "loss": 0.075, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.012894455066052255, |
| "grad_norm": 4.608922481536865, |
| "learning_rate": 0.0002, |
| "loss": 0.2301, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.012912641885609451, |
| "grad_norm": 0.0012216357281431556, |
| "learning_rate": 0.0002, |
| "loss": 0.0034, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.012930828705166647, |
| "grad_norm": 0.15025563538074493, |
| "learning_rate": 0.0002, |
| "loss": 0.2717, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.012949015524723844, |
| "grad_norm": 0.06209970638155937, |
| "learning_rate": 0.0002, |
| "loss": 0.0852, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.012967202344281042, |
| "grad_norm": 0.6127016544342041, |
| "learning_rate": 0.0002, |
| "loss": 0.1271, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.012985389163838238, |
| "grad_norm": 0.047152891755104065, |
| "learning_rate": 0.0002, |
| "loss": 0.0626, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.013003575983395434, |
| "grad_norm": 0.0005132685182616115, |
| "learning_rate": 0.0002, |
| "loss": 0.0029, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.01302176280295263, |
| "grad_norm": 0.08946029096841812, |
| "learning_rate": 0.0002, |
| "loss": 0.309, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.013039949622509827, |
| "grad_norm": 0.18610751628875732, |
| "learning_rate": 0.0002, |
| "loss": 0.0867, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.013058136442067023, |
| "grad_norm": 0.07280854880809784, |
| "learning_rate": 0.0002, |
| "loss": 0.0832, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.01307632326162422, |
| "grad_norm": 0.11997990310192108, |
| "learning_rate": 0.0002, |
| "loss": 0.0746, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.013094510081181416, |
| "grad_norm": 0.00019475500448606908, |
| "learning_rate": 0.0002, |
| "loss": 0.01, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.013112696900738612, |
| "grad_norm": 0.07719916105270386, |
| "learning_rate": 0.0002, |
| "loss": 0.3035, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.013130883720295809, |
| "grad_norm": 0.0990060344338417, |
| "learning_rate": 0.0002, |
| "loss": 0.0902, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.013149070539853005, |
| "grad_norm": 0.22215688228607178, |
| "learning_rate": 0.0002, |
| "loss": 0.0797, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.013167257359410201, |
| "grad_norm": 0.08412040770053864, |
| "learning_rate": 0.0002, |
| "loss": 0.0646, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.013185444178967397, |
| "grad_norm": 0.0017518314998596907, |
| "learning_rate": 0.0002, |
| "loss": 0.007, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.013203630998524594, |
| "grad_norm": 0.1554754078388214, |
| "learning_rate": 0.0002, |
| "loss": 0.2319, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.01322181781808179, |
| "grad_norm": 0.052371326833963394, |
| "learning_rate": 0.0002, |
| "loss": 0.0832, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.013240004637638986, |
| "grad_norm": 0.9168817400932312, |
| "learning_rate": 0.0002, |
| "loss": 0.0791, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.013258191457196183, |
| "grad_norm": 0.07169363647699356, |
| "learning_rate": 0.0002, |
| "loss": 0.0602, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.01327637827675338, |
| "grad_norm": 0.0009911650558933616, |
| "learning_rate": 0.0002, |
| "loss": 0.0041, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.013294565096310577, |
| "grad_norm": 0.2644541263580322, |
| "learning_rate": 0.0002, |
| "loss": 0.2193, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.013312751915867773, |
| "grad_norm": 0.12140689790248871, |
| "learning_rate": 0.0002, |
| "loss": 0.0944, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.01333093873542497, |
| "grad_norm": 0.03627191483974457, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.013349125554982166, |
| "grad_norm": 0.06252894550561905, |
| "learning_rate": 0.0002, |
| "loss": 0.0596, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.013367312374539362, |
| "grad_norm": 0.20318441092967987, |
| "learning_rate": 0.0002, |
| "loss": 0.0064, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.013385499194096559, |
| "grad_norm": 0.4231732189655304, |
| "learning_rate": 0.0002, |
| "loss": 0.4329, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.013403686013653755, |
| "grad_norm": 0.07567082345485687, |
| "learning_rate": 0.0002, |
| "loss": 0.089, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.013421872833210951, |
| "grad_norm": 0.23021474480628967, |
| "learning_rate": 0.0002, |
| "loss": 0.0833, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.013440059652768148, |
| "grad_norm": 0.09458985179662704, |
| "learning_rate": 0.0002, |
| "loss": 0.1391, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.013458246472325344, |
| "grad_norm": 0.010052111931145191, |
| "learning_rate": 0.0002, |
| "loss": 0.0073, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.01347643329188254, |
| "grad_norm": 0.2159787267446518, |
| "learning_rate": 0.0002, |
| "loss": 0.2249, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.013494620111439736, |
| "grad_norm": 0.11222853511571884, |
| "learning_rate": 0.0002, |
| "loss": 0.093, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.013512806930996933, |
| "grad_norm": 0.08586139976978302, |
| "learning_rate": 0.0002, |
| "loss": 0.0821, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.013530993750554129, |
| "grad_norm": 0.12232748419046402, |
| "learning_rate": 0.0002, |
| "loss": 0.0736, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.013549180570111325, |
| "grad_norm": 0.006977527402341366, |
| "learning_rate": 0.0002, |
| "loss": 0.0115, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.013567367389668522, |
| "grad_norm": 0.051690369844436646, |
| "learning_rate": 0.0002, |
| "loss": 0.2247, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.013585554209225718, |
| "grad_norm": 0.06542158871889114, |
| "learning_rate": 0.0002, |
| "loss": 0.1056, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.013603741028782916, |
| "grad_norm": 0.18546995520591736, |
| "learning_rate": 0.0002, |
| "loss": 0.1102, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.013621927848340112, |
| "grad_norm": 13.399182319641113, |
| "learning_rate": 0.0002, |
| "loss": 2.0806, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.013640114667897309, |
| "grad_norm": 0.0982588455080986, |
| "learning_rate": 0.0002, |
| "loss": 0.2158, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.013658301487454505, |
| "grad_norm": 0.07860754430294037, |
| "learning_rate": 0.0002, |
| "loss": 0.125, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.013676488307011701, |
| "grad_norm": 0.1165497750043869, |
| "learning_rate": 0.0002, |
| "loss": 0.0899, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.013694675126568898, |
| "grad_norm": 0.2813965380191803, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.013712861946126094, |
| "grad_norm": 0.33458462357521057, |
| "learning_rate": 0.0002, |
| "loss": 0.0683, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.01373104876568329, |
| "grad_norm": 0.012062279507517815, |
| "learning_rate": 0.0002, |
| "loss": 0.0135, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.013749235585240487, |
| "grad_norm": 0.1787721961736679, |
| "learning_rate": 0.0002, |
| "loss": 0.1763, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.013767422404797683, |
| "grad_norm": 0.05922751501202583, |
| "learning_rate": 0.0002, |
| "loss": 0.1223, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.01378560922435488, |
| "grad_norm": 0.11594684422016144, |
| "learning_rate": 0.0002, |
| "loss": 0.0934, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.013803796043912075, |
| "grad_norm": 0.2290794998407364, |
| "learning_rate": 0.0002, |
| "loss": 0.0799, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.013821982863469272, |
| "grad_norm": 0.04903063178062439, |
| "learning_rate": 0.0002, |
| "loss": 0.0195, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.013840169683026468, |
| "grad_norm": 123.61300659179688, |
| "learning_rate": 0.0002, |
| "loss": 0.8359, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.013858356502583664, |
| "grad_norm": 0.25403347611427307, |
| "learning_rate": 0.0002, |
| "loss": 1.0464, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.01387654332214086, |
| "grad_norm": 0.08144152164459229, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.013894730141698057, |
| "grad_norm": 0.11679713428020477, |
| "learning_rate": 0.0002, |
| "loss": 0.0706, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.013912916961255255, |
| "grad_norm": 0.00391317019239068, |
| "learning_rate": 0.0002, |
| "loss": 0.0075, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.013931103780812451, |
| "grad_norm": 0.13209663331508636, |
| "learning_rate": 0.0002, |
| "loss": 0.2228, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.013949290600369648, |
| "grad_norm": 0.06067880615592003, |
| "learning_rate": 0.0002, |
| "loss": 0.089, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.013967477419926844, |
| "grad_norm": 0.04806550592184067, |
| "learning_rate": 0.0002, |
| "loss": 0.0769, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.01398566423948404, |
| "grad_norm": 0.09506970643997192, |
| "learning_rate": 0.0002, |
| "loss": 0.0689, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.014003851059041237, |
| "grad_norm": 0.002536884741857648, |
| "learning_rate": 0.0002, |
| "loss": 0.0172, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.014022037878598433, |
| "grad_norm": 0.13837113976478577, |
| "learning_rate": 0.0002, |
| "loss": 0.2243, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.01404022469815563, |
| "grad_norm": 0.08101535588502884, |
| "learning_rate": 0.0002, |
| "loss": 0.0878, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.014058411517712826, |
| "grad_norm": 0.04018868878483772, |
| "learning_rate": 0.0002, |
| "loss": 0.0776, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.014076598337270022, |
| "grad_norm": 0.1377197653055191, |
| "learning_rate": 0.0002, |
| "loss": 0.0681, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.014094785156827218, |
| "grad_norm": 0.0006735012284480035, |
| "learning_rate": 0.0002, |
| "loss": 0.0041, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.014112971976384414, |
| "grad_norm": 0.17503094673156738, |
| "learning_rate": 0.0002, |
| "loss": 0.3114, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.01413115879594161, |
| "grad_norm": 0.07190551608800888, |
| "learning_rate": 0.0002, |
| "loss": 0.1018, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.014149345615498807, |
| "grad_norm": 0.036945659667253494, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.014167532435056003, |
| "grad_norm": 0.13999724388122559, |
| "learning_rate": 0.0002, |
| "loss": 0.0725, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.0141857192546132, |
| "grad_norm": 0.0031171294394880533, |
| "learning_rate": 0.0002, |
| "loss": 0.0144, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.014203906074170396, |
| "grad_norm": 0.059554051607847214, |
| "learning_rate": 0.0002, |
| "loss": 0.2442, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.014222092893727594, |
| "grad_norm": 0.06873622536659241, |
| "learning_rate": 0.0002, |
| "loss": 0.0904, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.01424027971328479, |
| "grad_norm": 0.11261582374572754, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.014258466532841987, |
| "grad_norm": 1.497631311416626, |
| "learning_rate": 0.0002, |
| "loss": 0.0689, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.014276653352399183, |
| "grad_norm": 0.004822546616196632, |
| "learning_rate": 0.0002, |
| "loss": 0.0156, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.01429484017195638, |
| "grad_norm": 0.0575052835047245, |
| "learning_rate": 0.0002, |
| "loss": 0.1895, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.014313026991513576, |
| "grad_norm": 0.10657750070095062, |
| "learning_rate": 0.0002, |
| "loss": 0.0855, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.014331213811070772, |
| "grad_norm": 0.07080844044685364, |
| "learning_rate": 0.0002, |
| "loss": 0.0716, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.014349400630627968, |
| "grad_norm": 0.1628514677286148, |
| "learning_rate": 0.0002, |
| "loss": 0.071, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.014367587450185165, |
| "grad_norm": 0.013860347680747509, |
| "learning_rate": 0.0002, |
| "loss": 0.0227, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.014385774269742361, |
| "grad_norm": 0.5240967869758606, |
| "learning_rate": 0.0002, |
| "loss": 0.1854, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.014403961089299557, |
| "grad_norm": 1.0027457475662231, |
| "learning_rate": 0.0002, |
| "loss": 0.0942, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.014422147908856753, |
| "grad_norm": 0.05730056390166283, |
| "learning_rate": 0.0002, |
| "loss": 0.0778, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.01444033472841395, |
| "grad_norm": 0.1485404521226883, |
| "learning_rate": 0.0002, |
| "loss": 0.0719, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.014458521547971146, |
| "grad_norm": 0.009702637791633606, |
| "learning_rate": 0.0002, |
| "loss": 0.0136, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.014476708367528342, |
| "grad_norm": 0.046543315052986145, |
| "learning_rate": 0.0002, |
| "loss": 0.1697, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.014494895187085539, |
| "grad_norm": 0.05248842388391495, |
| "learning_rate": 0.0002, |
| "loss": 0.0888, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.014513082006642735, |
| "grad_norm": 0.047813788056373596, |
| "learning_rate": 0.0002, |
| "loss": 0.0806, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.014531268826199933, |
| "grad_norm": 0.19744129478931427, |
| "learning_rate": 0.0002, |
| "loss": 0.0788, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.01454945564575713, |
| "grad_norm": 0.005265017040073872, |
| "learning_rate": 0.0002, |
| "loss": 0.014, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.014567642465314326, |
| "grad_norm": 0.0564056858420372, |
| "learning_rate": 0.0002, |
| "loss": 0.2681, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.014585829284871522, |
| "grad_norm": 0.0958496481180191, |
| "learning_rate": 0.0002, |
| "loss": 0.0823, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.014604016104428718, |
| "grad_norm": 0.12000919133424759, |
| "learning_rate": 0.0002, |
| "loss": 0.073, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.014622202923985915, |
| "grad_norm": 0.15912771224975586, |
| "learning_rate": 0.0002, |
| "loss": 0.075, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.014640389743543111, |
| "grad_norm": 0.004025776404887438, |
| "learning_rate": 0.0002, |
| "loss": 0.012, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.014658576563100307, |
| "grad_norm": 0.1682930886745453, |
| "learning_rate": 0.0002, |
| "loss": 0.2926, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.014676763382657504, |
| "grad_norm": 0.057362254709005356, |
| "learning_rate": 0.0002, |
| "loss": 0.0869, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.0146949502022147, |
| "grad_norm": 0.0814078077673912, |
| "learning_rate": 0.0002, |
| "loss": 0.0825, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.014713137021771896, |
| "grad_norm": 0.18205074965953827, |
| "learning_rate": 0.0002, |
| "loss": 0.0699, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.014731323841329092, |
| "grad_norm": 0.013200881890952587, |
| "learning_rate": 0.0002, |
| "loss": 0.0228, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.014749510660886289, |
| "grad_norm": 0.21043474972248077, |
| "learning_rate": 0.0002, |
| "loss": 0.2138, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.014767697480443485, |
| "grad_norm": 0.1000015065073967, |
| "learning_rate": 0.0002, |
| "loss": 0.0906, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.014785884300000681, |
| "grad_norm": 0.045657768845558167, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.014804071119557878, |
| "grad_norm": 0.13545630872249603, |
| "learning_rate": 0.0002, |
| "loss": 0.066, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.014822257939115074, |
| "grad_norm": 0.01422254927456379, |
| "learning_rate": 0.0002, |
| "loss": 0.0179, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.014840444758672272, |
| "grad_norm": 0.12108676135540009, |
| "learning_rate": 0.0002, |
| "loss": 0.1717, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.014858631578229468, |
| "grad_norm": 0.10441934317350388, |
| "learning_rate": 0.0002, |
| "loss": 0.106, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.014876818397786665, |
| "grad_norm": 0.08105968683958054, |
| "learning_rate": 0.0002, |
| "loss": 0.0829, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.014895005217343861, |
| "grad_norm": 0.12230301648378372, |
| "learning_rate": 0.0002, |
| "loss": 0.0731, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.014913192036901057, |
| "grad_norm": 0.033857300877571106, |
| "learning_rate": 0.0002, |
| "loss": 0.029, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.014931378856458254, |
| "grad_norm": 0.04827893525362015, |
| "learning_rate": 0.0002, |
| "loss": 0.1369, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.01494956567601545, |
| "grad_norm": 0.056212421506643295, |
| "learning_rate": 0.0002, |
| "loss": 0.0879, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.014967752495572646, |
| "grad_norm": 0.03163846209645271, |
| "learning_rate": 0.0002, |
| "loss": 0.0698, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.014985939315129843, |
| "grad_norm": 0.09394920617341995, |
| "learning_rate": 0.0002, |
| "loss": 0.0688, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.015004126134687039, |
| "grad_norm": 0.024936649948358536, |
| "learning_rate": 0.0002, |
| "loss": 0.0211, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.015022312954244235, |
| "grad_norm": 4.499615669250488, |
| "learning_rate": 0.0002, |
| "loss": 2.7596, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.015040499773801431, |
| "grad_norm": 9.221298217773438, |
| "learning_rate": 0.0002, |
| "loss": 0.9135, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.015058686593358628, |
| "grad_norm": 0.5199778079986572, |
| "learning_rate": 0.0002, |
| "loss": 0.1441, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.015076873412915824, |
| "grad_norm": 0.07028087228536606, |
| "learning_rate": 0.0002, |
| "loss": 0.0793, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.01509506023247302, |
| "grad_norm": 0.0003307730657979846, |
| "learning_rate": 0.0002, |
| "loss": 0.0006, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.015113247052030217, |
| "grad_norm": 0.7940683960914612, |
| "learning_rate": 0.0002, |
| "loss": 0.7233, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.015131433871587413, |
| "grad_norm": 0.09774448722600937, |
| "learning_rate": 0.0002, |
| "loss": 0.1451, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.015149620691144611, |
| "grad_norm": 0.3088306188583374, |
| "learning_rate": 0.0002, |
| "loss": 0.0986, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.015167807510701807, |
| "grad_norm": 0.08629265427589417, |
| "learning_rate": 0.0002, |
| "loss": 0.0581, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.015185994330259004, |
| "grad_norm": 0.0011582528240978718, |
| "learning_rate": 0.0002, |
| "loss": 0.0008, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.0152041811498162, |
| "grad_norm": 0.48978063464164734, |
| "learning_rate": 0.0002, |
| "loss": 0.5111, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.015222367969373396, |
| "grad_norm": 0.2633112668991089, |
| "learning_rate": 0.0002, |
| "loss": 0.1354, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.015240554788930593, |
| "grad_norm": 0.058184925466775894, |
| "learning_rate": 0.0002, |
| "loss": 0.0963, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.015258741608487789, |
| "grad_norm": 0.397290974855423, |
| "learning_rate": 0.0002, |
| "loss": 0.0915, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.015276928428044985, |
| "grad_norm": 0.0013334077084437013, |
| "learning_rate": 0.0002, |
| "loss": 0.0117, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.015295115247602182, |
| "grad_norm": 3.2027626037597656, |
| "learning_rate": 0.0002, |
| "loss": 0.3642, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.015313302067159378, |
| "grad_norm": 0.4110456705093384, |
| "learning_rate": 0.0002, |
| "loss": 0.1347, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.015331488886716574, |
| "grad_norm": 0.19789688289165497, |
| "learning_rate": 0.0002, |
| "loss": 0.0946, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.01534967570627377, |
| "grad_norm": 0.15914630889892578, |
| "learning_rate": 0.0002, |
| "loss": 0.0619, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.015367862525830967, |
| "grad_norm": 0.004021051339805126, |
| "learning_rate": 0.0002, |
| "loss": 0.0081, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.015386049345388163, |
| "grad_norm": 0.25250542163848877, |
| "learning_rate": 0.0002, |
| "loss": 0.2409, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.01540423616494536, |
| "grad_norm": 0.16660314798355103, |
| "learning_rate": 0.0002, |
| "loss": 0.1041, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.015422422984502556, |
| "grad_norm": 0.09435573220252991, |
| "learning_rate": 0.0002, |
| "loss": 0.0838, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.015440609804059752, |
| "grad_norm": 0.1622086614370346, |
| "learning_rate": 0.0002, |
| "loss": 0.0648, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.015458796623616948, |
| "grad_norm": 0.002267120871692896, |
| "learning_rate": 0.0002, |
| "loss": 0.0057, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.015476983443174146, |
| "grad_norm": 0.11559420824050903, |
| "learning_rate": 0.0002, |
| "loss": 0.2994, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.015495170262731343, |
| "grad_norm": 0.18291179835796356, |
| "learning_rate": 0.0002, |
| "loss": 0.0908, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.015513357082288539, |
| "grad_norm": 0.14989323914051056, |
| "learning_rate": 0.0002, |
| "loss": 0.0912, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.015531543901845735, |
| "grad_norm": 0.09752708673477173, |
| "learning_rate": 0.0002, |
| "loss": 0.0586, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.015549730721402932, |
| "grad_norm": 0.0005314307054504752, |
| "learning_rate": 0.0002, |
| "loss": 0.0026, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.015567917540960128, |
| "grad_norm": 0.18309178948402405, |
| "learning_rate": 0.0002, |
| "loss": 0.3059, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.015586104360517324, |
| "grad_norm": 0.8144251108169556, |
| "learning_rate": 0.0002, |
| "loss": 0.1103, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.01560429118007452, |
| "grad_norm": 0.0331404022872448, |
| "learning_rate": 0.0002, |
| "loss": 0.0898, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.015622477999631717, |
| "grad_norm": 0.1460132598876953, |
| "learning_rate": 0.0002, |
| "loss": 0.0656, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.015640664819188913, |
| "grad_norm": 0.013606027700006962, |
| "learning_rate": 0.0002, |
| "loss": 0.0076, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.01565885163874611, |
| "grad_norm": 0.22224061191082, |
| "learning_rate": 0.0002, |
| "loss": 0.2609, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.015677038458303306, |
| "grad_norm": 0.22729800641536713, |
| "learning_rate": 0.0002, |
| "loss": 0.1028, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.015695225277860502, |
| "grad_norm": 0.0848810002207756, |
| "learning_rate": 0.0002, |
| "loss": 0.0871, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.0157134120974177, |
| "grad_norm": 0.17896370589733124, |
| "learning_rate": 0.0002, |
| "loss": 0.0636, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.015731598916974895, |
| "grad_norm": 0.006263076793402433, |
| "learning_rate": 0.0002, |
| "loss": 0.0068, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.01574978573653209, |
| "grad_norm": 0.29927679896354675, |
| "learning_rate": 0.0002, |
| "loss": 0.2761, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.015767972556089287, |
| "grad_norm": 0.05662700906395912, |
| "learning_rate": 0.0002, |
| "loss": 0.1029, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.015786159375646484, |
| "grad_norm": 0.09140895307064056, |
| "learning_rate": 0.0002, |
| "loss": 0.0854, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.01580434619520368, |
| "grad_norm": 0.21034927666187286, |
| "learning_rate": 0.0002, |
| "loss": 0.0691, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.015822533014760876, |
| "grad_norm": 0.0010229075560346246, |
| "learning_rate": 0.0002, |
| "loss": 0.0033, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.015840719834318073, |
| "grad_norm": 0.0626237690448761, |
| "learning_rate": 0.0002, |
| "loss": 0.3583, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.01585890665387527, |
| "grad_norm": 0.10027278959751129, |
| "learning_rate": 0.0002, |
| "loss": 0.0959, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.015877093473432465, |
| "grad_norm": 0.0870286151766777, |
| "learning_rate": 0.0002, |
| "loss": 0.0767, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.01589528029298966, |
| "grad_norm": 0.16106969118118286, |
| "learning_rate": 0.0002, |
| "loss": 0.0763, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.015913467112546858, |
| "grad_norm": 0.0022529088892042637, |
| "learning_rate": 0.0002, |
| "loss": 0.0108, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.015931653932104058, |
| "grad_norm": 0.06070050224661827, |
| "learning_rate": 0.0002, |
| "loss": 0.2606, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.015949840751661254, |
| "grad_norm": 0.09406338632106781, |
| "learning_rate": 0.0002, |
| "loss": 0.1062, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.01596802757121845, |
| "grad_norm": 0.1367248147726059, |
| "learning_rate": 0.0002, |
| "loss": 0.0757, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.015986214390775647, |
| "grad_norm": 0.26938319206237793, |
| "learning_rate": 0.0002, |
| "loss": 0.0733, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.016004401210332843, |
| "grad_norm": 0.011559409089386463, |
| "learning_rate": 0.0002, |
| "loss": 0.0176, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.01602258802989004, |
| "grad_norm": 0.12351766228675842, |
| "learning_rate": 0.0002, |
| "loss": 0.24, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.016040774849447235, |
| "grad_norm": 0.08965809643268585, |
| "learning_rate": 0.0002, |
| "loss": 0.0947, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.016058961669004432, |
| "grad_norm": 0.027005961164832115, |
| "learning_rate": 0.0002, |
| "loss": 0.0738, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.016077148488561628, |
| "grad_norm": 0.18656685948371887, |
| "learning_rate": 0.0002, |
| "loss": 0.0667, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.016095335308118824, |
| "grad_norm": 0.003148626768961549, |
| "learning_rate": 0.0002, |
| "loss": 0.0119, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.01611352212767602, |
| "grad_norm": 0.07959452271461487, |
| "learning_rate": 0.0002, |
| "loss": 0.275, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.016131708947233217, |
| "grad_norm": 0.19433775544166565, |
| "learning_rate": 0.0002, |
| "loss": 0.0872, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.016149895766790413, |
| "grad_norm": 0.1376393735408783, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.01616808258634761, |
| "grad_norm": 0.18282419443130493, |
| "learning_rate": 0.0002, |
| "loss": 0.068, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.016186269405904806, |
| "grad_norm": 0.0112565653398633, |
| "learning_rate": 0.0002, |
| "loss": 0.0137, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.016204456225462002, |
| "grad_norm": 0.08975637704133987, |
| "learning_rate": 0.0002, |
| "loss": 0.23, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.0162226430450192, |
| "grad_norm": 0.19316238164901733, |
| "learning_rate": 0.0002, |
| "loss": 0.0975, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.016240829864576395, |
| "grad_norm": 0.1870724856853485, |
| "learning_rate": 0.0002, |
| "loss": 0.0862, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.01625901668413359, |
| "grad_norm": 0.19031721353530884, |
| "learning_rate": 0.0002, |
| "loss": 0.0735, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.016277203503690788, |
| "grad_norm": 0.015979783609509468, |
| "learning_rate": 0.0002, |
| "loss": 0.0249, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.016295390323247984, |
| "grad_norm": 0.09105712175369263, |
| "learning_rate": 0.0002, |
| "loss": 0.1573, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.01631357714280518, |
| "grad_norm": 0.13035650551319122, |
| "learning_rate": 0.0002, |
| "loss": 0.0958, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.016331763962362376, |
| "grad_norm": 0.18613573908805847, |
| "learning_rate": 0.0002, |
| "loss": 0.088, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.016349950781919573, |
| "grad_norm": 0.2518664300441742, |
| "learning_rate": 0.0002, |
| "loss": 0.0725, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.01636813760147677, |
| "grad_norm": 0.03324449062347412, |
| "learning_rate": 0.0002, |
| "loss": 0.0256, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.016386324421033965, |
| "grad_norm": 0.08766523003578186, |
| "learning_rate": 0.0002, |
| "loss": 0.1531, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.01640451124059116, |
| "grad_norm": 0.14177583158016205, |
| "learning_rate": 0.0002, |
| "loss": 0.0861, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.016422698060148358, |
| "grad_norm": 0.1354762315750122, |
| "learning_rate": 0.0002, |
| "loss": 0.0782, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.016440884879705554, |
| "grad_norm": 0.15894347429275513, |
| "learning_rate": 0.0002, |
| "loss": 0.073, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.01645907169926275, |
| "grad_norm": 0.02154761180281639, |
| "learning_rate": 0.0002, |
| "loss": 0.0156, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.016477258518819947, |
| "grad_norm": 0.06432317197322845, |
| "learning_rate": 0.0002, |
| "loss": 0.1384, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.016495445338377143, |
| "grad_norm": 0.12112505733966827, |
| "learning_rate": 0.0002, |
| "loss": 0.093, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.01651363215793434, |
| "grad_norm": 0.10628003627061844, |
| "learning_rate": 0.0002, |
| "loss": 0.0738, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.016531818977491536, |
| "grad_norm": 0.1930958330631256, |
| "learning_rate": 0.0002, |
| "loss": 0.0678, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.016550005797048732, |
| "grad_norm": 0.03878525644540787, |
| "learning_rate": 0.0002, |
| "loss": 0.0235, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.016568192616605932, |
| "grad_norm": 0.0920896977186203, |
| "learning_rate": 0.0002, |
| "loss": 0.1661, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.016586379436163128, |
| "grad_norm": 0.11687818914651871, |
| "learning_rate": 0.0002, |
| "loss": 0.0847, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.016604566255720325, |
| "grad_norm": 0.10511167347431183, |
| "learning_rate": 0.0002, |
| "loss": 0.0832, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.01662275307527752, |
| "grad_norm": 0.26365017890930176, |
| "learning_rate": 0.0002, |
| "loss": 0.0717, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.016640939894834717, |
| "grad_norm": 0.02445841394364834, |
| "learning_rate": 0.0002, |
| "loss": 0.0233, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.016659126714391913, |
| "grad_norm": 0.08213133364915848, |
| "learning_rate": 0.0002, |
| "loss": 0.1439, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.01667731353394911, |
| "grad_norm": 0.17025598883628845, |
| "learning_rate": 0.0002, |
| "loss": 0.0852, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.016695500353506306, |
| "grad_norm": 0.098059743642807, |
| "learning_rate": 0.0002, |
| "loss": 0.0761, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.016713687173063502, |
| "grad_norm": 0.18436011672019958, |
| "learning_rate": 0.0002, |
| "loss": 0.0674, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.0167318739926207, |
| "grad_norm": 0.011012010276317596, |
| "learning_rate": 0.0002, |
| "loss": 0.0221, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.016750060812177895, |
| "grad_norm": 0.07544030994176865, |
| "learning_rate": 0.0002, |
| "loss": 0.161, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.01676824763173509, |
| "grad_norm": 0.16041946411132812, |
| "learning_rate": 0.0002, |
| "loss": 0.0824, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.016786434451292288, |
| "grad_norm": 0.17295844852924347, |
| "learning_rate": 0.0002, |
| "loss": 0.0797, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.016804621270849484, |
| "grad_norm": 0.1818791776895523, |
| "learning_rate": 0.0002, |
| "loss": 0.0683, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.01682280809040668, |
| "grad_norm": 0.019515013322234154, |
| "learning_rate": 0.0002, |
| "loss": 0.0188, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.016840994909963877, |
| "grad_norm": 0.15059705078601837, |
| "learning_rate": 0.0002, |
| "loss": 0.1743, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.016859181729521073, |
| "grad_norm": 0.1481601595878601, |
| "learning_rate": 0.0002, |
| "loss": 0.0906, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.01687736854907827, |
| "grad_norm": 0.07433108985424042, |
| "learning_rate": 0.0002, |
| "loss": 0.08, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.016895555368635466, |
| "grad_norm": 0.1752692312002182, |
| "learning_rate": 0.0002, |
| "loss": 0.06, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.016913742188192662, |
| "grad_norm": 0.027612384408712387, |
| "learning_rate": 0.0002, |
| "loss": 0.0157, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.016931929007749858, |
| "grad_norm": 0.08575212955474854, |
| "learning_rate": 0.0002, |
| "loss": 0.1679, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.016950115827307054, |
| "grad_norm": 0.11127147823572159, |
| "learning_rate": 0.0002, |
| "loss": 0.0848, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.01696830264686425, |
| "grad_norm": 0.08989393711090088, |
| "learning_rate": 0.0002, |
| "loss": 0.0823, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.016986489466421447, |
| "grad_norm": 0.18898548185825348, |
| "learning_rate": 0.0002, |
| "loss": 0.0687, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.017004676285978643, |
| "grad_norm": 0.023646721616387367, |
| "learning_rate": 0.0002, |
| "loss": 0.0244, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.01702286310553584, |
| "grad_norm": 0.11511775106191635, |
| "learning_rate": 0.0002, |
| "loss": 0.1642, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.017041049925093036, |
| "grad_norm": 0.1458021104335785, |
| "learning_rate": 0.0002, |
| "loss": 0.084, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.017059236744650232, |
| "grad_norm": 0.060528095811605453, |
| "learning_rate": 0.0002, |
| "loss": 0.0809, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.01707742356420743, |
| "grad_norm": 0.16314280033111572, |
| "learning_rate": 0.0002, |
| "loss": 0.0661, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.017095610383764625, |
| "grad_norm": 0.03078557923436165, |
| "learning_rate": 0.0002, |
| "loss": 0.015, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.01711379720332182, |
| "grad_norm": 0.11488370597362518, |
| "learning_rate": 0.0002, |
| "loss": 0.1712, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.017131984022879018, |
| "grad_norm": 0.0972781702876091, |
| "learning_rate": 0.0002, |
| "loss": 0.0856, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.017150170842436214, |
| "grad_norm": 0.08523645251989365, |
| "learning_rate": 0.0002, |
| "loss": 0.0744, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.01716835766199341, |
| "grad_norm": 0.18629521131515503, |
| "learning_rate": 0.0002, |
| "loss": 0.0659, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.01718654448155061, |
| "grad_norm": 0.00908618327230215, |
| "learning_rate": 0.0002, |
| "loss": 0.0219, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.017204731301107806, |
| "grad_norm": 0.05552325397729874, |
| "learning_rate": 0.0002, |
| "loss": 0.1377, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.017222918120665003, |
| "grad_norm": 0.16133128106594086, |
| "learning_rate": 0.0002, |
| "loss": 0.0885, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.0172411049402222, |
| "grad_norm": 0.0965205654501915, |
| "learning_rate": 0.0002, |
| "loss": 0.0713, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.017259291759779395, |
| "grad_norm": 0.21675604581832886, |
| "learning_rate": 0.0002, |
| "loss": 0.0658, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.01727747857933659, |
| "grad_norm": 0.043898243457078934, |
| "learning_rate": 0.0002, |
| "loss": 0.0213, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.017295665398893788, |
| "grad_norm": 0.0968618243932724, |
| "learning_rate": 0.0002, |
| "loss": 0.1391, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.017313852218450984, |
| "grad_norm": 0.15061378479003906, |
| "learning_rate": 0.0002, |
| "loss": 0.0879, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.01733203903800818, |
| "grad_norm": 0.08481590449810028, |
| "learning_rate": 0.0002, |
| "loss": 0.0771, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.017350225857565377, |
| "grad_norm": 0.20935995876789093, |
| "learning_rate": 0.0002, |
| "loss": 0.0705, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.017368412677122573, |
| "grad_norm": 0.04010302573442459, |
| "learning_rate": 0.0002, |
| "loss": 0.0257, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.01738659949667977, |
| "grad_norm": 0.10532956570386887, |
| "learning_rate": 0.0002, |
| "loss": 0.1528, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.017404786316236966, |
| "grad_norm": 0.1484638750553131, |
| "learning_rate": 0.0002, |
| "loss": 0.0847, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.017422973135794162, |
| "grad_norm": 0.05873465910553932, |
| "learning_rate": 0.0002, |
| "loss": 0.0765, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.01744115995535136, |
| "grad_norm": 0.1689092516899109, |
| "learning_rate": 0.0002, |
| "loss": 0.0673, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.017459346774908555, |
| "grad_norm": 0.014237391762435436, |
| "learning_rate": 0.0002, |
| "loss": 0.0165, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.01747753359446575, |
| "grad_norm": 0.06250491738319397, |
| "learning_rate": 0.0002, |
| "loss": 0.1635, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.017495720414022947, |
| "grad_norm": 0.08895017951726913, |
| "learning_rate": 0.0002, |
| "loss": 0.0765, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.017513907233580144, |
| "grad_norm": 0.08614445477724075, |
| "learning_rate": 0.0002, |
| "loss": 0.0852, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.01753209405313734, |
| "grad_norm": 0.25440698862075806, |
| "learning_rate": 0.0002, |
| "loss": 0.0735, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.017550280872694536, |
| "grad_norm": 0.015447271056473255, |
| "learning_rate": 0.0002, |
| "loss": 0.0199, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.017568467692251732, |
| "grad_norm": 0.08685171604156494, |
| "learning_rate": 0.0002, |
| "loss": 0.1721, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.01758665451180893, |
| "grad_norm": 0.1007658839225769, |
| "learning_rate": 0.0002, |
| "loss": 0.0858, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.017604841331366125, |
| "grad_norm": 0.1291055977344513, |
| "learning_rate": 0.0002, |
| "loss": 0.0817, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.01762302815092332, |
| "grad_norm": 0.21103522181510925, |
| "learning_rate": 0.0002, |
| "loss": 0.0707, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.017641214970480518, |
| "grad_norm": 0.027955593541264534, |
| "learning_rate": 0.0002, |
| "loss": 0.0199, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.017659401790037714, |
| "grad_norm": 0.06710019707679749, |
| "learning_rate": 0.0002, |
| "loss": 0.1623, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.01767758860959491, |
| "grad_norm": 0.09083720296621323, |
| "learning_rate": 0.0002, |
| "loss": 0.0845, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.017695775429152107, |
| "grad_norm": 0.07230041921138763, |
| "learning_rate": 0.0002, |
| "loss": 0.0767, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.017713962248709303, |
| "grad_norm": 0.19016912579536438, |
| "learning_rate": 0.0002, |
| "loss": 0.0648, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.0177321490682665, |
| "grad_norm": 0.03999534249305725, |
| "learning_rate": 0.0002, |
| "loss": 0.0216, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.017750335887823696, |
| "grad_norm": 0.08057496696710587, |
| "learning_rate": 0.0002, |
| "loss": 0.1251, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.017768522707380892, |
| "grad_norm": 0.16494789719581604, |
| "learning_rate": 0.0002, |
| "loss": 0.0896, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.017786709526938088, |
| "grad_norm": 0.07119818776845932, |
| "learning_rate": 0.0002, |
| "loss": 0.0749, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.017804896346495288, |
| "grad_norm": 0.1790028065443039, |
| "learning_rate": 0.0002, |
| "loss": 0.0718, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.017823083166052484, |
| "grad_norm": 0.055643875151872635, |
| "learning_rate": 0.0002, |
| "loss": 0.0294, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.01784126998560968, |
| "grad_norm": 0.15530900657176971, |
| "learning_rate": 0.0002, |
| "loss": 0.1343, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.017859456805166877, |
| "grad_norm": 0.08989892899990082, |
| "learning_rate": 0.0002, |
| "loss": 0.0778, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.017877643624724073, |
| "grad_norm": 0.038054581731557846, |
| "learning_rate": 0.0002, |
| "loss": 0.0842, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.01789583044428127, |
| "grad_norm": 0.12264154851436615, |
| "learning_rate": 0.0002, |
| "loss": 0.065, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.017914017263838466, |
| "grad_norm": 0.03432893753051758, |
| "learning_rate": 0.0002, |
| "loss": 0.02, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.017932204083395662, |
| "grad_norm": 0.0516468770802021, |
| "learning_rate": 0.0002, |
| "loss": 0.1339, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.01795039090295286, |
| "grad_norm": 0.11306226998567581, |
| "learning_rate": 0.0002, |
| "loss": 0.0842, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.017968577722510055, |
| "grad_norm": 0.051579318940639496, |
| "learning_rate": 0.0002, |
| "loss": 0.0795, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.01798676454206725, |
| "grad_norm": 0.19050930440425873, |
| "learning_rate": 0.0002, |
| "loss": 0.0673, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.018004951361624447, |
| "grad_norm": 0.015286738984286785, |
| "learning_rate": 0.0002, |
| "loss": 0.0169, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.018023138181181644, |
| "grad_norm": 0.16055025160312653, |
| "learning_rate": 0.0002, |
| "loss": 0.1655, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.01804132500073884, |
| "grad_norm": 0.05445674806833267, |
| "learning_rate": 0.0002, |
| "loss": 0.0786, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.018059511820296036, |
| "grad_norm": 0.07221481204032898, |
| "learning_rate": 0.0002, |
| "loss": 0.0726, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.018077698639853233, |
| "grad_norm": 0.15800146758556366, |
| "learning_rate": 0.0002, |
| "loss": 0.0607, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.01809588545941043, |
| "grad_norm": 0.007713336031883955, |
| "learning_rate": 0.0002, |
| "loss": 0.0148, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.018114072278967625, |
| "grad_norm": 0.04677269607782364, |
| "learning_rate": 0.0002, |
| "loss": 0.1718, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.01813225909852482, |
| "grad_norm": 0.1699189841747284, |
| "learning_rate": 0.0002, |
| "loss": 0.0865, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.018150445918082018, |
| "grad_norm": 0.04046279937028885, |
| "learning_rate": 0.0002, |
| "loss": 0.0781, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.018168632737639214, |
| "grad_norm": 0.164504736661911, |
| "learning_rate": 0.0002, |
| "loss": 0.0645, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.01818681955719641, |
| "grad_norm": 0.014479747042059898, |
| "learning_rate": 0.0002, |
| "loss": 0.0186, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.018205006376753607, |
| "grad_norm": 0.051388438791036606, |
| "learning_rate": 0.0002, |
| "loss": 0.1414, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.018223193196310803, |
| "grad_norm": 0.11734543740749359, |
| "learning_rate": 0.0002, |
| "loss": 0.0894, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.018241380015868, |
| "grad_norm": 0.022312749177217484, |
| "learning_rate": 0.0002, |
| "loss": 0.0775, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.018259566835425196, |
| "grad_norm": 0.1579144448041916, |
| "learning_rate": 0.0002, |
| "loss": 0.0668, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.018277753654982392, |
| "grad_norm": 0.02757895737886429, |
| "learning_rate": 0.0002, |
| "loss": 0.0197, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.01829594047453959, |
| "grad_norm": 0.07557844370603561, |
| "learning_rate": 0.0002, |
| "loss": 0.1526, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.018314127294096785, |
| "grad_norm": 0.1216227188706398, |
| "learning_rate": 0.0002, |
| "loss": 0.0871, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.01833231411365398, |
| "grad_norm": 0.04201141744852066, |
| "learning_rate": 0.0002, |
| "loss": 0.0723, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.018350500933211177, |
| "grad_norm": 0.151902437210083, |
| "learning_rate": 0.0002, |
| "loss": 0.063, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.018368687752768374, |
| "grad_norm": 0.028730260208249092, |
| "learning_rate": 0.0002, |
| "loss": 0.0154, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.01838687457232557, |
| "grad_norm": 0.0815989300608635, |
| "learning_rate": 0.0002, |
| "loss": 0.1439, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.018405061391882766, |
| "grad_norm": 0.16359028220176697, |
| "learning_rate": 0.0002, |
| "loss": 0.0901, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.018423248211439962, |
| "grad_norm": 0.055030226707458496, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.018441435030997162, |
| "grad_norm": 0.17064853012561798, |
| "learning_rate": 0.0002, |
| "loss": 0.0713, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.01845962185055436, |
| "grad_norm": 0.024902408942580223, |
| "learning_rate": 0.0002, |
| "loss": 0.0201, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.018477808670111555, |
| "grad_norm": 0.037377748638391495, |
| "learning_rate": 0.0002, |
| "loss": 0.1394, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.01849599548966875, |
| "grad_norm": 0.14072410762310028, |
| "learning_rate": 0.0002, |
| "loss": 0.088, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.018514182309225947, |
| "grad_norm": 0.07339414954185486, |
| "learning_rate": 0.0002, |
| "loss": 0.0739, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.018532369128783144, |
| "grad_norm": 0.166766956448555, |
| "learning_rate": 0.0002, |
| "loss": 0.0648, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.01855055594834034, |
| "grad_norm": 0.009605699218809605, |
| "learning_rate": 0.0002, |
| "loss": 0.0148, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.018568742767897536, |
| "grad_norm": 0.045747216790914536, |
| "learning_rate": 0.0002, |
| "loss": 0.1426, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.018586929587454733, |
| "grad_norm": 0.09927495568990707, |
| "learning_rate": 0.0002, |
| "loss": 0.0757, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.01860511640701193, |
| "grad_norm": 0.032050736248493195, |
| "learning_rate": 0.0002, |
| "loss": 0.0732, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.018623303226569125, |
| "grad_norm": 0.14915086328983307, |
| "learning_rate": 0.0002, |
| "loss": 0.0619, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.01864149004612632, |
| "grad_norm": 0.019674960523843765, |
| "learning_rate": 0.0002, |
| "loss": 0.0176, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.018659676865683518, |
| "grad_norm": 0.0990150198340416, |
| "learning_rate": 0.0002, |
| "loss": 0.156, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.018677863685240714, |
| "grad_norm": 0.1409665048122406, |
| "learning_rate": 0.0002, |
| "loss": 0.0843, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.01869605050479791, |
| "grad_norm": 0.0232121329754591, |
| "learning_rate": 0.0002, |
| "loss": 0.0712, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.018714237324355107, |
| "grad_norm": 0.14811532199382782, |
| "learning_rate": 0.0002, |
| "loss": 0.0649, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.018732424143912303, |
| "grad_norm": 0.025812385603785515, |
| "learning_rate": 0.0002, |
| "loss": 0.0191, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.0187506109634695, |
| "grad_norm": 0.03710811957716942, |
| "learning_rate": 0.0002, |
| "loss": 0.1323, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.018768797783026696, |
| "grad_norm": 0.16586032509803772, |
| "learning_rate": 0.0002, |
| "loss": 0.0868, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.018786984602583892, |
| "grad_norm": 0.09154761582612991, |
| "learning_rate": 0.0002, |
| "loss": 0.0762, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.01880517142214109, |
| "grad_norm": 0.20400644838809967, |
| "learning_rate": 0.0002, |
| "loss": 0.072, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.018823358241698285, |
| "grad_norm": 0.04426256939768791, |
| "learning_rate": 0.0002, |
| "loss": 0.0281, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.01884154506125548, |
| "grad_norm": 0.10118848830461502, |
| "learning_rate": 0.0002, |
| "loss": 0.116, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.018859731880812677, |
| "grad_norm": 0.11934473365545273, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.018877918700369874, |
| "grad_norm": 0.04116957262158394, |
| "learning_rate": 0.0002, |
| "loss": 0.0811, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.01889610551992707, |
| "grad_norm": 0.16668827831745148, |
| "learning_rate": 0.0002, |
| "loss": 0.064, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.018914292339484266, |
| "grad_norm": 0.04703928530216217, |
| "learning_rate": 0.0002, |
| "loss": 0.0189, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.018932479159041463, |
| "grad_norm": 0.10670439153909683, |
| "learning_rate": 0.0002, |
| "loss": 0.1329, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.01895066597859866, |
| "grad_norm": 0.033486492931842804, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.018968852798155855, |
| "grad_norm": 0.03778929263353348, |
| "learning_rate": 0.0002, |
| "loss": 0.0739, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.01898703961771305, |
| "grad_norm": 0.1499231606721878, |
| "learning_rate": 0.0002, |
| "loss": 0.0691, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.019005226437270248, |
| "grad_norm": 0.020496509969234467, |
| "learning_rate": 0.0002, |
| "loss": 0.0166, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.019023413256827444, |
| "grad_norm": 0.07973606884479523, |
| "learning_rate": 0.0002, |
| "loss": 0.1647, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.01904160007638464, |
| "grad_norm": 0.2187214344739914, |
| "learning_rate": 0.0002, |
| "loss": 0.0851, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.01905978689594184, |
| "grad_norm": 0.05374719575047493, |
| "learning_rate": 0.0002, |
| "loss": 0.0763, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.019077973715499037, |
| "grad_norm": 0.20388802886009216, |
| "learning_rate": 0.0002, |
| "loss": 0.0661, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.019096160535056233, |
| "grad_norm": 0.023114027455449104, |
| "learning_rate": 0.0002, |
| "loss": 0.0196, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.01911434735461343, |
| "grad_norm": 0.07263924926519394, |
| "learning_rate": 0.0002, |
| "loss": 0.1397, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.019132534174170625, |
| "grad_norm": 0.13590484857559204, |
| "learning_rate": 0.0002, |
| "loss": 0.0866, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.019150720993727822, |
| "grad_norm": 0.03279007971286774, |
| "learning_rate": 0.0002, |
| "loss": 0.0845, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.019168907813285018, |
| "grad_norm": 0.16929341852664948, |
| "learning_rate": 0.0002, |
| "loss": 0.0638, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.019187094632842214, |
| "grad_norm": 0.043504901230335236, |
| "learning_rate": 0.0002, |
| "loss": 0.0217, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.01920528145239941, |
| "grad_norm": 0.05582214519381523, |
| "learning_rate": 0.0002, |
| "loss": 0.1454, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.019223468271956607, |
| "grad_norm": 0.12112174928188324, |
| "learning_rate": 0.0002, |
| "loss": 0.0773, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.019241655091513803, |
| "grad_norm": 0.028584860265254974, |
| "learning_rate": 0.0002, |
| "loss": 0.0766, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.019259841911071, |
| "grad_norm": 0.14817841351032257, |
| "learning_rate": 0.0002, |
| "loss": 0.07, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.019278028730628196, |
| "grad_norm": 0.0354049950838089, |
| "learning_rate": 0.0002, |
| "loss": 0.0205, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.019296215550185392, |
| "grad_norm": 0.0580359622836113, |
| "learning_rate": 0.0002, |
| "loss": 0.126, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.01931440236974259, |
| "grad_norm": 0.1495518982410431, |
| "learning_rate": 0.0002, |
| "loss": 0.0759, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.019332589189299785, |
| "grad_norm": 0.029057197272777557, |
| "learning_rate": 0.0002, |
| "loss": 0.0751, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.01935077600885698, |
| "grad_norm": 0.17057828605175018, |
| "learning_rate": 0.0002, |
| "loss": 0.0675, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.019368962828414178, |
| "grad_norm": 0.029123524203896523, |
| "learning_rate": 0.0002, |
| "loss": 0.0207, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.019387149647971374, |
| "grad_norm": 0.06929099559783936, |
| "learning_rate": 0.0002, |
| "loss": 0.1272, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.01940533646752857, |
| "grad_norm": 0.0806749165058136, |
| "learning_rate": 0.0002, |
| "loss": 0.0825, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.019423523287085766, |
| "grad_norm": 0.025454839691519737, |
| "learning_rate": 0.0002, |
| "loss": 0.0786, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.019441710106642963, |
| "grad_norm": 0.1879327893257141, |
| "learning_rate": 0.0002, |
| "loss": 0.0664, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.01945989692620016, |
| "grad_norm": 0.03334587439894676, |
| "learning_rate": 0.0002, |
| "loss": 0.0217, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.019478083745757355, |
| "grad_norm": 0.05760979652404785, |
| "learning_rate": 0.0002, |
| "loss": 0.141, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.01949627056531455, |
| "grad_norm": 0.03565089777112007, |
| "learning_rate": 0.0002, |
| "loss": 0.0849, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.019514457384871748, |
| "grad_norm": 0.1484966278076172, |
| "learning_rate": 0.0002, |
| "loss": 0.0839, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.019532644204428944, |
| "grad_norm": 0.22200991213321686, |
| "learning_rate": 0.0002, |
| "loss": 0.0673, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.01955083102398614, |
| "grad_norm": 0.017915472388267517, |
| "learning_rate": 0.0002, |
| "loss": 0.0203, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.019569017843543337, |
| "grad_norm": 0.11213338375091553, |
| "learning_rate": 0.0002, |
| "loss": 0.126, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.019587204663100533, |
| "grad_norm": 0.1563912183046341, |
| "learning_rate": 0.0002, |
| "loss": 0.0803, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.01960539148265773, |
| "grad_norm": 0.02315036952495575, |
| "learning_rate": 0.0002, |
| "loss": 0.0801, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.019623578302214926, |
| "grad_norm": 0.14482071995735168, |
| "learning_rate": 0.0002, |
| "loss": 0.0701, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.019641765121772122, |
| "grad_norm": 0.0369495190680027, |
| "learning_rate": 0.0002, |
| "loss": 0.022, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.01965995194132932, |
| "grad_norm": 0.0659516304731369, |
| "learning_rate": 0.0002, |
| "loss": 0.1282, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.019678138760886515, |
| "grad_norm": 0.09046377241611481, |
| "learning_rate": 0.0002, |
| "loss": 0.0748, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.019696325580443715, |
| "grad_norm": 0.05669049918651581, |
| "learning_rate": 0.0002, |
| "loss": 0.0808, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.01971451240000091, |
| "grad_norm": 0.16696439683437347, |
| "learning_rate": 0.0002, |
| "loss": 0.0696, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.019732699219558107, |
| "grad_norm": 0.02596648782491684, |
| "learning_rate": 0.0002, |
| "loss": 0.0189, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.019750886039115303, |
| "grad_norm": 0.030568787828087807, |
| "learning_rate": 0.0002, |
| "loss": 0.1431, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.0197690728586725, |
| "grad_norm": 0.11519906669855118, |
| "learning_rate": 0.0002, |
| "loss": 0.0832, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.019787259678229696, |
| "grad_norm": 0.12018325924873352, |
| "learning_rate": 0.0002, |
| "loss": 0.0769, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.019805446497786892, |
| "grad_norm": 0.15875691175460815, |
| "learning_rate": 0.0002, |
| "loss": 0.0679, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.01982363331734409, |
| "grad_norm": 0.02812560275197029, |
| "learning_rate": 0.0002, |
| "loss": 0.0236, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.019841820136901285, |
| "grad_norm": 0.039342913776636124, |
| "learning_rate": 0.0002, |
| "loss": 0.1433, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.01986000695645848, |
| "grad_norm": 0.1218978762626648, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.019878193776015678, |
| "grad_norm": 0.02437124028801918, |
| "learning_rate": 0.0002, |
| "loss": 0.081, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.019896380595572874, |
| "grad_norm": 0.16295987367630005, |
| "learning_rate": 0.0002, |
| "loss": 0.0615, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.01991456741513007, |
| "grad_norm": 0.03147517144680023, |
| "learning_rate": 0.0002, |
| "loss": 0.0167, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.019932754234687267, |
| "grad_norm": 0.051139310002326965, |
| "learning_rate": 0.0002, |
| "loss": 0.1486, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.019950941054244463, |
| "grad_norm": 0.10385333746671677, |
| "learning_rate": 0.0002, |
| "loss": 0.0835, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.01996912787380166, |
| "grad_norm": 0.029570510610938072, |
| "learning_rate": 0.0002, |
| "loss": 0.0808, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.019987314693358856, |
| "grad_norm": 0.1457994282245636, |
| "learning_rate": 0.0002, |
| "loss": 0.062, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.020005501512916052, |
| "grad_norm": 0.013582763262093067, |
| "learning_rate": 0.0002, |
| "loss": 0.0149, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.020023688332473248, |
| "grad_norm": 0.13736847043037415, |
| "learning_rate": 0.0002, |
| "loss": 0.164, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.020041875152030444, |
| "grad_norm": 0.146778866648674, |
| "learning_rate": 0.0002, |
| "loss": 0.0865, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.02006006197158764, |
| "grad_norm": 0.09848106652498245, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.020078248791144837, |
| "grad_norm": 0.19981160759925842, |
| "learning_rate": 0.0002, |
| "loss": 0.0738, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.020096435610702033, |
| "grad_norm": 0.0248726736754179, |
| "learning_rate": 0.0002, |
| "loss": 0.0232, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.02011462243025923, |
| "grad_norm": 0.09688897430896759, |
| "learning_rate": 0.0002, |
| "loss": 0.1361, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.020132809249816426, |
| "grad_norm": 0.09953918308019638, |
| "learning_rate": 0.0002, |
| "loss": 0.0815, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.020150996069373622, |
| "grad_norm": 0.05801590532064438, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.02016918288893082, |
| "grad_norm": 0.2029600441455841, |
| "learning_rate": 0.0002, |
| "loss": 0.0661, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.020187369708488015, |
| "grad_norm": 0.026677627116441727, |
| "learning_rate": 0.0002, |
| "loss": 0.022, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.02020555652804521, |
| "grad_norm": 0.054907217621803284, |
| "learning_rate": 0.0002, |
| "loss": 0.1356, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.020223743347602408, |
| "grad_norm": 0.16302120685577393, |
| "learning_rate": 0.0002, |
| "loss": 0.0721, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.020241930167159604, |
| "grad_norm": 0.03393812105059624, |
| "learning_rate": 0.0002, |
| "loss": 0.0733, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.0202601169867168, |
| "grad_norm": 0.16455304622650146, |
| "learning_rate": 0.0002, |
| "loss": 0.062, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.020278303806273996, |
| "grad_norm": 0.026239484548568726, |
| "learning_rate": 0.0002, |
| "loss": 0.0148, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.020296490625831193, |
| "grad_norm": 0.10048040002584457, |
| "learning_rate": 0.0002, |
| "loss": 0.1398, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.020314677445388393, |
| "grad_norm": 0.14221400022506714, |
| "learning_rate": 0.0002, |
| "loss": 0.0782, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.02033286426494559, |
| "grad_norm": 0.08432412147521973, |
| "learning_rate": 0.0002, |
| "loss": 0.0807, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.020351051084502785, |
| "grad_norm": 0.172295480966568, |
| "learning_rate": 0.0002, |
| "loss": 0.0655, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.02036923790405998, |
| "grad_norm": 0.023976756259799004, |
| "learning_rate": 0.0002, |
| "loss": 0.0218, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.020387424723617178, |
| "grad_norm": 0.03286349028348923, |
| "learning_rate": 0.0002, |
| "loss": 0.1441, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.020405611543174374, |
| "grad_norm": 0.04403531551361084, |
| "learning_rate": 0.0002, |
| "loss": 0.0825, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.02042379836273157, |
| "grad_norm": 0.0398452989757061, |
| "learning_rate": 0.0002, |
| "loss": 0.0755, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.020441985182288767, |
| "grad_norm": 0.15185104310512543, |
| "learning_rate": 0.0002, |
| "loss": 0.0591, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.020460172001845963, |
| "grad_norm": 0.005839187186211348, |
| "learning_rate": 0.0002, |
| "loss": 0.0172, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.02047835882140316, |
| "grad_norm": 0.031195368617773056, |
| "learning_rate": 0.0002, |
| "loss": 0.1594, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.020496545640960356, |
| "grad_norm": 0.1997426599264145, |
| "learning_rate": 0.0002, |
| "loss": 0.0797, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.020514732460517552, |
| "grad_norm": 0.03075752593576908, |
| "learning_rate": 0.0002, |
| "loss": 0.0798, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.02053291928007475, |
| "grad_norm": 0.17717675864696503, |
| "learning_rate": 0.0002, |
| "loss": 0.0654, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.020551106099631945, |
| "grad_norm": 0.036260057240724564, |
| "learning_rate": 0.0002, |
| "loss": 0.0196, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.02056929291918914, |
| "grad_norm": 0.11961262673139572, |
| "learning_rate": 0.0002, |
| "loss": 0.1313, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.020587479738746337, |
| "grad_norm": 0.12344212830066681, |
| "learning_rate": 0.0002, |
| "loss": 0.0798, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.020605666558303534, |
| "grad_norm": 0.12796273827552795, |
| "learning_rate": 0.0002, |
| "loss": 0.0693, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.02062385337786073, |
| "grad_norm": 0.12038332223892212, |
| "learning_rate": 0.0002, |
| "loss": 0.0637, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.020642040197417926, |
| "grad_norm": 0.013724497519433498, |
| "learning_rate": 0.0002, |
| "loss": 0.0134, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.020660227016975122, |
| "grad_norm": 0.030014917254447937, |
| "learning_rate": 0.0002, |
| "loss": 0.1355, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.02067841383653232, |
| "grad_norm": 0.05455614998936653, |
| "learning_rate": 0.0002, |
| "loss": 0.0824, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.020696600656089515, |
| "grad_norm": 0.09036605060100555, |
| "learning_rate": 0.0002, |
| "loss": 0.0712, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.02071478747564671, |
| "grad_norm": 0.15607796609401703, |
| "learning_rate": 0.0002, |
| "loss": 0.0613, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.020732974295203908, |
| "grad_norm": 0.029900453984737396, |
| "learning_rate": 0.0002, |
| "loss": 0.0216, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.020751161114761104, |
| "grad_norm": 0.06108042970299721, |
| "learning_rate": 0.0002, |
| "loss": 0.1223, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.0207693479343183, |
| "grad_norm": 0.052377600222826004, |
| "learning_rate": 0.0002, |
| "loss": 0.0795, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.020787534753875497, |
| "grad_norm": 0.063735231757164, |
| "learning_rate": 0.0002, |
| "loss": 0.0746, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.020805721573432693, |
| "grad_norm": 0.16977328062057495, |
| "learning_rate": 0.0002, |
| "loss": 0.0634, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.02082390839298989, |
| "grad_norm": 0.04451785981655121, |
| "learning_rate": 0.0002, |
| "loss": 0.0298, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.020842095212547086, |
| "grad_norm": 1.1584863662719727, |
| "learning_rate": 0.0002, |
| "loss": 0.1133, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.020860282032104282, |
| "grad_norm": 0.09867832064628601, |
| "learning_rate": 0.0002, |
| "loss": 0.0774, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.020878468851661478, |
| "grad_norm": 0.05493566766381264, |
| "learning_rate": 0.0002, |
| "loss": 0.0752, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.020896655671218674, |
| "grad_norm": 0.2149093896150589, |
| "learning_rate": 0.0002, |
| "loss": 0.0682, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.02091484249077587, |
| "grad_norm": 0.02243107184767723, |
| "learning_rate": 0.0002, |
| "loss": 0.0191, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.02093302931033307, |
| "grad_norm": 0.27817150950431824, |
| "learning_rate": 0.0002, |
| "loss": 0.1658, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.020951216129890267, |
| "grad_norm": 0.14467410743236542, |
| "learning_rate": 0.0002, |
| "loss": 0.083, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.020969402949447463, |
| "grad_norm": 0.1027064323425293, |
| "learning_rate": 0.0002, |
| "loss": 0.0825, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.02098758976900466, |
| "grad_norm": 0.2156657725572586, |
| "learning_rate": 0.0002, |
| "loss": 0.0694, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.021005776588561856, |
| "grad_norm": 0.023746902123093605, |
| "learning_rate": 0.0002, |
| "loss": 0.024, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.021023963408119052, |
| "grad_norm": 0.19738778471946716, |
| "learning_rate": 0.0002, |
| "loss": 0.1473, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.02104215022767625, |
| "grad_norm": 0.19759760797023773, |
| "learning_rate": 0.0002, |
| "loss": 0.078, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.021060337047233445, |
| "grad_norm": 9.88092041015625, |
| "learning_rate": 0.0002, |
| "loss": 0.0866, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.02107852386679064, |
| "grad_norm": 0.22301238775253296, |
| "learning_rate": 0.0002, |
| "loss": 0.0685, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.021096710686347837, |
| "grad_norm": 0.023191403597593307, |
| "learning_rate": 0.0002, |
| "loss": 0.0468, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.021114897505905034, |
| "grad_norm": 0.10442623496055603, |
| "learning_rate": 0.0002, |
| "loss": 0.2046, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.02113308432546223, |
| "grad_norm": 0.18771864473819733, |
| "learning_rate": 0.0002, |
| "loss": 0.0805, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.021151271145019426, |
| "grad_norm": 0.05516243353486061, |
| "learning_rate": 0.0002, |
| "loss": 0.0795, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.021169457964576623, |
| "grad_norm": 0.21308554708957672, |
| "learning_rate": 0.0002, |
| "loss": 0.0725, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.02118764478413382, |
| "grad_norm": 0.010607315227389336, |
| "learning_rate": 0.0002, |
| "loss": 0.0241, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.021205831603691015, |
| "grad_norm": 0.0542677640914917, |
| "learning_rate": 0.0002, |
| "loss": 0.1648, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.02122401842324821, |
| "grad_norm": 0.11239166557788849, |
| "learning_rate": 0.0002, |
| "loss": 0.0825, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.021242205242805408, |
| "grad_norm": 0.032700493931770325, |
| "learning_rate": 0.0002, |
| "loss": 0.0727, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.021260392062362604, |
| "grad_norm": 0.2005159705877304, |
| "learning_rate": 0.0002, |
| "loss": 0.0708, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.0212785788819198, |
| "grad_norm": 0.01741277053952217, |
| "learning_rate": 0.0002, |
| "loss": 0.0232, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.021296765701476997, |
| "grad_norm": 0.04048267379403114, |
| "learning_rate": 0.0002, |
| "loss": 0.1403, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.021314952521034193, |
| "grad_norm": 0.18796616792678833, |
| "learning_rate": 0.0002, |
| "loss": 0.0886, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.02133313934059139, |
| "grad_norm": 0.06360754370689392, |
| "learning_rate": 0.0002, |
| "loss": 0.0731, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.021351326160148586, |
| "grad_norm": 0.14168913662433624, |
| "learning_rate": 0.0002, |
| "loss": 0.0622, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.021369512979705782, |
| "grad_norm": 0.012988853268325329, |
| "learning_rate": 0.0002, |
| "loss": 0.0144, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.02138769979926298, |
| "grad_norm": 0.09176674485206604, |
| "learning_rate": 0.0002, |
| "loss": 0.1574, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.021405886618820175, |
| "grad_norm": 0.11934395134449005, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.02142407343837737, |
| "grad_norm": 0.11853605508804321, |
| "learning_rate": 0.0002, |
| "loss": 0.076, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.021442260257934567, |
| "grad_norm": 0.1625816971063614, |
| "learning_rate": 0.0002, |
| "loss": 0.0649, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.021460447077491764, |
| "grad_norm": 0.023221928626298904, |
| "learning_rate": 0.0002, |
| "loss": 0.0228, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.02147863389704896, |
| "grad_norm": 0.0494253933429718, |
| "learning_rate": 0.0002, |
| "loss": 0.1418, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.021496820716606156, |
| "grad_norm": 0.18250688910484314, |
| "learning_rate": 0.0002, |
| "loss": 0.0827, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.021515007536163352, |
| "grad_norm": 0.13340160250663757, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.02153319435572055, |
| "grad_norm": 0.15497778356075287, |
| "learning_rate": 0.0002, |
| "loss": 0.0613, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.021551381175277745, |
| "grad_norm": 0.03259354829788208, |
| "learning_rate": 0.0002, |
| "loss": 0.023, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.021569567994834945, |
| "grad_norm": 0.09126435220241547, |
| "learning_rate": 0.0002, |
| "loss": 0.1235, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.02158775481439214, |
| "grad_norm": 0.13455496728420258, |
| "learning_rate": 0.0002, |
| "loss": 0.0806, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.021605941633949338, |
| "grad_norm": 0.10817539691925049, |
| "learning_rate": 0.0002, |
| "loss": 0.0829, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.021624128453506534, |
| "grad_norm": 0.1913878321647644, |
| "learning_rate": 0.0002, |
| "loss": 0.0636, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.02164231527306373, |
| "grad_norm": 0.025634530931711197, |
| "learning_rate": 0.0002, |
| "loss": 0.0216, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.021660502092620926, |
| "grad_norm": 0.10507725924253464, |
| "learning_rate": 0.0002, |
| "loss": 0.1326, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.021678688912178123, |
| "grad_norm": 0.09721452742815018, |
| "learning_rate": 0.0002, |
| "loss": 0.0857, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.02169687573173532, |
| "grad_norm": 0.028759269043803215, |
| "learning_rate": 0.0002, |
| "loss": 0.0751, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.021715062551292515, |
| "grad_norm": 0.17618104815483093, |
| "learning_rate": 0.0002, |
| "loss": 0.062, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.02173324937084971, |
| "grad_norm": 0.02503124624490738, |
| "learning_rate": 0.0002, |
| "loss": 0.0182, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.021751436190406908, |
| "grad_norm": 0.10976126044988632, |
| "learning_rate": 0.0002, |
| "loss": 0.1564, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.021769623009964104, |
| "grad_norm": 0.0833989605307579, |
| "learning_rate": 0.0002, |
| "loss": 0.0784, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.0217878098295213, |
| "grad_norm": 0.06359647959470749, |
| "learning_rate": 0.0002, |
| "loss": 0.0795, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.021805996649078497, |
| "grad_norm": 0.1677824705839157, |
| "learning_rate": 0.0002, |
| "loss": 0.0699, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.021824183468635693, |
| "grad_norm": 0.018009621649980545, |
| "learning_rate": 0.0002, |
| "loss": 0.0185, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.02184237028819289, |
| "grad_norm": 0.12256644666194916, |
| "learning_rate": 0.0002, |
| "loss": 0.1839, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.021860557107750086, |
| "grad_norm": 0.11677028983831406, |
| "learning_rate": 0.0002, |
| "loss": 0.0829, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.021878743927307282, |
| "grad_norm": 0.12885046005249023, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.02189693074686448, |
| "grad_norm": 0.1394425481557846, |
| "learning_rate": 0.0002, |
| "loss": 0.0668, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.021915117566421675, |
| "grad_norm": 0.024974076077342033, |
| "learning_rate": 0.0002, |
| "loss": 0.0192, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.02193330438597887, |
| "grad_norm": 0.11284986138343811, |
| "learning_rate": 0.0002, |
| "loss": 0.1492, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.021951491205536067, |
| "grad_norm": 0.0605492927134037, |
| "learning_rate": 0.0002, |
| "loss": 0.0787, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.021969678025093264, |
| "grad_norm": 0.040298718959093094, |
| "learning_rate": 0.0002, |
| "loss": 0.0778, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.02198786484465046, |
| "grad_norm": 0.1555332988500595, |
| "learning_rate": 0.0002, |
| "loss": 0.0683, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.022006051664207656, |
| "grad_norm": 0.022474724799394608, |
| "learning_rate": 0.0002, |
| "loss": 0.0139, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.022024238483764853, |
| "grad_norm": 0.08212363719940186, |
| "learning_rate": 0.0002, |
| "loss": 0.1513, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.02204242530332205, |
| "grad_norm": 0.16297335922718048, |
| "learning_rate": 0.0002, |
| "loss": 0.087, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.022060612122879245, |
| "grad_norm": 0.026817265897989273, |
| "learning_rate": 0.0002, |
| "loss": 0.0763, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.02207879894243644, |
| "grad_norm": 0.15199647843837738, |
| "learning_rate": 0.0002, |
| "loss": 0.0632, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.022096985761993638, |
| "grad_norm": 0.021619049832224846, |
| "learning_rate": 0.0002, |
| "loss": 0.0221, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.022115172581550834, |
| "grad_norm": 0.071327805519104, |
| "learning_rate": 0.0002, |
| "loss": 0.138, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.02213335940110803, |
| "grad_norm": 0.07506705075502396, |
| "learning_rate": 0.0002, |
| "loss": 0.0802, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.022151546220665227, |
| "grad_norm": 0.05193526670336723, |
| "learning_rate": 0.0002, |
| "loss": 0.0726, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.022169733040222423, |
| "grad_norm": 0.125730961561203, |
| "learning_rate": 0.0002, |
| "loss": 0.0658, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.022187919859779623, |
| "grad_norm": 0.01939002424478531, |
| "learning_rate": 0.0002, |
| "loss": 0.0174, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.02220610667933682, |
| "grad_norm": 0.05645585432648659, |
| "learning_rate": 0.0002, |
| "loss": 0.1447, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.022224293498894016, |
| "grad_norm": 0.12416274845600128, |
| "learning_rate": 0.0002, |
| "loss": 0.0727, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.022242480318451212, |
| "grad_norm": 0.05618545040488243, |
| "learning_rate": 0.0002, |
| "loss": 0.0801, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.022260667138008408, |
| "grad_norm": 0.12334968894720078, |
| "learning_rate": 0.0002, |
| "loss": 0.0598, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.022278853957565604, |
| "grad_norm": 0.024331970140337944, |
| "learning_rate": 0.0002, |
| "loss": 0.0179, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.0222970407771228, |
| "grad_norm": 0.05856281518936157, |
| "learning_rate": 0.0002, |
| "loss": 0.126, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.022315227596679997, |
| "grad_norm": 0.07432300597429276, |
| "learning_rate": 0.0002, |
| "loss": 0.0839, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.022333414416237193, |
| "grad_norm": 0.07249715179204941, |
| "learning_rate": 0.0002, |
| "loss": 0.0815, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.02235160123579439, |
| "grad_norm": 0.14335612952709198, |
| "learning_rate": 0.0002, |
| "loss": 0.0605, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.022369788055351586, |
| "grad_norm": 0.03603110462427139, |
| "learning_rate": 0.0002, |
| "loss": 0.0185, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.022387974874908782, |
| "grad_norm": 0.08532091230154037, |
| "learning_rate": 0.0002, |
| "loss": 0.1339, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.02240616169446598, |
| "grad_norm": 0.13663236796855927, |
| "learning_rate": 0.0002, |
| "loss": 0.0761, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.022424348514023175, |
| "grad_norm": 0.10088011622428894, |
| "learning_rate": 0.0002, |
| "loss": 0.0737, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.02244253533358037, |
| "grad_norm": 0.17186152935028076, |
| "learning_rate": 0.0002, |
| "loss": 0.0661, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.022460722153137568, |
| "grad_norm": 0.01941334828734398, |
| "learning_rate": 0.0002, |
| "loss": 0.0135, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.022478908972694764, |
| "grad_norm": 0.12438862770795822, |
| "learning_rate": 0.0002, |
| "loss": 0.1474, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.02249709579225196, |
| "grad_norm": 0.08050791174173355, |
| "learning_rate": 0.0002, |
| "loss": 0.0792, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.022515282611809156, |
| "grad_norm": 0.04660952091217041, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.022533469431366353, |
| "grad_norm": 0.16433311998844147, |
| "learning_rate": 0.0002, |
| "loss": 0.0691, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.02255165625092355, |
| "grad_norm": 0.04376552626490593, |
| "learning_rate": 0.0002, |
| "loss": 0.0219, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.022569843070480745, |
| "grad_norm": 0.06648654490709305, |
| "learning_rate": 0.0002, |
| "loss": 0.1346, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.02258802989003794, |
| "grad_norm": 0.11318199336528778, |
| "learning_rate": 0.0002, |
| "loss": 0.0736, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.022606216709595138, |
| "grad_norm": 0.0922408252954483, |
| "learning_rate": 0.0002, |
| "loss": 0.0819, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.022624403529152334, |
| "grad_norm": 0.1696896106004715, |
| "learning_rate": 0.0002, |
| "loss": 0.0642, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.02264259034870953, |
| "grad_norm": 0.03212421387434006, |
| "learning_rate": 0.0002, |
| "loss": 0.0247, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.022660777168266727, |
| "grad_norm": 0.12295889109373093, |
| "learning_rate": 0.0002, |
| "loss": 0.1504, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.022678963987823923, |
| "grad_norm": 0.10351194441318512, |
| "learning_rate": 0.0002, |
| "loss": 0.0757, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.02269715080738112, |
| "grad_norm": 0.022580118849873543, |
| "learning_rate": 0.0002, |
| "loss": 0.0756, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.022715337626938316, |
| "grad_norm": 0.16330066323280334, |
| "learning_rate": 0.0002, |
| "loss": 0.0645, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.022733524446495512, |
| "grad_norm": 0.021431026980280876, |
| "learning_rate": 0.0002, |
| "loss": 0.0224, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.02275171126605271, |
| "grad_norm": 0.053853604942560196, |
| "learning_rate": 0.0002, |
| "loss": 0.1304, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.022769898085609905, |
| "grad_norm": 0.129705548286438, |
| "learning_rate": 0.0002, |
| "loss": 0.0799, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.0227880849051671, |
| "grad_norm": 0.027473529800772667, |
| "learning_rate": 0.0002, |
| "loss": 0.0771, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.0228062717247243, |
| "grad_norm": 0.2045305222272873, |
| "learning_rate": 0.0002, |
| "loss": 0.0615, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.022824458544281497, |
| "grad_norm": 0.041042860597372055, |
| "learning_rate": 0.0002, |
| "loss": 0.026, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.022842645363838694, |
| "grad_norm": 0.05624527484178543, |
| "learning_rate": 0.0002, |
| "loss": 0.1327, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.02286083218339589, |
| "grad_norm": 0.09647081047296524, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.022879019002953086, |
| "grad_norm": 0.03362264856696129, |
| "learning_rate": 0.0002, |
| "loss": 0.076, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.022897205822510282, |
| "grad_norm": 0.1459503322839737, |
| "learning_rate": 0.0002, |
| "loss": 0.0603, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.02291539264206748, |
| "grad_norm": 0.025729481130838394, |
| "learning_rate": 0.0002, |
| "loss": 0.0196, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.022933579461624675, |
| "grad_norm": 0.19940927624702454, |
| "learning_rate": 0.0002, |
| "loss": 0.1298, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.02295176628118187, |
| "grad_norm": 0.13796600699424744, |
| "learning_rate": 0.0002, |
| "loss": 0.086, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.022969953100739068, |
| "grad_norm": 0.08884158730506897, |
| "learning_rate": 0.0002, |
| "loss": 0.0808, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.022988139920296264, |
| "grad_norm": 0.15814751386642456, |
| "learning_rate": 0.0002, |
| "loss": 0.0658, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.02300632673985346, |
| "grad_norm": 0.03503837063908577, |
| "learning_rate": 0.0002, |
| "loss": 0.0232, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.023024513559410657, |
| "grad_norm": 0.09701854735612869, |
| "learning_rate": 0.0002, |
| "loss": 0.136, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.023042700378967853, |
| "grad_norm": 0.13909977674484253, |
| "learning_rate": 0.0002, |
| "loss": 0.0839, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.02306088719852505, |
| "grad_norm": 0.03152406960725784, |
| "learning_rate": 0.0002, |
| "loss": 0.0733, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.023079074018082246, |
| "grad_norm": 0.13872750103473663, |
| "learning_rate": 0.0002, |
| "loss": 0.0604, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.023097260837639442, |
| "grad_norm": 0.03626656159758568, |
| "learning_rate": 0.0002, |
| "loss": 0.0234, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.023115447657196638, |
| "grad_norm": 0.10111619532108307, |
| "learning_rate": 0.0002, |
| "loss": 0.1507, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.023133634476753834, |
| "grad_norm": 0.09038366377353668, |
| "learning_rate": 0.0002, |
| "loss": 0.0839, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.02315182129631103, |
| "grad_norm": 0.026116544380784035, |
| "learning_rate": 0.0002, |
| "loss": 0.0777, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.023170008115868227, |
| "grad_norm": 0.2067679613828659, |
| "learning_rate": 0.0002, |
| "loss": 0.0661, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.023188194935425423, |
| "grad_norm": 0.02005072310566902, |
| "learning_rate": 0.0002, |
| "loss": 0.0165, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.02320638175498262, |
| "grad_norm": 0.03261101245880127, |
| "learning_rate": 0.0002, |
| "loss": 0.159, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.023224568574539816, |
| "grad_norm": 0.1416555494070053, |
| "learning_rate": 0.0002, |
| "loss": 0.0856, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.023242755394097012, |
| "grad_norm": 0.09400717914104462, |
| "learning_rate": 0.0002, |
| "loss": 0.0745, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.02326094221365421, |
| "grad_norm": 0.17093195021152496, |
| "learning_rate": 0.0002, |
| "loss": 0.0691, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.023279129033211405, |
| "grad_norm": 0.0209200382232666, |
| "learning_rate": 0.0002, |
| "loss": 0.0168, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.0232973158527686, |
| "grad_norm": 0.10523302853107452, |
| "learning_rate": 0.0002, |
| "loss": 0.1628, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.023315502672325798, |
| "grad_norm": 0.06932856142520905, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.023333689491882994, |
| "grad_norm": 0.03244032710790634, |
| "learning_rate": 0.0002, |
| "loss": 0.0699, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.02335187631144019, |
| "grad_norm": 0.13403338193893433, |
| "learning_rate": 0.0002, |
| "loss": 0.0619, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.023370063130997386, |
| "grad_norm": 0.034033093601465225, |
| "learning_rate": 0.0002, |
| "loss": 0.0166, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.023388249950554583, |
| "grad_norm": 0.07277385890483856, |
| "learning_rate": 0.0002, |
| "loss": 0.1377, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.02340643677011178, |
| "grad_norm": 0.10873163491487503, |
| "learning_rate": 0.0002, |
| "loss": 0.0895, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.023424623589668975, |
| "grad_norm": 0.06244732066988945, |
| "learning_rate": 0.0002, |
| "loss": 0.0745, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.023442810409226175, |
| "grad_norm": 0.1937248259782791, |
| "learning_rate": 0.0002, |
| "loss": 0.0633, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.02346099722878337, |
| "grad_norm": 0.03432930260896683, |
| "learning_rate": 0.0002, |
| "loss": 0.0246, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.023479184048340568, |
| "grad_norm": 0.33358234167099, |
| "learning_rate": 0.0002, |
| "loss": 0.1249, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.023497370867897764, |
| "grad_norm": 0.12039615213871002, |
| "learning_rate": 0.0002, |
| "loss": 0.0734, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.02351555768745496, |
| "grad_norm": 0.02666555717587471, |
| "learning_rate": 0.0002, |
| "loss": 0.0849, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.023533744507012157, |
| "grad_norm": 0.128091961145401, |
| "learning_rate": 0.0002, |
| "loss": 0.0647, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.023551931326569353, |
| "grad_norm": 0.030916422605514526, |
| "learning_rate": 0.0002, |
| "loss": 0.0217, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.02357011814612655, |
| "grad_norm": 0.09280567616224289, |
| "learning_rate": 0.0002, |
| "loss": 0.1281, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.023588304965683746, |
| "grad_norm": 0.09032955765724182, |
| "learning_rate": 0.0002, |
| "loss": 0.0834, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.023606491785240942, |
| "grad_norm": 0.3660918176174164, |
| "learning_rate": 0.0002, |
| "loss": 0.0776, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.02362467860479814, |
| "grad_norm": 0.15715408325195312, |
| "learning_rate": 0.0002, |
| "loss": 0.0611, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.023642865424355335, |
| "grad_norm": 0.03867153823375702, |
| "learning_rate": 0.0002, |
| "loss": 0.0214, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.02366105224391253, |
| "grad_norm": 0.37568527460098267, |
| "learning_rate": 0.0002, |
| "loss": 0.2529, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.023679239063469727, |
| "grad_norm": 0.14888867735862732, |
| "learning_rate": 0.0002, |
| "loss": 0.0801, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.023697425883026924, |
| "grad_norm": 0.04271422699093819, |
| "learning_rate": 0.0002, |
| "loss": 0.0863, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.02371561270258412, |
| "grad_norm": 0.190608948469162, |
| "learning_rate": 0.0002, |
| "loss": 0.0773, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.023733799522141316, |
| "grad_norm": 0.020333535969257355, |
| "learning_rate": 0.0002, |
| "loss": 0.0201, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.023751986341698512, |
| "grad_norm": 0.143577441573143, |
| "learning_rate": 0.0002, |
| "loss": 0.1709, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.02377017316125571, |
| "grad_norm": 0.09225071966648102, |
| "learning_rate": 0.0002, |
| "loss": 0.0854, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.023788359980812905, |
| "grad_norm": 0.08655473589897156, |
| "learning_rate": 0.0002, |
| "loss": 0.0727, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.0238065468003701, |
| "grad_norm": 0.14465250074863434, |
| "learning_rate": 0.0002, |
| "loss": 0.0632, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.023824733619927298, |
| "grad_norm": 0.019399341195821762, |
| "learning_rate": 0.0002, |
| "loss": 0.0204, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.023842920439484494, |
| "grad_norm": 0.09221036732196808, |
| "learning_rate": 0.0002, |
| "loss": 0.1646, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.02386110725904169, |
| "grad_norm": 0.1308157742023468, |
| "learning_rate": 0.0002, |
| "loss": 0.089, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.023879294078598887, |
| "grad_norm": 0.04212506487965584, |
| "learning_rate": 0.0002, |
| "loss": 0.0776, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.023897480898156083, |
| "grad_norm": 0.13541243970394135, |
| "learning_rate": 0.0002, |
| "loss": 0.0694, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.02391566771771328, |
| "grad_norm": 0.016859933733940125, |
| "learning_rate": 0.0002, |
| "loss": 0.0191, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.023933854537270476, |
| "grad_norm": 0.1553143709897995, |
| "learning_rate": 0.0002, |
| "loss": 0.1653, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.023952041356827672, |
| "grad_norm": 0.07960142940282822, |
| "learning_rate": 0.0002, |
| "loss": 0.0938, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.023970228176384868, |
| "grad_norm": 0.0719163790345192, |
| "learning_rate": 0.0002, |
| "loss": 0.0767, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.023988414995942065, |
| "grad_norm": 0.14845407009124756, |
| "learning_rate": 0.0002, |
| "loss": 0.0642, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.02400660181549926, |
| "grad_norm": 0.01817360520362854, |
| "learning_rate": 0.0002, |
| "loss": 0.0229, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.024024788635056457, |
| "grad_norm": 0.03876543045043945, |
| "learning_rate": 0.0002, |
| "loss": 0.1377, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.024042975454613653, |
| "grad_norm": 0.05972164496779442, |
| "learning_rate": 0.0002, |
| "loss": 0.0802, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.024061162274170853, |
| "grad_norm": 0.09239703416824341, |
| "learning_rate": 0.0002, |
| "loss": 0.0816, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.02407934909372805, |
| "grad_norm": 0.15912885963916779, |
| "learning_rate": 0.0002, |
| "loss": 0.0598, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.024097535913285246, |
| "grad_norm": 0.024279551580548286, |
| "learning_rate": 0.0002, |
| "loss": 0.0235, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.024115722732842442, |
| "grad_norm": 0.06568270921707153, |
| "learning_rate": 0.0002, |
| "loss": 0.1255, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.02413390955239964, |
| "grad_norm": 0.04041383042931557, |
| "learning_rate": 0.0002, |
| "loss": 0.0718, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.024152096371956835, |
| "grad_norm": 0.046768829226493835, |
| "learning_rate": 0.0002, |
| "loss": 0.0741, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.02417028319151403, |
| "grad_norm": 0.21418194472789764, |
| "learning_rate": 0.0002, |
| "loss": 0.0683, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.024188470011071227, |
| "grad_norm": 0.04398053511977196, |
| "learning_rate": 0.0002, |
| "loss": 0.0262, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.024206656830628424, |
| "grad_norm": 0.1672079861164093, |
| "learning_rate": 0.0002, |
| "loss": 0.1408, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.02422484365018562, |
| "grad_norm": 0.05705881491303444, |
| "learning_rate": 0.0002, |
| "loss": 0.0773, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.024243030469742816, |
| "grad_norm": 0.0667627677321434, |
| "learning_rate": 0.0002, |
| "loss": 0.0823, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.024261217289300013, |
| "grad_norm": 0.16610710322856903, |
| "learning_rate": 0.0002, |
| "loss": 0.0682, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.02427940410885721, |
| "grad_norm": 0.028300171718001366, |
| "learning_rate": 0.0002, |
| "loss": 0.0185, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.024297590928414405, |
| "grad_norm": 0.10226302593946457, |
| "learning_rate": 0.0002, |
| "loss": 0.1406, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.0243157777479716, |
| "grad_norm": 0.0939667820930481, |
| "learning_rate": 0.0002, |
| "loss": 0.0755, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.024333964567528798, |
| "grad_norm": 0.029998745769262314, |
| "learning_rate": 0.0002, |
| "loss": 0.0748, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.024352151387085994, |
| "grad_norm": 0.1240144744515419, |
| "learning_rate": 0.0002, |
| "loss": 0.0639, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.02437033820664319, |
| "grad_norm": 0.017499787732958794, |
| "learning_rate": 0.0002, |
| "loss": 0.0156, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.024388525026200387, |
| "grad_norm": 0.11781036853790283, |
| "learning_rate": 0.0002, |
| "loss": 0.1385, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.024406711845757583, |
| "grad_norm": 0.09330960363149643, |
| "learning_rate": 0.0002, |
| "loss": 0.0789, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.02442489866531478, |
| "grad_norm": 0.03347505256533623, |
| "learning_rate": 0.0002, |
| "loss": 0.0742, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.024443085484871976, |
| "grad_norm": 0.18877847492694855, |
| "learning_rate": 0.0002, |
| "loss": 0.0701, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.024461272304429172, |
| "grad_norm": 0.03831986337900162, |
| "learning_rate": 0.0002, |
| "loss": 0.0243, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.02447945912398637, |
| "grad_norm": 0.07360157370567322, |
| "learning_rate": 0.0002, |
| "loss": 0.1237, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.024497645943543565, |
| "grad_norm": 0.0442088283598423, |
| "learning_rate": 0.0002, |
| "loss": 0.0742, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.02451583276310076, |
| "grad_norm": 0.07053640484809875, |
| "learning_rate": 0.0002, |
| "loss": 0.0793, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.024534019582657957, |
| "grad_norm": 0.20134539902210236, |
| "learning_rate": 0.0002, |
| "loss": 0.0621, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.024552206402215154, |
| "grad_norm": 0.016353536397218704, |
| "learning_rate": 0.0002, |
| "loss": 0.0204, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.02457039322177235, |
| "grad_norm": 0.15373657643795013, |
| "learning_rate": 0.0002, |
| "loss": 0.1446, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.024588580041329546, |
| "grad_norm": 2.457998037338257, |
| "learning_rate": 0.0002, |
| "loss": 0.0959, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.024606766860886743, |
| "grad_norm": 0.11631426215171814, |
| "learning_rate": 0.0002, |
| "loss": 0.0718, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.02462495368044394, |
| "grad_norm": 0.15928395092487335, |
| "learning_rate": 0.0002, |
| "loss": 0.0638, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.024643140500001135, |
| "grad_norm": 0.01724998839199543, |
| "learning_rate": 0.0002, |
| "loss": 0.0127, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.02466132731955833, |
| "grad_norm": 0.10434440523386002, |
| "learning_rate": 0.0002, |
| "loss": 0.1676, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.02467951413911553, |
| "grad_norm": 0.09029936045408249, |
| "learning_rate": 0.0002, |
| "loss": 0.0792, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.024697700958672728, |
| "grad_norm": 0.07413540780544281, |
| "learning_rate": 0.0002, |
| "loss": 0.074, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.024715887778229924, |
| "grad_norm": 0.15171368420124054, |
| "learning_rate": 0.0002, |
| "loss": 0.0646, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.02473407459778712, |
| "grad_norm": 0.03615165874361992, |
| "learning_rate": 0.0002, |
| "loss": 0.0253, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.024752261417344316, |
| "grad_norm": 0.08074207603931427, |
| "learning_rate": 0.0002, |
| "loss": 0.1251, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.024770448236901513, |
| "grad_norm": 0.12725302577018738, |
| "learning_rate": 0.0002, |
| "loss": 0.0868, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.02478863505645871, |
| "grad_norm": 0.02872832864522934, |
| "learning_rate": 0.0002, |
| "loss": 0.072, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.024806821876015905, |
| "grad_norm": 0.14573116600513458, |
| "learning_rate": 0.0002, |
| "loss": 0.06, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.0248250086955731, |
| "grad_norm": 0.039421938359737396, |
| "learning_rate": 0.0002, |
| "loss": 0.0259, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.024843195515130298, |
| "grad_norm": 0.08786037564277649, |
| "learning_rate": 0.0002, |
| "loss": 0.1255, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.024861382334687494, |
| "grad_norm": 0.7118334174156189, |
| "learning_rate": 0.0002, |
| "loss": 0.1096, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.02487956915424469, |
| "grad_norm": 0.05718977376818657, |
| "learning_rate": 0.0002, |
| "loss": 0.1057, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.024897755973801887, |
| "grad_norm": 0.19388055801391602, |
| "learning_rate": 0.0002, |
| "loss": 0.0668, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.024915942793359083, |
| "grad_norm": 0.02519839070737362, |
| "learning_rate": 0.0002, |
| "loss": 0.0182, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.02493412961291628, |
| "grad_norm": 0.15939857065677643, |
| "learning_rate": 0.0002, |
| "loss": 0.1685, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.024952316432473476, |
| "grad_norm": 0.07893367856740952, |
| "learning_rate": 0.0002, |
| "loss": 0.0781, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.024970503252030672, |
| "grad_norm": 0.0573757067322731, |
| "learning_rate": 0.0002, |
| "loss": 0.0819, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.02498869007158787, |
| "grad_norm": 0.1089317575097084, |
| "learning_rate": 0.0002, |
| "loss": 0.0645, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.025006876891145065, |
| "grad_norm": 0.03239568695425987, |
| "learning_rate": 0.0002, |
| "loss": 0.0199, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.02502506371070226, |
| "grad_norm": 0.04015114903450012, |
| "learning_rate": 0.0002, |
| "loss": 0.146, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.025043250530259457, |
| "grad_norm": 0.15218386054039001, |
| "learning_rate": 0.0002, |
| "loss": 0.0854, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.025061437349816654, |
| "grad_norm": 0.04461386427283287, |
| "learning_rate": 0.0002, |
| "loss": 0.0734, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.02507962416937385, |
| "grad_norm": 0.17443357408046722, |
| "learning_rate": 0.0002, |
| "loss": 0.0677, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.025097810988931046, |
| "grad_norm": 1.0899302959442139, |
| "learning_rate": 0.0002, |
| "loss": 0.0312, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.025115997808488243, |
| "grad_norm": 0.04115718603134155, |
| "learning_rate": 0.0002, |
| "loss": 0.1392, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.02513418462804544, |
| "grad_norm": 0.06605038046836853, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.025152371447602635, |
| "grad_norm": 0.115416020154953, |
| "learning_rate": 0.0002, |
| "loss": 0.0709, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.02517055826715983, |
| "grad_norm": 0.1582881212234497, |
| "learning_rate": 0.0002, |
| "loss": 0.066, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.025188745086717028, |
| "grad_norm": 0.037643156945705414, |
| "learning_rate": 0.0002, |
| "loss": 0.0226, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.025206931906274224, |
| "grad_norm": 0.08343279361724854, |
| "learning_rate": 0.0002, |
| "loss": 0.1197, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.02522511872583142, |
| "grad_norm": 0.13482169806957245, |
| "learning_rate": 0.0002, |
| "loss": 0.0799, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.025243305545388617, |
| "grad_norm": 0.10373103618621826, |
| "learning_rate": 0.0002, |
| "loss": 0.075, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.025261492364945813, |
| "grad_norm": 0.1348303109407425, |
| "learning_rate": 0.0002, |
| "loss": 0.0603, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.02527967918450301, |
| "grad_norm": 0.058479245752096176, |
| "learning_rate": 0.0002, |
| "loss": 0.0252, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.025297866004060206, |
| "grad_norm": 0.19177350401878357, |
| "learning_rate": 0.0002, |
| "loss": 0.122, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.025316052823617406, |
| "grad_norm": 0.11044300347566605, |
| "learning_rate": 0.0002, |
| "loss": 0.0724, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.025334239643174602, |
| "grad_norm": 0.05279375612735748, |
| "learning_rate": 0.0002, |
| "loss": 0.0836, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.025352426462731798, |
| "grad_norm": 0.12162257730960846, |
| "learning_rate": 0.0002, |
| "loss": 0.0615, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.025370613282288994, |
| "grad_norm": 0.026728983968496323, |
| "learning_rate": 0.0002, |
| "loss": 0.0207, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.02538880010184619, |
| "grad_norm": 0.08440329879522324, |
| "learning_rate": 0.0002, |
| "loss": 0.1171, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.025406986921403387, |
| "grad_norm": 0.10090481489896774, |
| "learning_rate": 0.0002, |
| "loss": 0.0851, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.025425173740960583, |
| "grad_norm": 0.03063822351396084, |
| "learning_rate": 0.0002, |
| "loss": 0.0783, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.02544336056051778, |
| "grad_norm": 0.14754973351955414, |
| "learning_rate": 0.0002, |
| "loss": 0.0662, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.025461547380074976, |
| "grad_norm": 0.04844941198825836, |
| "learning_rate": 0.0002, |
| "loss": 0.0204, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.025479734199632172, |
| "grad_norm": 0.08291894942522049, |
| "learning_rate": 0.0002, |
| "loss": 0.13, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.02549792101918937, |
| "grad_norm": 0.05875542387366295, |
| "learning_rate": 0.0002, |
| "loss": 0.0732, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.025516107838746565, |
| "grad_norm": 0.04103298857808113, |
| "learning_rate": 0.0002, |
| "loss": 0.0796, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.02553429465830376, |
| "grad_norm": 0.20349934697151184, |
| "learning_rate": 0.0002, |
| "loss": 0.0672, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.025552481477860958, |
| "grad_norm": 0.05419473722577095, |
| "learning_rate": 0.0002, |
| "loss": 0.0231, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.025570668297418154, |
| "grad_norm": 0.05501960590481758, |
| "learning_rate": 0.0002, |
| "loss": 0.1281, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.02558885511697535, |
| "grad_norm": 0.07140739262104034, |
| "learning_rate": 0.0002, |
| "loss": 0.0746, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.025607041936532546, |
| "grad_norm": 0.04564960300922394, |
| "learning_rate": 0.0002, |
| "loss": 0.0746, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.025625228756089743, |
| "grad_norm": 0.16987308859825134, |
| "learning_rate": 0.0002, |
| "loss": 0.0642, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.02564341557564694, |
| "grad_norm": 0.017460890114307404, |
| "learning_rate": 0.0002, |
| "loss": 0.0218, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.025661602395204135, |
| "grad_norm": 0.15666340291500092, |
| "learning_rate": 0.0002, |
| "loss": 0.1572, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.02567978921476133, |
| "grad_norm": 0.06847309321165085, |
| "learning_rate": 0.0002, |
| "loss": 0.0744, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.025697976034318528, |
| "grad_norm": 0.03678276389837265, |
| "learning_rate": 0.0002, |
| "loss": 0.0718, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.025716162853875724, |
| "grad_norm": 0.1861123889684677, |
| "learning_rate": 0.0002, |
| "loss": 0.06, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.02573434967343292, |
| "grad_norm": 0.010294788517057896, |
| "learning_rate": 0.0002, |
| "loss": 0.0183, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.025752536492990117, |
| "grad_norm": 0.0643458440899849, |
| "learning_rate": 0.0002, |
| "loss": 0.1594, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.025770723312547313, |
| "grad_norm": 0.10639938712120056, |
| "learning_rate": 0.0002, |
| "loss": 0.0761, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.02578891013210451, |
| "grad_norm": 0.056529924273490906, |
| "learning_rate": 0.0002, |
| "loss": 0.082, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.025807096951661706, |
| "grad_norm": 0.18884658813476562, |
| "learning_rate": 0.0002, |
| "loss": 0.0683, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.025825283771218902, |
| "grad_norm": 0.035667784512043, |
| "learning_rate": 0.0002, |
| "loss": 0.0263, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.0258434705907761, |
| "grad_norm": 0.14650103449821472, |
| "learning_rate": 0.0002, |
| "loss": 0.1314, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.025861657410333295, |
| "grad_norm": 0.12219654768705368, |
| "learning_rate": 0.0002, |
| "loss": 0.0755, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.02587984422989049, |
| "grad_norm": 0.05271647870540619, |
| "learning_rate": 0.0002, |
| "loss": 0.0789, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.025898031049447687, |
| "grad_norm": 0.1669916957616806, |
| "learning_rate": 0.0002, |
| "loss": 0.0641, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.025916217869004884, |
| "grad_norm": 0.035175371915102005, |
| "learning_rate": 0.0002, |
| "loss": 0.0222, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.025934404688562084, |
| "grad_norm": 0.14658409357070923, |
| "learning_rate": 0.0002, |
| "loss": 0.1382, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.02595259150811928, |
| "grad_norm": 0.07525639981031418, |
| "learning_rate": 0.0002, |
| "loss": 0.0815, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.025970778327676476, |
| "grad_norm": 0.02428872510790825, |
| "learning_rate": 0.0002, |
| "loss": 0.076, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.025988965147233672, |
| "grad_norm": 0.1825665533542633, |
| "learning_rate": 0.0002, |
| "loss": 0.0652, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.02600715196679087, |
| "grad_norm": 0.033867619931697845, |
| "learning_rate": 0.0002, |
| "loss": 0.0206, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.026025338786348065, |
| "grad_norm": 0.051891107112169266, |
| "learning_rate": 0.0002, |
| "loss": 0.1576, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.02604352560590526, |
| "grad_norm": 0.1111353188753128, |
| "learning_rate": 0.0002, |
| "loss": 0.0889, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.026061712425462458, |
| "grad_norm": 0.04253942146897316, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.026079899245019654, |
| "grad_norm": 0.17151106894016266, |
| "learning_rate": 0.0002, |
| "loss": 0.0678, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.02609808606457685, |
| "grad_norm": 0.03877005726099014, |
| "learning_rate": 0.0002, |
| "loss": 0.0206, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.026116272884134047, |
| "grad_norm": 0.03517235442996025, |
| "learning_rate": 0.0002, |
| "loss": 0.1343, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.026134459703691243, |
| "grad_norm": 0.08157488703727722, |
| "learning_rate": 0.0002, |
| "loss": 0.0764, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.02615264652324844, |
| "grad_norm": 0.03245632350444794, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.026170833342805636, |
| "grad_norm": 0.20079655945301056, |
| "learning_rate": 0.0002, |
| "loss": 0.0691, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.026189020162362832, |
| "grad_norm": 0.03477077558636665, |
| "learning_rate": 0.0002, |
| "loss": 0.0232, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.026207206981920028, |
| "grad_norm": 0.14853888750076294, |
| "learning_rate": 0.0002, |
| "loss": 0.1436, |
| "step": 14410 |
| }, |
| { |
| "epoch": 0.026225393801477224, |
| "grad_norm": 0.12416905164718628, |
| "learning_rate": 0.0002, |
| "loss": 0.0755, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.02624358062103442, |
| "grad_norm": 0.03126871958374977, |
| "learning_rate": 0.0002, |
| "loss": 0.0762, |
| "step": 14430 |
| }, |
| { |
| "epoch": 0.026261767440591617, |
| "grad_norm": 0.20726743340492249, |
| "learning_rate": 0.0002, |
| "loss": 0.0614, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.026279954260148813, |
| "grad_norm": 0.039617493748664856, |
| "learning_rate": 0.0002, |
| "loss": 0.0181, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.02629814107970601, |
| "grad_norm": 0.08146277070045471, |
| "learning_rate": 0.0002, |
| "loss": 0.132, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.026316327899263206, |
| "grad_norm": 0.07181694358587265, |
| "learning_rate": 0.0002, |
| "loss": 0.0706, |
| "step": 14470 |
| }, |
| { |
| "epoch": 0.026334514718820402, |
| "grad_norm": 0.04080040752887726, |
| "learning_rate": 0.0002, |
| "loss": 0.0792, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.0263527015383776, |
| "grad_norm": 0.1903056502342224, |
| "learning_rate": 0.0002, |
| "loss": 0.0647, |
| "step": 14490 |
| }, |
| { |
| "epoch": 0.026370888357934795, |
| "grad_norm": 0.027256207540631294, |
| "learning_rate": 0.0002, |
| "loss": 0.0202, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.02638907517749199, |
| "grad_norm": 0.1434287130832672, |
| "learning_rate": 0.0002, |
| "loss": 0.1262, |
| "step": 14510 |
| }, |
| { |
| "epoch": 0.026407261997049188, |
| "grad_norm": 0.06977452337741852, |
| "learning_rate": 0.0002, |
| "loss": 0.0722, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.026425448816606384, |
| "grad_norm": 0.03453589975833893, |
| "learning_rate": 0.0002, |
| "loss": 0.0778, |
| "step": 14530 |
| }, |
| { |
| "epoch": 0.02644363563616358, |
| "grad_norm": 0.1455768346786499, |
| "learning_rate": 0.0002, |
| "loss": 0.0678, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.026461822455720777, |
| "grad_norm": 0.02977900207042694, |
| "learning_rate": 0.0002, |
| "loss": 0.0227, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.026480009275277973, |
| "grad_norm": 0.06667467951774597, |
| "learning_rate": 0.0002, |
| "loss": 0.1345, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.02649819609483517, |
| "grad_norm": 0.05125528201460838, |
| "learning_rate": 0.0002, |
| "loss": 0.078, |
| "step": 14570 |
| }, |
| { |
| "epoch": 0.026516382914392365, |
| "grad_norm": 0.02796974405646324, |
| "learning_rate": 0.0002, |
| "loss": 0.0782, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.026534569733949562, |
| "grad_norm": 0.18518763780593872, |
| "learning_rate": 0.0002, |
| "loss": 0.0722, |
| "step": 14590 |
| }, |
| { |
| "epoch": 0.02655275655350676, |
| "grad_norm": 0.01827179454267025, |
| "learning_rate": 0.0002, |
| "loss": 0.0193, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.026570943373063958, |
| "grad_norm": 0.1146678775548935, |
| "learning_rate": 0.0002, |
| "loss": 0.1651, |
| "step": 14610 |
| }, |
| { |
| "epoch": 0.026589130192621154, |
| "grad_norm": 3.385193109512329, |
| "learning_rate": 0.0002, |
| "loss": 0.2165, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.02660731701217835, |
| "grad_norm": 0.3052279055118561, |
| "learning_rate": 0.0002, |
| "loss": 0.1489, |
| "step": 14630 |
| }, |
| { |
| "epoch": 0.026625503831735547, |
| "grad_norm": 0.12762853503227234, |
| "learning_rate": 0.0002, |
| "loss": 0.0693, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.026643690651292743, |
| "grad_norm": 0.003925936296582222, |
| "learning_rate": 0.0002, |
| "loss": 0.0078, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.02666187747084994, |
| "grad_norm": 0.28632932901382446, |
| "learning_rate": 0.0002, |
| "loss": 0.2533, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.026680064290407136, |
| "grad_norm": 0.037552788853645325, |
| "learning_rate": 0.0002, |
| "loss": 0.0852, |
| "step": 14670 |
| }, |
| { |
| "epoch": 0.026698251109964332, |
| "grad_norm": 0.0911126434803009, |
| "learning_rate": 0.0002, |
| "loss": 0.0751, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.02671643792952153, |
| "grad_norm": 0.18434865772724152, |
| "learning_rate": 0.0002, |
| "loss": 0.084, |
| "step": 14690 |
| }, |
| { |
| "epoch": 0.026734624749078725, |
| "grad_norm": 0.03813793510198593, |
| "learning_rate": 0.0002, |
| "loss": 0.0165, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.02675281156863592, |
| "grad_norm": 0.04764392226934433, |
| "learning_rate": 0.0002, |
| "loss": 0.1642, |
| "step": 14710 |
| }, |
| { |
| "epoch": 0.026770998388193117, |
| "grad_norm": 0.04611713066697121, |
| "learning_rate": 0.0002, |
| "loss": 0.088, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.026789185207750314, |
| "grad_norm": 0.07171179354190826, |
| "learning_rate": 0.0002, |
| "loss": 0.1417, |
| "step": 14730 |
| }, |
| { |
| "epoch": 0.02680737202730751, |
| "grad_norm": 0.14135649800300598, |
| "learning_rate": 0.0002, |
| "loss": 0.0692, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.026825558846864706, |
| "grad_norm": 0.004508219193667173, |
| "learning_rate": 0.0002, |
| "loss": 0.016, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.026843745666421902, |
| "grad_norm": 0.09732682257890701, |
| "learning_rate": 0.0002, |
| "loss": 0.2089, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.0268619324859791, |
| "grad_norm": 0.12676575779914856, |
| "learning_rate": 0.0002, |
| "loss": 0.0849, |
| "step": 14770 |
| }, |
| { |
| "epoch": 0.026880119305536295, |
| "grad_norm": 0.0696650817990303, |
| "learning_rate": 0.0002, |
| "loss": 0.08, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.02689830612509349, |
| "grad_norm": 0.17883484065532684, |
| "learning_rate": 0.0002, |
| "loss": 0.0682, |
| "step": 14790 |
| }, |
| { |
| "epoch": 0.026916492944650688, |
| "grad_norm": 0.0567975677549839, |
| "learning_rate": 0.0002, |
| "loss": 0.0149, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.026934679764207884, |
| "grad_norm": 0.4884565472602844, |
| "learning_rate": 0.0002, |
| "loss": 0.6381, |
| "step": 14810 |
| }, |
| { |
| "epoch": 0.02695286658376508, |
| "grad_norm": 0.0742981806397438, |
| "learning_rate": 0.0002, |
| "loss": 0.0845, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.026971053403322277, |
| "grad_norm": 0.030466781929135323, |
| "learning_rate": 0.0002, |
| "loss": 0.0818, |
| "step": 14830 |
| }, |
| { |
| "epoch": 0.026989240222879473, |
| "grad_norm": 0.13108357787132263, |
| "learning_rate": 0.0002, |
| "loss": 0.065, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.02700742704243667, |
| "grad_norm": 0.019065184518694878, |
| "learning_rate": 0.0002, |
| "loss": 0.0168, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.027025613861993866, |
| "grad_norm": 0.21891777217388153, |
| "learning_rate": 0.0002, |
| "loss": 0.1456, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.027043800681551062, |
| "grad_norm": 0.0836934968829155, |
| "learning_rate": 0.0002, |
| "loss": 0.0781, |
| "step": 14870 |
| }, |
| { |
| "epoch": 0.027061987501108258, |
| "grad_norm": 0.0643845945596695, |
| "learning_rate": 0.0002, |
| "loss": 0.0795, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.027080174320665455, |
| "grad_norm": 0.27108556032180786, |
| "learning_rate": 0.0002, |
| "loss": 0.0722, |
| "step": 14890 |
| }, |
| { |
| "epoch": 0.02709836114022265, |
| "grad_norm": 0.008289041928946972, |
| "learning_rate": 0.0002, |
| "loss": 0.0201, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.027116547959779847, |
| "grad_norm": 0.03284185752272606, |
| "learning_rate": 0.0002, |
| "loss": 0.1509, |
| "step": 14910 |
| }, |
| { |
| "epoch": 0.027134734779337043, |
| "grad_norm": 0.051129039376974106, |
| "learning_rate": 0.0002, |
| "loss": 0.0831, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.02715292159889424, |
| "grad_norm": 0.046401191502809525, |
| "learning_rate": 0.0002, |
| "loss": 0.0694, |
| "step": 14930 |
| }, |
| { |
| "epoch": 0.027171108418451436, |
| "grad_norm": 0.19945313036441803, |
| "learning_rate": 0.0002, |
| "loss": 0.0734, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.027189295238008636, |
| "grad_norm": 0.03877973556518555, |
| "learning_rate": 0.0002, |
| "loss": 0.026, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.027207482057565832, |
| "grad_norm": 0.19090695679187775, |
| "learning_rate": 0.0002, |
| "loss": 0.136, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.02722566887712303, |
| "grad_norm": 0.11352288722991943, |
| "learning_rate": 0.0002, |
| "loss": 0.0797, |
| "step": 14970 |
| }, |
| { |
| "epoch": 0.027243855696680225, |
| "grad_norm": 0.055218834429979324, |
| "learning_rate": 0.0002, |
| "loss": 0.0763, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.02726204251623742, |
| "grad_norm": 0.1060803234577179, |
| "learning_rate": 0.0002, |
| "loss": 0.059, |
| "step": 14990 |
| }, |
| { |
| "epoch": 0.027280229335794617, |
| "grad_norm": 0.03370797634124756, |
| "learning_rate": 0.0002, |
| "loss": 0.0172, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.027298416155351814, |
| "grad_norm": 0.19884982705116272, |
| "learning_rate": 0.0002, |
| "loss": 0.1408, |
| "step": 15010 |
| }, |
| { |
| "epoch": 0.02731660297490901, |
| "grad_norm": 0.1186273992061615, |
| "learning_rate": 0.0002, |
| "loss": 0.0769, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.027334789794466206, |
| "grad_norm": 0.0494297556579113, |
| "learning_rate": 0.0002, |
| "loss": 0.0818, |
| "step": 15030 |
| }, |
| { |
| "epoch": 0.027352976614023403, |
| "grad_norm": 0.17990480363368988, |
| "learning_rate": 0.0002, |
| "loss": 0.06, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.0273711634335806, |
| "grad_norm": 0.015269913710653782, |
| "learning_rate": 0.0002, |
| "loss": 0.0143, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.027389350253137795, |
| "grad_norm": 0.1387794464826584, |
| "learning_rate": 0.0002, |
| "loss": 0.171, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.02740753707269499, |
| "grad_norm": 0.11648393422365189, |
| "learning_rate": 0.0002, |
| "loss": 0.0769, |
| "step": 15070 |
| }, |
| { |
| "epoch": 0.027425723892252188, |
| "grad_norm": 0.04039733111858368, |
| "learning_rate": 0.0002, |
| "loss": 0.0707, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.027443910711809384, |
| "grad_norm": 0.19274230301380157, |
| "learning_rate": 0.0002, |
| "loss": 0.0657, |
| "step": 15090 |
| }, |
| { |
| "epoch": 0.02746209753136658, |
| "grad_norm": 0.03266929090023041, |
| "learning_rate": 0.0002, |
| "loss": 0.0155, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.027480284350923777, |
| "grad_norm": 0.44524702429771423, |
| "learning_rate": 0.0002, |
| "loss": 0.3075, |
| "step": 15110 |
| }, |
| { |
| "epoch": 0.027498471170480973, |
| "grad_norm": 0.15604422986507416, |
| "learning_rate": 0.0002, |
| "loss": 0.0874, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.02751665799003817, |
| "grad_norm": 0.043061114847660065, |
| "learning_rate": 0.0002, |
| "loss": 0.0814, |
| "step": 15130 |
| }, |
| { |
| "epoch": 0.027534844809595366, |
| "grad_norm": 0.2331482172012329, |
| "learning_rate": 0.0002, |
| "loss": 0.0638, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.027553031629152562, |
| "grad_norm": 0.011037157848477364, |
| "learning_rate": 0.0002, |
| "loss": 0.0197, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.02757121844870976, |
| "grad_norm": 0.0758776143193245, |
| "learning_rate": 0.0002, |
| "loss": 0.1481, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.027589405268266955, |
| "grad_norm": 0.18878699839115143, |
| "learning_rate": 0.0002, |
| "loss": 0.083, |
| "step": 15170 |
| }, |
| { |
| "epoch": 0.02760759208782415, |
| "grad_norm": 0.042469121515750885, |
| "learning_rate": 0.0002, |
| "loss": 0.0799, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.027625778907381347, |
| "grad_norm": 0.1603335440158844, |
| "learning_rate": 0.0002, |
| "loss": 0.0579, |
| "step": 15190 |
| }, |
| { |
| "epoch": 0.027643965726938544, |
| "grad_norm": 0.03533349186182022, |
| "learning_rate": 0.0002, |
| "loss": 0.0195, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.02766215254649574, |
| "grad_norm": 0.2014724314212799, |
| "learning_rate": 0.0002, |
| "loss": 0.1443, |
| "step": 15210 |
| }, |
| { |
| "epoch": 0.027680339366052936, |
| "grad_norm": 0.04604899883270264, |
| "learning_rate": 0.0002, |
| "loss": 0.0701, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.027698526185610133, |
| "grad_norm": 0.04726789519190788, |
| "learning_rate": 0.0002, |
| "loss": 0.078, |
| "step": 15230 |
| }, |
| { |
| "epoch": 0.02771671300516733, |
| "grad_norm": 0.16189764440059662, |
| "learning_rate": 0.0002, |
| "loss": 0.0686, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.027734899824724525, |
| "grad_norm": 0.018077973276376724, |
| "learning_rate": 0.0002, |
| "loss": 0.0155, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.02775308664428172, |
| "grad_norm": 0.09486963599920273, |
| "learning_rate": 0.0002, |
| "loss": 0.1695, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.027771273463838918, |
| "grad_norm": 0.19950449466705322, |
| "learning_rate": 0.0002, |
| "loss": 0.0784, |
| "step": 15270 |
| }, |
| { |
| "epoch": 0.027789460283396114, |
| "grad_norm": 0.03350493311882019, |
| "learning_rate": 0.0002, |
| "loss": 0.0797, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.027807647102953314, |
| "grad_norm": 0.14408868551254272, |
| "learning_rate": 0.0002, |
| "loss": 0.0624, |
| "step": 15290 |
| }, |
| { |
| "epoch": 0.02782583392251051, |
| "grad_norm": 0.03824521601200104, |
| "learning_rate": 0.0002, |
| "loss": 0.0182, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.027844020742067706, |
| "grad_norm": 0.051167964935302734, |
| "learning_rate": 0.0002, |
| "loss": 0.1342, |
| "step": 15310 |
| }, |
| { |
| "epoch": 0.027862207561624903, |
| "grad_norm": 0.08440420031547546, |
| "learning_rate": 0.0002, |
| "loss": 0.0775, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.0278803943811821, |
| "grad_norm": 0.05162487551569939, |
| "learning_rate": 0.0002, |
| "loss": 0.0824, |
| "step": 15330 |
| }, |
| { |
| "epoch": 0.027898581200739295, |
| "grad_norm": 0.1576220989227295, |
| "learning_rate": 0.0002, |
| "loss": 0.0607, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.02791676802029649, |
| "grad_norm": 0.03840797394514084, |
| "learning_rate": 0.0002, |
| "loss": 0.0197, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.027934954839853688, |
| "grad_norm": 0.1418246179819107, |
| "learning_rate": 0.0002, |
| "loss": 0.151, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.027953141659410884, |
| "grad_norm": 0.07326096296310425, |
| "learning_rate": 0.0002, |
| "loss": 0.0764, |
| "step": 15370 |
| }, |
| { |
| "epoch": 0.02797132847896808, |
| "grad_norm": 0.0582844614982605, |
| "learning_rate": 0.0002, |
| "loss": 0.0745, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.027989515298525277, |
| "grad_norm": 0.2234935164451599, |
| "learning_rate": 0.0002, |
| "loss": 0.0687, |
| "step": 15390 |
| }, |
| { |
| "epoch": 0.028007702118082473, |
| "grad_norm": 0.04384669288992882, |
| "learning_rate": 0.0002, |
| "loss": 0.023, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.02802588893763967, |
| "grad_norm": 0.14306089282035828, |
| "learning_rate": 0.0002, |
| "loss": 0.1477, |
| "step": 15410 |
| }, |
| { |
| "epoch": 0.028044075757196866, |
| "grad_norm": 0.1326105296611786, |
| "learning_rate": 0.0002, |
| "loss": 0.0784, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.028062262576754062, |
| "grad_norm": 0.05531894043087959, |
| "learning_rate": 0.0002, |
| "loss": 0.0813, |
| "step": 15430 |
| }, |
| { |
| "epoch": 0.02808044939631126, |
| "grad_norm": 0.14875297248363495, |
| "learning_rate": 0.0002, |
| "loss": 0.0622, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.028098636215868455, |
| "grad_norm": 0.03749268501996994, |
| "learning_rate": 0.0002, |
| "loss": 0.0181, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.02811682303542565, |
| "grad_norm": 0.05747106671333313, |
| "learning_rate": 0.0002, |
| "loss": 0.1157, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.028135009854982847, |
| "grad_norm": 0.06197863444685936, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 15470 |
| }, |
| { |
| "epoch": 0.028153196674540044, |
| "grad_norm": 0.09997677057981491, |
| "learning_rate": 0.0002, |
| "loss": 0.0784, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.02817138349409724, |
| "grad_norm": 0.18067684769630432, |
| "learning_rate": 0.0002, |
| "loss": 0.0728, |
| "step": 15490 |
| }, |
| { |
| "epoch": 0.028189570313654436, |
| "grad_norm": 0.03378088399767876, |
| "learning_rate": 0.0002, |
| "loss": 0.0252, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.028207757133211633, |
| "grad_norm": 0.14048723876476288, |
| "learning_rate": 0.0002, |
| "loss": 0.1392, |
| "step": 15510 |
| }, |
| { |
| "epoch": 0.02822594395276883, |
| "grad_norm": 0.09573493152856827, |
| "learning_rate": 0.0002, |
| "loss": 0.0751, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.028244130772326025, |
| "grad_norm": 0.11000777781009674, |
| "learning_rate": 0.0002, |
| "loss": 0.08, |
| "step": 15530 |
| }, |
| { |
| "epoch": 0.02826231759188322, |
| "grad_norm": 0.17712855339050293, |
| "learning_rate": 0.0002, |
| "loss": 0.0658, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.028280504411440418, |
| "grad_norm": 0.0183733981102705, |
| "learning_rate": 0.0002, |
| "loss": 0.0188, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.028298691230997614, |
| "grad_norm": 0.15027762949466705, |
| "learning_rate": 0.0002, |
| "loss": 0.1235, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.02831687805055481, |
| "grad_norm": 0.10586661100387573, |
| "learning_rate": 0.0002, |
| "loss": 0.0791, |
| "step": 15570 |
| }, |
| { |
| "epoch": 0.028335064870112007, |
| "grad_norm": 0.031083540990948677, |
| "learning_rate": 0.0002, |
| "loss": 0.0765, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.028353251689669203, |
| "grad_norm": 0.12294827401638031, |
| "learning_rate": 0.0002, |
| "loss": 0.0615, |
| "step": 15590 |
| }, |
| { |
| "epoch": 0.0283714385092264, |
| "grad_norm": 0.03652534633874893, |
| "learning_rate": 0.0002, |
| "loss": 0.0203, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.028389625328783596, |
| "grad_norm": 0.046638645231723785, |
| "learning_rate": 0.0002, |
| "loss": 0.1327, |
| "step": 15610 |
| }, |
| { |
| "epoch": 0.028407812148340792, |
| "grad_norm": 0.07200415432453156, |
| "learning_rate": 0.0002, |
| "loss": 0.0765, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.028425998967897992, |
| "grad_norm": 0.040679559111595154, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 15630 |
| }, |
| { |
| "epoch": 0.028444185787455188, |
| "grad_norm": 0.1572960615158081, |
| "learning_rate": 0.0002, |
| "loss": 0.0637, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.028462372607012384, |
| "grad_norm": 0.036091506481170654, |
| "learning_rate": 0.0002, |
| "loss": 0.0266, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.02848055942656958, |
| "grad_norm": 0.10555437207221985, |
| "learning_rate": 0.0002, |
| "loss": 0.1093, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.028498746246126777, |
| "grad_norm": 0.08854329586029053, |
| "learning_rate": 0.0002, |
| "loss": 0.0741, |
| "step": 15670 |
| }, |
| { |
| "epoch": 0.028516933065683973, |
| "grad_norm": 0.02908560261130333, |
| "learning_rate": 0.0002, |
| "loss": 0.0732, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.02853511988524117, |
| "grad_norm": 0.1568380743265152, |
| "learning_rate": 0.0002, |
| "loss": 0.0586, |
| "step": 15690 |
| }, |
| { |
| "epoch": 0.028553306704798366, |
| "grad_norm": 0.04985487833619118, |
| "learning_rate": 0.0002, |
| "loss": 0.0247, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.028571493524355562, |
| "grad_norm": 0.07582605630159378, |
| "learning_rate": 0.0002, |
| "loss": 0.1196, |
| "step": 15710 |
| }, |
| { |
| "epoch": 0.02858968034391276, |
| "grad_norm": 0.02401849813759327, |
| "learning_rate": 0.0002, |
| "loss": 0.075, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.028607867163469955, |
| "grad_norm": 0.032545965164899826, |
| "learning_rate": 0.0002, |
| "loss": 0.0761, |
| "step": 15730 |
| }, |
| { |
| "epoch": 0.02862605398302715, |
| "grad_norm": 0.1098649650812149, |
| "learning_rate": 0.0002, |
| "loss": 0.0599, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.028644240802584348, |
| "grad_norm": 0.021166007965803146, |
| "learning_rate": 0.0002, |
| "loss": 0.0169, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.028662427622141544, |
| "grad_norm": 0.0823541134595871, |
| "learning_rate": 0.0002, |
| "loss": 0.1337, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.02868061444169874, |
| "grad_norm": 0.1009572371840477, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 15770 |
| }, |
| { |
| "epoch": 0.028698801261255937, |
| "grad_norm": 0.09160738438367844, |
| "learning_rate": 0.0002, |
| "loss": 0.0766, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.028716988080813133, |
| "grad_norm": 0.14419673383235931, |
| "learning_rate": 0.0002, |
| "loss": 0.0594, |
| "step": 15790 |
| }, |
| { |
| "epoch": 0.02873517490037033, |
| "grad_norm": 0.01628550887107849, |
| "learning_rate": 0.0002, |
| "loss": 0.0218, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.028753361719927525, |
| "grad_norm": 0.15207678079605103, |
| "learning_rate": 0.0002, |
| "loss": 0.1262, |
| "step": 15810 |
| }, |
| { |
| "epoch": 0.028771548539484722, |
| "grad_norm": 0.14951761066913605, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.028789735359041918, |
| "grad_norm": 0.028078215196728706, |
| "learning_rate": 0.0002, |
| "loss": 0.0783, |
| "step": 15830 |
| }, |
| { |
| "epoch": 0.028807922178599114, |
| "grad_norm": 0.16079741716384888, |
| "learning_rate": 0.0002, |
| "loss": 0.0633, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.02882610899815631, |
| "grad_norm": 0.04218870773911476, |
| "learning_rate": 0.0002, |
| "loss": 0.0217, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.028844295817713507, |
| "grad_norm": 0.13758492469787598, |
| "learning_rate": 0.0002, |
| "loss": 0.1358, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.028862482637270703, |
| "grad_norm": 0.10366559028625488, |
| "learning_rate": 0.0002, |
| "loss": 0.0787, |
| "step": 15870 |
| }, |
| { |
| "epoch": 0.0288806694568279, |
| "grad_norm": 0.04433147609233856, |
| "learning_rate": 0.0002, |
| "loss": 0.0781, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.028898856276385096, |
| "grad_norm": 0.16709402203559875, |
| "learning_rate": 0.0002, |
| "loss": 0.0684, |
| "step": 15890 |
| }, |
| { |
| "epoch": 0.028917043095942292, |
| "grad_norm": 0.03370310738682747, |
| "learning_rate": 0.0002, |
| "loss": 0.0191, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.02893522991549949, |
| "grad_norm": 0.15469267964363098, |
| "learning_rate": 0.0002, |
| "loss": 0.1487, |
| "step": 15910 |
| }, |
| { |
| "epoch": 0.028953416735056685, |
| "grad_norm": 0.19974654912948608, |
| "learning_rate": 0.0002, |
| "loss": 0.0769, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.02897160355461388, |
| "grad_norm": 0.04307623952627182, |
| "learning_rate": 0.0002, |
| "loss": 0.075, |
| "step": 15930 |
| }, |
| { |
| "epoch": 0.028989790374171077, |
| "grad_norm": 0.21828149259090424, |
| "learning_rate": 0.0002, |
| "loss": 0.0691, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.029007977193728274, |
| "grad_norm": 0.0268656387925148, |
| "learning_rate": 0.0002, |
| "loss": 0.022, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.02902616401328547, |
| "grad_norm": 0.11213699728250504, |
| "learning_rate": 0.0002, |
| "loss": 0.1326, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.029044350832842666, |
| "grad_norm": 0.2018963098526001, |
| "learning_rate": 0.0002, |
| "loss": 0.0772, |
| "step": 15970 |
| }, |
| { |
| "epoch": 0.029062537652399866, |
| "grad_norm": 0.06034110113978386, |
| "learning_rate": 0.0002, |
| "loss": 0.0712, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.029080724471957062, |
| "grad_norm": 0.1817707121372223, |
| "learning_rate": 0.0002, |
| "loss": 0.0692, |
| "step": 15990 |
| }, |
| { |
| "epoch": 0.02909891129151426, |
| "grad_norm": 0.03466440737247467, |
| "learning_rate": 0.0002, |
| "loss": 0.0205, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.029117098111071455, |
| "grad_norm": 0.1375580132007599, |
| "learning_rate": 0.0002, |
| "loss": 0.1499, |
| "step": 16010 |
| }, |
| { |
| "epoch": 0.02913528493062865, |
| "grad_norm": 0.14308910071849823, |
| "learning_rate": 0.0002, |
| "loss": 0.083, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.029153471750185848, |
| "grad_norm": 0.041022926568984985, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 16030 |
| }, |
| { |
| "epoch": 0.029171658569743044, |
| "grad_norm": 0.1701498180627823, |
| "learning_rate": 0.0002, |
| "loss": 0.0656, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.02918984538930024, |
| "grad_norm": 0.023075805976986885, |
| "learning_rate": 0.0002, |
| "loss": 0.0225, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.029208032208857437, |
| "grad_norm": 0.05303549766540527, |
| "learning_rate": 0.0002, |
| "loss": 0.1369, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.029226219028414633, |
| "grad_norm": 0.044178470969200134, |
| "learning_rate": 0.0002, |
| "loss": 0.0754, |
| "step": 16070 |
| }, |
| { |
| "epoch": 0.02924440584797183, |
| "grad_norm": 0.03951259329915047, |
| "learning_rate": 0.0002, |
| "loss": 0.0759, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.029262592667529026, |
| "grad_norm": 0.13762067258358002, |
| "learning_rate": 0.0002, |
| "loss": 0.0605, |
| "step": 16090 |
| }, |
| { |
| "epoch": 0.029280779487086222, |
| "grad_norm": 0.021227868273854256, |
| "learning_rate": 0.0002, |
| "loss": 0.0173, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.029298966306643418, |
| "grad_norm": 0.19493195414543152, |
| "learning_rate": 0.0002, |
| "loss": 0.1307, |
| "step": 16110 |
| }, |
| { |
| "epoch": 0.029317153126200615, |
| "grad_norm": 0.09980791062116623, |
| "learning_rate": 0.0002, |
| "loss": 0.0724, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.02933533994575781, |
| "grad_norm": 0.08762095868587494, |
| "learning_rate": 0.0002, |
| "loss": 0.0734, |
| "step": 16130 |
| }, |
| { |
| "epoch": 0.029353526765315007, |
| "grad_norm": 0.14261308312416077, |
| "learning_rate": 0.0002, |
| "loss": 0.071, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.029371713584872203, |
| "grad_norm": 0.033154651522636414, |
| "learning_rate": 0.0002, |
| "loss": 0.0238, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.0293899004044294, |
| "grad_norm": 0.1422877162694931, |
| "learning_rate": 0.0002, |
| "loss": 0.1285, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.029408087223986596, |
| "grad_norm": 0.1342266947031021, |
| "learning_rate": 0.0002, |
| "loss": 0.0765, |
| "step": 16170 |
| }, |
| { |
| "epoch": 0.029426274043543792, |
| "grad_norm": 0.031525906175374985, |
| "learning_rate": 0.0002, |
| "loss": 0.0772, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.02944446086310099, |
| "grad_norm": 0.14790122210979462, |
| "learning_rate": 0.0002, |
| "loss": 0.0627, |
| "step": 16190 |
| }, |
| { |
| "epoch": 0.029462647682658185, |
| "grad_norm": 0.025354932993650436, |
| "learning_rate": 0.0002, |
| "loss": 0.0212, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.02948083450221538, |
| "grad_norm": 0.1287624090909958, |
| "learning_rate": 0.0002, |
| "loss": 0.1457, |
| "step": 16210 |
| }, |
| { |
| "epoch": 0.029499021321772578, |
| "grad_norm": 0.1079782247543335, |
| "learning_rate": 0.0002, |
| "loss": 0.0819, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.029517208141329774, |
| "grad_norm": 0.04884497448801994, |
| "learning_rate": 0.0002, |
| "loss": 0.0843, |
| "step": 16230 |
| }, |
| { |
| "epoch": 0.02953539496088697, |
| "grad_norm": 0.14452646672725677, |
| "learning_rate": 0.0002, |
| "loss": 0.0664, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.029553581780444167, |
| "grad_norm": 0.029236188158392906, |
| "learning_rate": 0.0002, |
| "loss": 0.0182, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.029571768600001363, |
| "grad_norm": 0.18048252165317535, |
| "learning_rate": 0.0002, |
| "loss": 0.1382, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.02958995541955856, |
| "grad_norm": 0.08402508497238159, |
| "learning_rate": 0.0002, |
| "loss": 0.078, |
| "step": 16270 |
| }, |
| { |
| "epoch": 0.029608142239115755, |
| "grad_norm": 0.07740433514118195, |
| "learning_rate": 0.0002, |
| "loss": 0.0776, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.029626329058672952, |
| "grad_norm": 0.1414123773574829, |
| "learning_rate": 0.0002, |
| "loss": 0.0611, |
| "step": 16290 |
| }, |
| { |
| "epoch": 0.029644515878230148, |
| "grad_norm": 0.03296574577689171, |
| "learning_rate": 0.0002, |
| "loss": 0.0228, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.029662702697787344, |
| "grad_norm": 0.09312735497951508, |
| "learning_rate": 0.0002, |
| "loss": 0.1213, |
| "step": 16310 |
| }, |
| { |
| "epoch": 0.029680889517344544, |
| "grad_norm": 0.07857484370470047, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.02969907633690174, |
| "grad_norm": 0.0680379793047905, |
| "learning_rate": 0.0002, |
| "loss": 0.0774, |
| "step": 16330 |
| }, |
| { |
| "epoch": 0.029717263156458937, |
| "grad_norm": 0.18506748974323273, |
| "learning_rate": 0.0002, |
| "loss": 0.0675, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.029735449976016133, |
| "grad_norm": 0.029233543202280998, |
| "learning_rate": 0.0002, |
| "loss": 0.0187, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.02975363679557333, |
| "grad_norm": 0.1133171021938324, |
| "learning_rate": 0.0002, |
| "loss": 0.1217, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.029771823615130526, |
| "grad_norm": 0.06985988467931747, |
| "learning_rate": 0.0002, |
| "loss": 0.0761, |
| "step": 16370 |
| }, |
| { |
| "epoch": 0.029790010434687722, |
| "grad_norm": 0.13158757984638214, |
| "learning_rate": 0.0002, |
| "loss": 0.0764, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.02980819725424492, |
| "grad_norm": 0.19751304388046265, |
| "learning_rate": 0.0002, |
| "loss": 0.0652, |
| "step": 16390 |
| }, |
| { |
| "epoch": 0.029826384073802115, |
| "grad_norm": 0.019567493349313736, |
| "learning_rate": 0.0002, |
| "loss": 0.0166, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.02984457089335931, |
| "grad_norm": 0.1859702467918396, |
| "learning_rate": 0.0002, |
| "loss": 0.1482, |
| "step": 16410 |
| }, |
| { |
| "epoch": 0.029862757712916507, |
| "grad_norm": 0.03211350366473198, |
| "learning_rate": 0.0002, |
| "loss": 0.073, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.029880944532473704, |
| "grad_norm": 0.10664219409227371, |
| "learning_rate": 0.0002, |
| "loss": 0.075, |
| "step": 16430 |
| }, |
| { |
| "epoch": 0.0298991313520309, |
| "grad_norm": 0.18254978954792023, |
| "learning_rate": 0.0002, |
| "loss": 0.0666, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.029917318171588096, |
| "grad_norm": 0.03076091594994068, |
| "learning_rate": 0.0002, |
| "loss": 0.0217, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.029935504991145293, |
| "grad_norm": 0.11172248423099518, |
| "learning_rate": 0.0002, |
| "loss": 0.1115, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.02995369181070249, |
| "grad_norm": 0.1121174767613411, |
| "learning_rate": 0.0002, |
| "loss": 0.0838, |
| "step": 16470 |
| }, |
| { |
| "epoch": 0.029971878630259685, |
| "grad_norm": 0.05544061213731766, |
| "learning_rate": 0.0002, |
| "loss": 0.0773, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.02999006544981688, |
| "grad_norm": 0.13899610936641693, |
| "learning_rate": 0.0002, |
| "loss": 0.0648, |
| "step": 16490 |
| }, |
| { |
| "epoch": 0.030008252269374078, |
| "grad_norm": 0.031017031520605087, |
| "learning_rate": 0.0002, |
| "loss": 0.0205, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.030026439088931274, |
| "grad_norm": 0.5919166803359985, |
| "learning_rate": 0.0002, |
| "loss": 0.1454, |
| "step": 16510 |
| }, |
| { |
| "epoch": 0.03004462590848847, |
| "grad_norm": 2.5127646923065186, |
| "learning_rate": 0.0002, |
| "loss": 0.0925, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.030062812728045667, |
| "grad_norm": 0.12587642669677734, |
| "learning_rate": 0.0002, |
| "loss": 0.0896, |
| "step": 16530 |
| }, |
| { |
| "epoch": 0.030080999547602863, |
| "grad_norm": 0.29352524876594543, |
| "learning_rate": 0.0002, |
| "loss": 0.0692, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.03009918636716006, |
| "grad_norm": 0.012585405260324478, |
| "learning_rate": 0.0002, |
| "loss": 0.021, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.030117373186717256, |
| "grad_norm": 2.432018756866455, |
| "learning_rate": 0.0002, |
| "loss": 0.239, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.030135560006274452, |
| "grad_norm": 0.09337054193019867, |
| "learning_rate": 0.0002, |
| "loss": 0.0859, |
| "step": 16570 |
| }, |
| { |
| "epoch": 0.030153746825831648, |
| "grad_norm": 0.05135548114776611, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.030171933645388845, |
| "grad_norm": 0.15056684613227844, |
| "learning_rate": 0.0002, |
| "loss": 0.0697, |
| "step": 16590 |
| }, |
| { |
| "epoch": 0.03019012046494604, |
| "grad_norm": 5.883757694391534e-05, |
| "learning_rate": 0.0002, |
| "loss": 0.0085, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.030208307284503237, |
| "grad_norm": 1.0368543863296509, |
| "learning_rate": 0.0002, |
| "loss": 0.1861, |
| "step": 16610 |
| }, |
| { |
| "epoch": 0.030226494104060433, |
| "grad_norm": 0.07987317442893982, |
| "learning_rate": 0.0002, |
| "loss": 0.0938, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.03024468092361763, |
| "grad_norm": 0.02812887355685234, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 16630 |
| }, |
| { |
| "epoch": 0.030262867743174826, |
| "grad_norm": 0.24061231315135956, |
| "learning_rate": 0.0002, |
| "loss": 0.0653, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.030281054562732022, |
| "grad_norm": 0.0402507558465004, |
| "learning_rate": 0.0002, |
| "loss": 0.0266, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.030299241382289222, |
| "grad_norm": 0.13552093505859375, |
| "learning_rate": 0.0002, |
| "loss": 0.1709, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.03031742820184642, |
| "grad_norm": 0.6093604564666748, |
| "learning_rate": 0.0002, |
| "loss": 0.0857, |
| "step": 16670 |
| }, |
| { |
| "epoch": 0.030335615021403615, |
| "grad_norm": 0.11608528345823288, |
| "learning_rate": 0.0002, |
| "loss": 0.0874, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.03035380184096081, |
| "grad_norm": 0.23376339673995972, |
| "learning_rate": 0.0002, |
| "loss": 0.0688, |
| "step": 16690 |
| }, |
| { |
| "epoch": 0.030371988660518007, |
| "grad_norm": 0.03484225273132324, |
| "learning_rate": 0.0002, |
| "loss": 0.0172, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.030390175480075204, |
| "grad_norm": 0.30532532930374146, |
| "learning_rate": 0.0002, |
| "loss": 0.1686, |
| "step": 16710 |
| }, |
| { |
| "epoch": 0.0304083622996324, |
| "grad_norm": 0.05142231658101082, |
| "learning_rate": 0.0002, |
| "loss": 0.0766, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.030426549119189596, |
| "grad_norm": 0.08218207955360413, |
| "learning_rate": 0.0002, |
| "loss": 0.0839, |
| "step": 16730 |
| }, |
| { |
| "epoch": 0.030444735938746793, |
| "grad_norm": 0.15296520292758942, |
| "learning_rate": 0.0002, |
| "loss": 0.0717, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.03046292275830399, |
| "grad_norm": 0.009951476007699966, |
| "learning_rate": 0.0002, |
| "loss": 0.0103, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.030481109577861185, |
| "grad_norm": 0.18752850592136383, |
| "learning_rate": 0.0002, |
| "loss": 0.2382, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.03049929639741838, |
| "grad_norm": 0.1473335325717926, |
| "learning_rate": 0.0002, |
| "loss": 0.0975, |
| "step": 16770 |
| }, |
| { |
| "epoch": 0.030517483216975578, |
| "grad_norm": 0.04578230902552605, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.030535670036532774, |
| "grad_norm": 0.2557182312011719, |
| "learning_rate": 0.0002, |
| "loss": 0.0691, |
| "step": 16790 |
| }, |
| { |
| "epoch": 0.03055385685608997, |
| "grad_norm": 1.473021388053894, |
| "learning_rate": 0.0002, |
| "loss": 0.2088, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.030572043675647167, |
| "grad_norm": 1.0227181911468506, |
| "learning_rate": 0.0002, |
| "loss": 0.7207, |
| "step": 16810 |
| }, |
| { |
| "epoch": 0.030590230495204363, |
| "grad_norm": 0.11395780742168427, |
| "learning_rate": 0.0002, |
| "loss": 0.0943, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.03060841731476156, |
| "grad_norm": 6.501937389373779, |
| "learning_rate": 0.0002, |
| "loss": 0.0871, |
| "step": 16830 |
| }, |
| { |
| "epoch": 0.030626604134318756, |
| "grad_norm": 0.17187578976154327, |
| "learning_rate": 0.0002, |
| "loss": 0.0672, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.030644790953875952, |
| "grad_norm": 0.03396519273519516, |
| "learning_rate": 0.0002, |
| "loss": 0.0224, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.03066297777343315, |
| "grad_norm": 3.397012948989868, |
| "learning_rate": 0.0002, |
| "loss": 0.1641, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.030681164592990345, |
| "grad_norm": 0.44838130474090576, |
| "learning_rate": 0.0002, |
| "loss": 0.0868, |
| "step": 16870 |
| }, |
| { |
| "epoch": 0.03069935141254754, |
| "grad_norm": 0.08598771691322327, |
| "learning_rate": 0.0002, |
| "loss": 0.0766, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.030717538232104737, |
| "grad_norm": 0.15339739620685577, |
| "learning_rate": 0.0002, |
| "loss": 0.0609, |
| "step": 16890 |
| }, |
| { |
| "epoch": 0.030735725051661934, |
| "grad_norm": 0.04086040332913399, |
| "learning_rate": 0.0002, |
| "loss": 0.0218, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.03075391187121913, |
| "grad_norm": 0.40313076972961426, |
| "learning_rate": 0.0002, |
| "loss": 0.2017, |
| "step": 16910 |
| }, |
| { |
| "epoch": 0.030772098690776326, |
| "grad_norm": 0.2068721503019333, |
| "learning_rate": 0.0002, |
| "loss": 0.0906, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.030790285510333523, |
| "grad_norm": 0.12770770490169525, |
| "learning_rate": 0.0002, |
| "loss": 0.0801, |
| "step": 16930 |
| }, |
| { |
| "epoch": 0.03080847232989072, |
| "grad_norm": 17.294641494750977, |
| "learning_rate": 0.0002, |
| "loss": 0.0701, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.030826659149447915, |
| "grad_norm": 0.04612286388874054, |
| "learning_rate": 0.0002, |
| "loss": 0.0287, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.03084484596900511, |
| "grad_norm": 0.10311487317085266, |
| "learning_rate": 0.0002, |
| "loss": 0.136, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.030863032788562308, |
| "grad_norm": 0.20878446102142334, |
| "learning_rate": 0.0002, |
| "loss": 0.0886, |
| "step": 16970 |
| }, |
| { |
| "epoch": 0.030881219608119504, |
| "grad_norm": 1.412353515625, |
| "learning_rate": 0.0002, |
| "loss": 0.0843, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.0308994064276767, |
| "grad_norm": 0.27046918869018555, |
| "learning_rate": 0.0002, |
| "loss": 0.0755, |
| "step": 16990 |
| }, |
| { |
| "epoch": 0.030917593247233897, |
| "grad_norm": 0.5227788090705872, |
| "learning_rate": 0.0002, |
| "loss": 0.0234, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.030935780066791096, |
| "grad_norm": 0.16006655991077423, |
| "learning_rate": 0.0002, |
| "loss": 0.183, |
| "step": 17010 |
| }, |
| { |
| "epoch": 0.030953966886348293, |
| "grad_norm": 0.1297607421875, |
| "learning_rate": 0.0002, |
| "loss": 0.0868, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.03097215370590549, |
| "grad_norm": 11.198999404907227, |
| "learning_rate": 0.0002, |
| "loss": 0.0998, |
| "step": 17030 |
| }, |
| { |
| "epoch": 0.030990340525462685, |
| "grad_norm": 0.39887136220932007, |
| "learning_rate": 0.0002, |
| "loss": 0.0898, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.03100852734501988, |
| "grad_norm": 0.009262642823159695, |
| "learning_rate": 0.0002, |
| "loss": 0.0215, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.031026714164577078, |
| "grad_norm": 0.15820527076721191, |
| "learning_rate": 0.0002, |
| "loss": 0.2017, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.031044900984134274, |
| "grad_norm": 0.11645558476448059, |
| "learning_rate": 0.0002, |
| "loss": 0.085, |
| "step": 17070 |
| }, |
| { |
| "epoch": 0.03106308780369147, |
| "grad_norm": 0.03981775790452957, |
| "learning_rate": 0.0002, |
| "loss": 0.0803, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.031081274623248667, |
| "grad_norm": 0.1584177166223526, |
| "learning_rate": 0.0002, |
| "loss": 0.0635, |
| "step": 17090 |
| }, |
| { |
| "epoch": 0.031099461442805863, |
| "grad_norm": 0.0005907397717237473, |
| "learning_rate": 0.0002, |
| "loss": 0.006, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.03111764826236306, |
| "grad_norm": 0.05344061553478241, |
| "learning_rate": 0.0002, |
| "loss": 0.3098, |
| "step": 17110 |
| }, |
| { |
| "epoch": 0.031135835081920256, |
| "grad_norm": 0.05249408632516861, |
| "learning_rate": 0.0002, |
| "loss": 0.1002, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.031154021901477452, |
| "grad_norm": 0.04177263006567955, |
| "learning_rate": 0.0002, |
| "loss": 0.0969, |
| "step": 17130 |
| }, |
| { |
| "epoch": 0.03117220872103465, |
| "grad_norm": 0.18396486341953278, |
| "learning_rate": 0.0002, |
| "loss": 0.0727, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.031190395540591845, |
| "grad_norm": 0.0019848416559398174, |
| "learning_rate": 0.0002, |
| "loss": 0.0092, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.03120858236014904, |
| "grad_norm": 0.23747271299362183, |
| "learning_rate": 0.0002, |
| "loss": 0.3243, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.031226769179706237, |
| "grad_norm": 0.2365376353263855, |
| "learning_rate": 0.0002, |
| "loss": 0.094, |
| "step": 17170 |
| }, |
| { |
| "epoch": 0.031244955999263434, |
| "grad_norm": 0.21784919500350952, |
| "learning_rate": 0.0002, |
| "loss": 0.0795, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.03126314281882063, |
| "grad_norm": 0.27253153920173645, |
| "learning_rate": 0.0002, |
| "loss": 0.0748, |
| "step": 17190 |
| }, |
| { |
| "epoch": 0.031281329638377826, |
| "grad_norm": 0.004298684187233448, |
| "learning_rate": 0.0002, |
| "loss": 0.014, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.03129951645793502, |
| "grad_norm": 0.267871230840683, |
| "learning_rate": 0.0002, |
| "loss": 0.2938, |
| "step": 17210 |
| }, |
| { |
| "epoch": 0.03131770327749222, |
| "grad_norm": 0.1428530067205429, |
| "learning_rate": 0.0002, |
| "loss": 0.0901, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.031335890097049415, |
| "grad_norm": 0.10623782873153687, |
| "learning_rate": 0.0002, |
| "loss": 0.0752, |
| "step": 17230 |
| }, |
| { |
| "epoch": 0.03135407691660661, |
| "grad_norm": 0.2869247496128082, |
| "learning_rate": 0.0002, |
| "loss": 0.0707, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.03137226373616381, |
| "grad_norm": 0.011321209371089935, |
| "learning_rate": 0.0002, |
| "loss": 0.0168, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.031390450555721004, |
| "grad_norm": 0.09432020783424377, |
| "learning_rate": 0.0002, |
| "loss": 0.2046, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.0314086373752782, |
| "grad_norm": 0.190867081284523, |
| "learning_rate": 0.0002, |
| "loss": 0.0866, |
| "step": 17270 |
| }, |
| { |
| "epoch": 0.0314268241948354, |
| "grad_norm": 0.14274829626083374, |
| "learning_rate": 0.0002, |
| "loss": 0.0796, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.03144501101439259, |
| "grad_norm": 0.29910504817962646, |
| "learning_rate": 0.0002, |
| "loss": 0.0711, |
| "step": 17290 |
| }, |
| { |
| "epoch": 0.03146319783394979, |
| "grad_norm": 0.031730011105537415, |
| "learning_rate": 0.0002, |
| "loss": 0.0217, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.031481384653506986, |
| "grad_norm": 0.23042625188827515, |
| "learning_rate": 0.0002, |
| "loss": 0.1491, |
| "step": 17310 |
| }, |
| { |
| "epoch": 0.03149957147306418, |
| "grad_norm": 0.15560220181941986, |
| "learning_rate": 0.0002, |
| "loss": 0.0761, |
| "step": 17320 |
| }, |
| { |
| "epoch": 0.03151775829262138, |
| "grad_norm": 0.051929160952568054, |
| "learning_rate": 0.0002, |
| "loss": 0.0893, |
| "step": 17330 |
| }, |
| { |
| "epoch": 0.031535945112178575, |
| "grad_norm": 0.16162756085395813, |
| "learning_rate": 0.0002, |
| "loss": 0.0623, |
| "step": 17340 |
| }, |
| { |
| "epoch": 0.03155413193173577, |
| "grad_norm": 0.019480068236589432, |
| "learning_rate": 0.0002, |
| "loss": 0.0137, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.03157231875129297, |
| "grad_norm": 0.24700693786144257, |
| "learning_rate": 0.0002, |
| "loss": 0.1481, |
| "step": 17360 |
| }, |
| { |
| "epoch": 0.031590505570850164, |
| "grad_norm": 0.17574873566627502, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 17370 |
| }, |
| { |
| "epoch": 0.03160869239040736, |
| "grad_norm": 0.10368580371141434, |
| "learning_rate": 0.0002, |
| "loss": 0.0811, |
| "step": 17380 |
| }, |
| { |
| "epoch": 0.031626879209964556, |
| "grad_norm": 0.23330622911453247, |
| "learning_rate": 0.0002, |
| "loss": 0.0669, |
| "step": 17390 |
| }, |
| { |
| "epoch": 0.03164506602952175, |
| "grad_norm": 0.031393859535455704, |
| "learning_rate": 0.0002, |
| "loss": 0.0183, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.03166325284907895, |
| "grad_norm": 0.22080129384994507, |
| "learning_rate": 0.0002, |
| "loss": 0.1567, |
| "step": 17410 |
| }, |
| { |
| "epoch": 0.031681439668636145, |
| "grad_norm": 0.177025705575943, |
| "learning_rate": 0.0002, |
| "loss": 0.0798, |
| "step": 17420 |
| }, |
| { |
| "epoch": 0.03169962648819334, |
| "grad_norm": 0.054285600781440735, |
| "learning_rate": 0.0002, |
| "loss": 0.0709, |
| "step": 17430 |
| }, |
| { |
| "epoch": 0.03171781330775054, |
| "grad_norm": 0.20625421404838562, |
| "learning_rate": 0.0002, |
| "loss": 0.0592, |
| "step": 17440 |
| }, |
| { |
| "epoch": 0.031736000127307734, |
| "grad_norm": 0.042640089988708496, |
| "learning_rate": 0.0002, |
| "loss": 0.0199, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.03175418694686493, |
| "grad_norm": 0.2505437731742859, |
| "learning_rate": 0.0002, |
| "loss": 0.131, |
| "step": 17460 |
| }, |
| { |
| "epoch": 0.03177237376642213, |
| "grad_norm": 0.24848629534244537, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 17470 |
| }, |
| { |
| "epoch": 0.03179056058597932, |
| "grad_norm": 0.056854844093322754, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 17480 |
| }, |
| { |
| "epoch": 0.03180874740553652, |
| "grad_norm": 0.23022660613059998, |
| "learning_rate": 0.0002, |
| "loss": 0.0703, |
| "step": 17490 |
| }, |
| { |
| "epoch": 0.031826934225093716, |
| "grad_norm": 0.033501993864774704, |
| "learning_rate": 0.0002, |
| "loss": 0.0229, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.03184512104465091, |
| "grad_norm": 0.25061148405075073, |
| "learning_rate": 0.0002, |
| "loss": 0.1588, |
| "step": 17510 |
| }, |
| { |
| "epoch": 0.031863307864208115, |
| "grad_norm": 0.21534167230129242, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 17520 |
| }, |
| { |
| "epoch": 0.03188149468376531, |
| "grad_norm": 0.04823959991335869, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 17530 |
| }, |
| { |
| "epoch": 0.03189968150332251, |
| "grad_norm": 0.23680952191352844, |
| "learning_rate": 0.0002, |
| "loss": 0.0617, |
| "step": 17540 |
| }, |
| { |
| "epoch": 0.031917868322879704, |
| "grad_norm": 0.016636351123452187, |
| "learning_rate": 0.0002, |
| "loss": 0.0143, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.0319360551424369, |
| "grad_norm": 0.3684225082397461, |
| "learning_rate": 0.0002, |
| "loss": 0.2011, |
| "step": 17560 |
| }, |
| { |
| "epoch": 0.0319542419619941, |
| "grad_norm": 0.07126643508672714, |
| "learning_rate": 0.0002, |
| "loss": 0.0792, |
| "step": 17570 |
| }, |
| { |
| "epoch": 0.03197242878155129, |
| "grad_norm": 0.05354290455579758, |
| "learning_rate": 0.0002, |
| "loss": 0.0831, |
| "step": 17580 |
| }, |
| { |
| "epoch": 0.03199061560110849, |
| "grad_norm": 0.20318995416164398, |
| "learning_rate": 0.0002, |
| "loss": 0.0617, |
| "step": 17590 |
| }, |
| { |
| "epoch": 0.032008802420665686, |
| "grad_norm": 0.021502351388335228, |
| "learning_rate": 0.0002, |
| "loss": 0.0137, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.03202698924022288, |
| "grad_norm": 0.3471545875072479, |
| "learning_rate": 0.0002, |
| "loss": 0.1823, |
| "step": 17610 |
| }, |
| { |
| "epoch": 0.03204517605978008, |
| "grad_norm": 0.23191972076892853, |
| "learning_rate": 0.0002, |
| "loss": 0.0837, |
| "step": 17620 |
| }, |
| { |
| "epoch": 0.032063362879337275, |
| "grad_norm": 0.0479818731546402, |
| "learning_rate": 0.0002, |
| "loss": 0.0845, |
| "step": 17630 |
| }, |
| { |
| "epoch": 0.03208154969889447, |
| "grad_norm": 0.2193339467048645, |
| "learning_rate": 0.0002, |
| "loss": 0.068, |
| "step": 17640 |
| }, |
| { |
| "epoch": 0.03209973651845167, |
| "grad_norm": 0.03661821037530899, |
| "learning_rate": 0.0002, |
| "loss": 0.0234, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.032117923338008864, |
| "grad_norm": 0.10396943986415863, |
| "learning_rate": 0.0002, |
| "loss": 0.1295, |
| "step": 17660 |
| }, |
| { |
| "epoch": 0.03213611015756606, |
| "grad_norm": 0.16999179124832153, |
| "learning_rate": 0.0002, |
| "loss": 0.0823, |
| "step": 17670 |
| }, |
| { |
| "epoch": 0.032154296977123256, |
| "grad_norm": 0.09069819748401642, |
| "learning_rate": 0.0002, |
| "loss": 0.0748, |
| "step": 17680 |
| }, |
| { |
| "epoch": 0.03217248379668045, |
| "grad_norm": 0.24210433661937714, |
| "learning_rate": 0.0002, |
| "loss": 0.0611, |
| "step": 17690 |
| }, |
| { |
| "epoch": 0.03219067061623765, |
| "grad_norm": 0.028281020000576973, |
| "learning_rate": 0.0002, |
| "loss": 0.018, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.032208857435794845, |
| "grad_norm": 0.4133516252040863, |
| "learning_rate": 0.0002, |
| "loss": 0.1704, |
| "step": 17710 |
| }, |
| { |
| "epoch": 0.03222704425535204, |
| "grad_norm": 0.20207400619983673, |
| "learning_rate": 0.0002, |
| "loss": 0.0804, |
| "step": 17720 |
| }, |
| { |
| "epoch": 0.03224523107490924, |
| "grad_norm": 0.043604232370853424, |
| "learning_rate": 0.0002, |
| "loss": 0.0929, |
| "step": 17730 |
| }, |
| { |
| "epoch": 0.032263417894466434, |
| "grad_norm": 0.1995580494403839, |
| "learning_rate": 0.0002, |
| "loss": 0.062, |
| "step": 17740 |
| }, |
| { |
| "epoch": 0.03228160471402363, |
| "grad_norm": 0.03241848200559616, |
| "learning_rate": 0.0002, |
| "loss": 0.0137, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.03229979153358083, |
| "grad_norm": 0.28819000720977783, |
| "learning_rate": 0.0002, |
| "loss": 0.1696, |
| "step": 17760 |
| }, |
| { |
| "epoch": 0.03231797835313802, |
| "grad_norm": 0.2625056803226471, |
| "learning_rate": 0.0002, |
| "loss": 0.0704, |
| "step": 17770 |
| }, |
| { |
| "epoch": 0.03233616517269522, |
| "grad_norm": 0.03986202925443649, |
| "learning_rate": 0.0002, |
| "loss": 0.0848, |
| "step": 17780 |
| }, |
| { |
| "epoch": 0.032354351992252416, |
| "grad_norm": 0.24770867824554443, |
| "learning_rate": 0.0002, |
| "loss": 0.0608, |
| "step": 17790 |
| }, |
| { |
| "epoch": 0.03237253881180961, |
| "grad_norm": 0.031353630125522614, |
| "learning_rate": 0.0002, |
| "loss": 0.0145, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.03239072563136681, |
| "grad_norm": 0.2273588478565216, |
| "learning_rate": 0.0002, |
| "loss": 0.1765, |
| "step": 17810 |
| }, |
| { |
| "epoch": 0.032408912450924005, |
| "grad_norm": 0.19741755723953247, |
| "learning_rate": 0.0002, |
| "loss": 0.0818, |
| "step": 17820 |
| }, |
| { |
| "epoch": 0.0324270992704812, |
| "grad_norm": 0.03193483129143715, |
| "learning_rate": 0.0002, |
| "loss": 0.0737, |
| "step": 17830 |
| }, |
| { |
| "epoch": 0.0324452860900384, |
| "grad_norm": 0.13962946832180023, |
| "learning_rate": 0.0002, |
| "loss": 0.0575, |
| "step": 17840 |
| }, |
| { |
| "epoch": 0.03246347290959559, |
| "grad_norm": 0.01755092851817608, |
| "learning_rate": 0.0002, |
| "loss": 0.0159, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.03248165972915279, |
| "grad_norm": 0.21713244915008545, |
| "learning_rate": 0.0002, |
| "loss": 0.1476, |
| "step": 17860 |
| }, |
| { |
| "epoch": 0.032499846548709986, |
| "grad_norm": 0.15362155437469482, |
| "learning_rate": 0.0002, |
| "loss": 0.0747, |
| "step": 17870 |
| }, |
| { |
| "epoch": 0.03251803336826718, |
| "grad_norm": 0.02643916755914688, |
| "learning_rate": 0.0002, |
| "loss": 0.0793, |
| "step": 17880 |
| }, |
| { |
| "epoch": 0.03253622018782438, |
| "grad_norm": 0.2702760100364685, |
| "learning_rate": 0.0002, |
| "loss": 0.0641, |
| "step": 17890 |
| }, |
| { |
| "epoch": 0.032554407007381575, |
| "grad_norm": 0.05910428613424301, |
| "learning_rate": 0.0002, |
| "loss": 0.022, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.03257259382693877, |
| "grad_norm": 0.17692551016807556, |
| "learning_rate": 0.0002, |
| "loss": 0.1407, |
| "step": 17910 |
| }, |
| { |
| "epoch": 0.03259078064649597, |
| "grad_norm": 0.19877870380878448, |
| "learning_rate": 0.0002, |
| "loss": 0.0798, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.032608967466053164, |
| "grad_norm": 0.06731924414634705, |
| "learning_rate": 0.0002, |
| "loss": 0.0798, |
| "step": 17930 |
| }, |
| { |
| "epoch": 0.03262715428561036, |
| "grad_norm": 0.20342952013015747, |
| "learning_rate": 0.0002, |
| "loss": 0.0571, |
| "step": 17940 |
| }, |
| { |
| "epoch": 0.03264534110516756, |
| "grad_norm": 0.06299301236867905, |
| "learning_rate": 0.0002, |
| "loss": 0.0154, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.03266352792472475, |
| "grad_norm": 0.30317986011505127, |
| "learning_rate": 0.0002, |
| "loss": 0.1496, |
| "step": 17960 |
| }, |
| { |
| "epoch": 0.03268171474428195, |
| "grad_norm": 0.2737327218055725, |
| "learning_rate": 0.0002, |
| "loss": 0.0777, |
| "step": 17970 |
| }, |
| { |
| "epoch": 0.032699901563839145, |
| "grad_norm": 0.03226702660322189, |
| "learning_rate": 0.0002, |
| "loss": 0.0799, |
| "step": 17980 |
| }, |
| { |
| "epoch": 0.03271808838339634, |
| "grad_norm": 0.20195341110229492, |
| "learning_rate": 0.0002, |
| "loss": 0.0654, |
| "step": 17990 |
| }, |
| { |
| "epoch": 0.03273627520295354, |
| "grad_norm": 0.03351292014122009, |
| "learning_rate": 0.0002, |
| "loss": 0.0194, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.032754462022510734, |
| "grad_norm": 0.2281372845172882, |
| "learning_rate": 0.0002, |
| "loss": 0.154, |
| "step": 18010 |
| }, |
| { |
| "epoch": 0.03277264884206793, |
| "grad_norm": 0.19263891875743866, |
| "learning_rate": 0.0002, |
| "loss": 0.0803, |
| "step": 18020 |
| }, |
| { |
| "epoch": 0.03279083566162513, |
| "grad_norm": 0.04183288663625717, |
| "learning_rate": 0.0002, |
| "loss": 0.0842, |
| "step": 18030 |
| }, |
| { |
| "epoch": 0.03280902248118232, |
| "grad_norm": 0.284759521484375, |
| "learning_rate": 0.0002, |
| "loss": 0.067, |
| "step": 18040 |
| }, |
| { |
| "epoch": 0.03282720930073952, |
| "grad_norm": 0.02972390688955784, |
| "learning_rate": 0.0002, |
| "loss": 0.016, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.032845396120296716, |
| "grad_norm": 0.28630614280700684, |
| "learning_rate": 0.0002, |
| "loss": 0.1866, |
| "step": 18060 |
| }, |
| { |
| "epoch": 0.03286358293985391, |
| "grad_norm": 0.16426514089107513, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 18070 |
| }, |
| { |
| "epoch": 0.03288176975941111, |
| "grad_norm": 0.05643441155552864, |
| "learning_rate": 0.0002, |
| "loss": 0.0773, |
| "step": 18080 |
| }, |
| { |
| "epoch": 0.032899956578968305, |
| "grad_norm": 0.19082742929458618, |
| "learning_rate": 0.0002, |
| "loss": 0.0582, |
| "step": 18090 |
| }, |
| { |
| "epoch": 0.0329181433985255, |
| "grad_norm": 0.017512233927845955, |
| "learning_rate": 0.0002, |
| "loss": 0.0174, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.0329363302180827, |
| "grad_norm": 0.22619640827178955, |
| "learning_rate": 0.0002, |
| "loss": 0.166, |
| "step": 18110 |
| }, |
| { |
| "epoch": 0.032954517037639894, |
| "grad_norm": 0.10430974513292313, |
| "learning_rate": 0.0002, |
| "loss": 0.0716, |
| "step": 18120 |
| }, |
| { |
| "epoch": 0.03297270385719709, |
| "grad_norm": 0.07371710985898972, |
| "learning_rate": 0.0002, |
| "loss": 0.0733, |
| "step": 18130 |
| }, |
| { |
| "epoch": 0.032990890676754286, |
| "grad_norm": 0.19163483381271362, |
| "learning_rate": 0.0002, |
| "loss": 0.0609, |
| "step": 18140 |
| }, |
| { |
| "epoch": 0.03300907749631148, |
| "grad_norm": 0.03743975609540939, |
| "learning_rate": 0.0002, |
| "loss": 0.017, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.03302726431586868, |
| "grad_norm": 0.19496546685695648, |
| "learning_rate": 0.0002, |
| "loss": 0.1622, |
| "step": 18160 |
| }, |
| { |
| "epoch": 0.033045451135425875, |
| "grad_norm": 0.13054883480072021, |
| "learning_rate": 0.0002, |
| "loss": 0.0728, |
| "step": 18170 |
| }, |
| { |
| "epoch": 0.03306363795498307, |
| "grad_norm": 0.10058756172657013, |
| "learning_rate": 0.0002, |
| "loss": 0.0738, |
| "step": 18180 |
| }, |
| { |
| "epoch": 0.03308182477454027, |
| "grad_norm": 0.220932736992836, |
| "learning_rate": 0.0002, |
| "loss": 0.063, |
| "step": 18190 |
| }, |
| { |
| "epoch": 0.033100011594097464, |
| "grad_norm": 0.04396356642246246, |
| "learning_rate": 0.0002, |
| "loss": 0.0207, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.03311819841365467, |
| "grad_norm": 0.23554326593875885, |
| "learning_rate": 0.0002, |
| "loss": 0.1484, |
| "step": 18210 |
| }, |
| { |
| "epoch": 0.033136385233211864, |
| "grad_norm": 0.11277181655168533, |
| "learning_rate": 0.0002, |
| "loss": 0.0763, |
| "step": 18220 |
| }, |
| { |
| "epoch": 0.03315457205276906, |
| "grad_norm": 0.05176365375518799, |
| "learning_rate": 0.0002, |
| "loss": 0.076, |
| "step": 18230 |
| }, |
| { |
| "epoch": 0.033172758872326256, |
| "grad_norm": 0.1521395444869995, |
| "learning_rate": 0.0002, |
| "loss": 0.0605, |
| "step": 18240 |
| }, |
| { |
| "epoch": 0.03319094569188345, |
| "grad_norm": 0.04682580381631851, |
| "learning_rate": 0.0002, |
| "loss": 0.0149, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.03320913251144065, |
| "grad_norm": 0.16890883445739746, |
| "learning_rate": 0.0002, |
| "loss": 0.1402, |
| "step": 18260 |
| }, |
| { |
| "epoch": 0.033227319330997845, |
| "grad_norm": 0.17221559584140778, |
| "learning_rate": 0.0002, |
| "loss": 0.0819, |
| "step": 18270 |
| }, |
| { |
| "epoch": 0.03324550615055504, |
| "grad_norm": 0.07434559613466263, |
| "learning_rate": 0.0002, |
| "loss": 0.0784, |
| "step": 18280 |
| }, |
| { |
| "epoch": 0.03326369297011224, |
| "grad_norm": 0.1912834346294403, |
| "learning_rate": 0.0002, |
| "loss": 0.0614, |
| "step": 18290 |
| }, |
| { |
| "epoch": 0.033281879789669434, |
| "grad_norm": 0.04286884889006615, |
| "learning_rate": 0.0002, |
| "loss": 0.0185, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.03330006660922663, |
| "grad_norm": 0.29059842228889465, |
| "learning_rate": 0.0002, |
| "loss": 0.1357, |
| "step": 18310 |
| }, |
| { |
| "epoch": 0.03331825342878383, |
| "grad_norm": 0.2289486825466156, |
| "learning_rate": 0.0002, |
| "loss": 0.0865, |
| "step": 18320 |
| }, |
| { |
| "epoch": 0.03333644024834102, |
| "grad_norm": 0.027094636112451553, |
| "learning_rate": 0.0002, |
| "loss": 0.0841, |
| "step": 18330 |
| }, |
| { |
| "epoch": 0.03335462706789822, |
| "grad_norm": 0.21263600885868073, |
| "learning_rate": 0.0002, |
| "loss": 0.0628, |
| "step": 18340 |
| }, |
| { |
| "epoch": 0.033372813887455416, |
| "grad_norm": 0.03497980535030365, |
| "learning_rate": 0.0002, |
| "loss": 0.0158, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.03339100070701261, |
| "grad_norm": 0.20155973732471466, |
| "learning_rate": 0.0002, |
| "loss": 0.1523, |
| "step": 18360 |
| }, |
| { |
| "epoch": 0.03340918752656981, |
| "grad_norm": 0.03746286779642105, |
| "learning_rate": 0.0002, |
| "loss": 0.0781, |
| "step": 18370 |
| }, |
| { |
| "epoch": 0.033427374346127005, |
| "grad_norm": 0.06747066229581833, |
| "learning_rate": 0.0002, |
| "loss": 0.0792, |
| "step": 18380 |
| }, |
| { |
| "epoch": 0.0334455611656842, |
| "grad_norm": 0.23699060082435608, |
| "learning_rate": 0.0002, |
| "loss": 0.0651, |
| "step": 18390 |
| }, |
| { |
| "epoch": 0.0334637479852414, |
| "grad_norm": 0.047832150012254715, |
| "learning_rate": 0.0002, |
| "loss": 0.0181, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.033481934804798594, |
| "grad_norm": 0.3178698420524597, |
| "learning_rate": 0.0002, |
| "loss": 0.1537, |
| "step": 18410 |
| }, |
| { |
| "epoch": 0.03350012162435579, |
| "grad_norm": 0.16258081793785095, |
| "learning_rate": 0.0002, |
| "loss": 0.0722, |
| "step": 18420 |
| }, |
| { |
| "epoch": 0.033518308443912986, |
| "grad_norm": 0.02807716652750969, |
| "learning_rate": 0.0002, |
| "loss": 0.0844, |
| "step": 18430 |
| }, |
| { |
| "epoch": 0.03353649526347018, |
| "grad_norm": 0.16596710681915283, |
| "learning_rate": 0.0002, |
| "loss": 0.0607, |
| "step": 18440 |
| }, |
| { |
| "epoch": 0.03355468208302738, |
| "grad_norm": 0.04448723793029785, |
| "learning_rate": 0.0002, |
| "loss": 0.0183, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.033572868902584575, |
| "grad_norm": 0.39318934082984924, |
| "learning_rate": 0.0002, |
| "loss": 0.1497, |
| "step": 18460 |
| }, |
| { |
| "epoch": 0.03359105572214177, |
| "grad_norm": 0.17387263476848602, |
| "learning_rate": 0.0002, |
| "loss": 0.0787, |
| "step": 18470 |
| }, |
| { |
| "epoch": 0.03360924254169897, |
| "grad_norm": 0.14859163761138916, |
| "learning_rate": 0.0002, |
| "loss": 0.0837, |
| "step": 18480 |
| }, |
| { |
| "epoch": 0.033627429361256164, |
| "grad_norm": 0.24148601293563843, |
| "learning_rate": 0.0002, |
| "loss": 0.0655, |
| "step": 18490 |
| }, |
| { |
| "epoch": 0.03364561618081336, |
| "grad_norm": 0.04743284359574318, |
| "learning_rate": 0.0002, |
| "loss": 0.0174, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.03366380300037056, |
| "grad_norm": 0.25396591424942017, |
| "learning_rate": 0.0002, |
| "loss": 0.1438, |
| "step": 18510 |
| }, |
| { |
| "epoch": 0.03368198981992775, |
| "grad_norm": 0.1759178638458252, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 18520 |
| }, |
| { |
| "epoch": 0.03370017663948495, |
| "grad_norm": 0.06611669808626175, |
| "learning_rate": 0.0002, |
| "loss": 0.0787, |
| "step": 18530 |
| }, |
| { |
| "epoch": 0.033718363459042146, |
| "grad_norm": 0.22699445486068726, |
| "learning_rate": 0.0002, |
| "loss": 0.0697, |
| "step": 18540 |
| }, |
| { |
| "epoch": 0.03373655027859934, |
| "grad_norm": 0.02634899877011776, |
| "learning_rate": 0.0002, |
| "loss": 0.0189, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.03375473709815654, |
| "grad_norm": 0.3238360285758972, |
| "learning_rate": 0.0002, |
| "loss": 0.1496, |
| "step": 18560 |
| }, |
| { |
| "epoch": 0.033772923917713735, |
| "grad_norm": 0.16044601798057556, |
| "learning_rate": 0.0002, |
| "loss": 0.076, |
| "step": 18570 |
| }, |
| { |
| "epoch": 0.03379111073727093, |
| "grad_norm": 0.029841836541891098, |
| "learning_rate": 0.0002, |
| "loss": 0.0718, |
| "step": 18580 |
| }, |
| { |
| "epoch": 0.03380929755682813, |
| "grad_norm": 0.21851007640361786, |
| "learning_rate": 0.0002, |
| "loss": 0.0656, |
| "step": 18590 |
| }, |
| { |
| "epoch": 0.033827484376385324, |
| "grad_norm": 0.02096417360007763, |
| "learning_rate": 0.0002, |
| "loss": 0.0173, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.03384567119594252, |
| "grad_norm": 0.29625844955444336, |
| "learning_rate": 0.0002, |
| "loss": 0.1716, |
| "step": 18610 |
| }, |
| { |
| "epoch": 0.033863858015499716, |
| "grad_norm": 0.1510130614042282, |
| "learning_rate": 0.0002, |
| "loss": 0.0792, |
| "step": 18620 |
| }, |
| { |
| "epoch": 0.03388204483505691, |
| "grad_norm": 0.04192917421460152, |
| "learning_rate": 0.0002, |
| "loss": 0.0717, |
| "step": 18630 |
| }, |
| { |
| "epoch": 0.03390023165461411, |
| "grad_norm": 0.23139427602291107, |
| "learning_rate": 0.0002, |
| "loss": 0.0609, |
| "step": 18640 |
| }, |
| { |
| "epoch": 0.033918418474171305, |
| "grad_norm": 0.03887970373034477, |
| "learning_rate": 0.0002, |
| "loss": 0.0127, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.0339366052937285, |
| "grad_norm": 0.1315147578716278, |
| "learning_rate": 0.0002, |
| "loss": 0.1434, |
| "step": 18660 |
| }, |
| { |
| "epoch": 0.0339547921132857, |
| "grad_norm": 0.13328243792057037, |
| "learning_rate": 0.0002, |
| "loss": 0.0673, |
| "step": 18670 |
| }, |
| { |
| "epoch": 0.033972978932842894, |
| "grad_norm": 0.07161080092191696, |
| "learning_rate": 0.0002, |
| "loss": 0.0692, |
| "step": 18680 |
| }, |
| { |
| "epoch": 0.03399116575240009, |
| "grad_norm": 0.16019296646118164, |
| "learning_rate": 0.0002, |
| "loss": 0.0641, |
| "step": 18690 |
| }, |
| { |
| "epoch": 0.03400935257195729, |
| "grad_norm": 0.042882539331912994, |
| "learning_rate": 0.0002, |
| "loss": 0.0196, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.03402753939151448, |
| "grad_norm": 0.15019817650318146, |
| "learning_rate": 0.0002, |
| "loss": 0.1239, |
| "step": 18710 |
| }, |
| { |
| "epoch": 0.03404572621107168, |
| "grad_norm": 0.140267476439476, |
| "learning_rate": 0.0002, |
| "loss": 0.0715, |
| "step": 18720 |
| }, |
| { |
| "epoch": 0.034063913030628876, |
| "grad_norm": 0.060760073363780975, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 18730 |
| }, |
| { |
| "epoch": 0.03408209985018607, |
| "grad_norm": 0.1783122718334198, |
| "learning_rate": 0.0002, |
| "loss": 0.0616, |
| "step": 18740 |
| }, |
| { |
| "epoch": 0.03410028666974327, |
| "grad_norm": 0.023139121010899544, |
| "learning_rate": 0.0002, |
| "loss": 0.0171, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.034118473489300465, |
| "grad_norm": 0.2645978331565857, |
| "learning_rate": 0.0002, |
| "loss": 0.1355, |
| "step": 18760 |
| }, |
| { |
| "epoch": 0.03413666030885766, |
| "grad_norm": 0.21009914577007294, |
| "learning_rate": 0.0002, |
| "loss": 0.0757, |
| "step": 18770 |
| }, |
| { |
| "epoch": 0.03415484712841486, |
| "grad_norm": 0.13494494557380676, |
| "learning_rate": 0.0002, |
| "loss": 0.0774, |
| "step": 18780 |
| }, |
| { |
| "epoch": 0.034173033947972054, |
| "grad_norm": 0.19806784391403198, |
| "learning_rate": 0.0002, |
| "loss": 0.0636, |
| "step": 18790 |
| }, |
| { |
| "epoch": 0.03419122076752925, |
| "grad_norm": 0.020482519641518593, |
| "learning_rate": 0.0002, |
| "loss": 0.0194, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.034209407587086446, |
| "grad_norm": 0.34826937317848206, |
| "learning_rate": 0.0002, |
| "loss": 0.1521, |
| "step": 18810 |
| }, |
| { |
| "epoch": 0.03422759440664364, |
| "grad_norm": 0.1293957680463791, |
| "learning_rate": 0.0002, |
| "loss": 0.0742, |
| "step": 18820 |
| }, |
| { |
| "epoch": 0.03424578122620084, |
| "grad_norm": 0.06574539095163345, |
| "learning_rate": 0.0002, |
| "loss": 0.0792, |
| "step": 18830 |
| }, |
| { |
| "epoch": 0.034263968045758035, |
| "grad_norm": 0.2005399614572525, |
| "learning_rate": 0.0002, |
| "loss": 0.0618, |
| "step": 18840 |
| }, |
| { |
| "epoch": 0.03428215486531523, |
| "grad_norm": 0.04699913039803505, |
| "learning_rate": 0.0002, |
| "loss": 0.0176, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.03430034168487243, |
| "grad_norm": 0.2593109905719757, |
| "learning_rate": 0.0002, |
| "loss": 0.1709, |
| "step": 18860 |
| }, |
| { |
| "epoch": 0.034318528504429624, |
| "grad_norm": 0.587365448474884, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 18870 |
| }, |
| { |
| "epoch": 0.03433671532398682, |
| "grad_norm": 0.0371614433825016, |
| "learning_rate": 0.0002, |
| "loss": 0.076, |
| "step": 18880 |
| }, |
| { |
| "epoch": 0.03435490214354402, |
| "grad_norm": 0.2164178341627121, |
| "learning_rate": 0.0002, |
| "loss": 0.0577, |
| "step": 18890 |
| }, |
| { |
| "epoch": 0.03437308896310122, |
| "grad_norm": 0.028071587905287743, |
| "learning_rate": 0.0002, |
| "loss": 0.0184, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.034391275782658416, |
| "grad_norm": 0.25464126467704773, |
| "learning_rate": 0.0002, |
| "loss": 0.1616, |
| "step": 18910 |
| }, |
| { |
| "epoch": 0.03440946260221561, |
| "grad_norm": 0.2830415368080139, |
| "learning_rate": 0.0002, |
| "loss": 0.0795, |
| "step": 18920 |
| }, |
| { |
| "epoch": 0.03442764942177281, |
| "grad_norm": 0.07880273461341858, |
| "learning_rate": 0.0002, |
| "loss": 0.0717, |
| "step": 18930 |
| }, |
| { |
| "epoch": 0.034445836241330005, |
| "grad_norm": 0.19671671092510223, |
| "learning_rate": 0.0002, |
| "loss": 0.0625, |
| "step": 18940 |
| }, |
| { |
| "epoch": 0.0344640230608872, |
| "grad_norm": 0.038350027054548264, |
| "learning_rate": 0.0002, |
| "loss": 0.0172, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.0344822098804444, |
| "grad_norm": 0.196768656373024, |
| "learning_rate": 0.0002, |
| "loss": 0.1586, |
| "step": 18960 |
| }, |
| { |
| "epoch": 0.034500396700001594, |
| "grad_norm": 0.1861678808927536, |
| "learning_rate": 0.0002, |
| "loss": 0.0871, |
| "step": 18970 |
| }, |
| { |
| "epoch": 0.03451858351955879, |
| "grad_norm": 0.1074979305267334, |
| "learning_rate": 0.0002, |
| "loss": 0.0697, |
| "step": 18980 |
| }, |
| { |
| "epoch": 0.03453677033911599, |
| "grad_norm": 0.18214645981788635, |
| "learning_rate": 0.0002, |
| "loss": 0.0594, |
| "step": 18990 |
| }, |
| { |
| "epoch": 0.03455495715867318, |
| "grad_norm": 0.035948049277067184, |
| "learning_rate": 0.0002, |
| "loss": 0.0177, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.03457314397823038, |
| "grad_norm": 0.2434094399213791, |
| "learning_rate": 0.0002, |
| "loss": 0.1402, |
| "step": 19010 |
| }, |
| { |
| "epoch": 0.034591330797787576, |
| "grad_norm": 0.06897670775651932, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 19020 |
| }, |
| { |
| "epoch": 0.03460951761734477, |
| "grad_norm": 0.13107649981975555, |
| "learning_rate": 0.0002, |
| "loss": 0.0826, |
| "step": 19030 |
| }, |
| { |
| "epoch": 0.03462770443690197, |
| "grad_norm": 0.1787865310907364, |
| "learning_rate": 0.0002, |
| "loss": 0.0619, |
| "step": 19040 |
| }, |
| { |
| "epoch": 0.034645891256459165, |
| "grad_norm": 0.0460963137447834, |
| "learning_rate": 0.0002, |
| "loss": 0.0203, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.03466407807601636, |
| "grad_norm": 0.20582084357738495, |
| "learning_rate": 0.0002, |
| "loss": 0.1325, |
| "step": 19060 |
| }, |
| { |
| "epoch": 0.03468226489557356, |
| "grad_norm": 0.16120313107967377, |
| "learning_rate": 0.0002, |
| "loss": 0.08, |
| "step": 19070 |
| }, |
| { |
| "epoch": 0.03470045171513075, |
| "grad_norm": 0.04322347044944763, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 19080 |
| }, |
| { |
| "epoch": 0.03471863853468795, |
| "grad_norm": 0.1764109879732132, |
| "learning_rate": 0.0002, |
| "loss": 0.0618, |
| "step": 19090 |
| }, |
| { |
| "epoch": 0.034736825354245146, |
| "grad_norm": 0.04453815147280693, |
| "learning_rate": 0.0002, |
| "loss": 0.0172, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.03475501217380234, |
| "grad_norm": 0.32023972272872925, |
| "learning_rate": 0.0002, |
| "loss": 0.1394, |
| "step": 19110 |
| }, |
| { |
| "epoch": 0.03477319899335954, |
| "grad_norm": 0.09920009225606918, |
| "learning_rate": 0.0002, |
| "loss": 0.0788, |
| "step": 19120 |
| }, |
| { |
| "epoch": 0.034791385812916735, |
| "grad_norm": 0.047868456691503525, |
| "learning_rate": 0.0002, |
| "loss": 0.0745, |
| "step": 19130 |
| }, |
| { |
| "epoch": 0.03480957263247393, |
| "grad_norm": 0.219430074095726, |
| "learning_rate": 0.0002, |
| "loss": 0.063, |
| "step": 19140 |
| }, |
| { |
| "epoch": 0.03482775945203113, |
| "grad_norm": 0.04879681020975113, |
| "learning_rate": 0.0002, |
| "loss": 0.0161, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.034845946271588324, |
| "grad_norm": 0.21360138058662415, |
| "learning_rate": 0.0002, |
| "loss": 0.1602, |
| "step": 19160 |
| }, |
| { |
| "epoch": 0.03486413309114552, |
| "grad_norm": 0.1391269713640213, |
| "learning_rate": 0.0002, |
| "loss": 0.0798, |
| "step": 19170 |
| }, |
| { |
| "epoch": 0.03488231991070272, |
| "grad_norm": 0.06293737888336182, |
| "learning_rate": 0.0002, |
| "loss": 0.0717, |
| "step": 19180 |
| }, |
| { |
| "epoch": 0.03490050673025991, |
| "grad_norm": 0.20241963863372803, |
| "learning_rate": 0.0002, |
| "loss": 0.0612, |
| "step": 19190 |
| }, |
| { |
| "epoch": 0.03491869354981711, |
| "grad_norm": 0.06246611103415489, |
| "learning_rate": 0.0002, |
| "loss": 0.0148, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.034936880369374305, |
| "grad_norm": 0.16479995846748352, |
| "learning_rate": 0.0002, |
| "loss": 0.1611, |
| "step": 19210 |
| }, |
| { |
| "epoch": 0.0349550671889315, |
| "grad_norm": 0.12036983668804169, |
| "learning_rate": 0.0002, |
| "loss": 0.0724, |
| "step": 19220 |
| }, |
| { |
| "epoch": 0.0349732540084887, |
| "grad_norm": 0.03939517214894295, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 19230 |
| }, |
| { |
| "epoch": 0.034991440828045894, |
| "grad_norm": 0.17047277092933655, |
| "learning_rate": 0.0002, |
| "loss": 0.066, |
| "step": 19240 |
| }, |
| { |
| "epoch": 0.03500962764760309, |
| "grad_norm": 0.031782686710357666, |
| "learning_rate": 0.0002, |
| "loss": 0.0203, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.03502781446716029, |
| "grad_norm": 0.2545730471611023, |
| "learning_rate": 0.0002, |
| "loss": 0.1716, |
| "step": 19260 |
| }, |
| { |
| "epoch": 0.03504600128671748, |
| "grad_norm": 0.11225811392068863, |
| "learning_rate": 0.0002, |
| "loss": 0.0791, |
| "step": 19270 |
| }, |
| { |
| "epoch": 0.03506418810627468, |
| "grad_norm": 0.049140989780426025, |
| "learning_rate": 0.0002, |
| "loss": 0.0784, |
| "step": 19280 |
| }, |
| { |
| "epoch": 0.035082374925831876, |
| "grad_norm": 0.16942913830280304, |
| "learning_rate": 0.0002, |
| "loss": 0.0638, |
| "step": 19290 |
| }, |
| { |
| "epoch": 0.03510056174538907, |
| "grad_norm": 0.03836115077137947, |
| "learning_rate": 0.0002, |
| "loss": 0.0193, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.03511874856494627, |
| "grad_norm": 0.13004787266254425, |
| "learning_rate": 0.0002, |
| "loss": 0.1477, |
| "step": 19310 |
| }, |
| { |
| "epoch": 0.035136935384503465, |
| "grad_norm": 0.2054329216480255, |
| "learning_rate": 0.0002, |
| "loss": 0.0792, |
| "step": 19320 |
| }, |
| { |
| "epoch": 0.03515512220406066, |
| "grad_norm": 0.06592074781656265, |
| "learning_rate": 0.0002, |
| "loss": 0.0784, |
| "step": 19330 |
| }, |
| { |
| "epoch": 0.03517330902361786, |
| "grad_norm": 0.19228027760982513, |
| "learning_rate": 0.0002, |
| "loss": 0.067, |
| "step": 19340 |
| }, |
| { |
| "epoch": 0.035191495843175054, |
| "grad_norm": 0.04050719738006592, |
| "learning_rate": 0.0002, |
| "loss": 0.017, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.03520968266273225, |
| "grad_norm": 0.28715401887893677, |
| "learning_rate": 0.0002, |
| "loss": 0.1499, |
| "step": 19360 |
| }, |
| { |
| "epoch": 0.035227869482289446, |
| "grad_norm": 0.13954712450504303, |
| "learning_rate": 0.0002, |
| "loss": 0.0787, |
| "step": 19370 |
| }, |
| { |
| "epoch": 0.03524605630184664, |
| "grad_norm": 0.08851815015077591, |
| "learning_rate": 0.0002, |
| "loss": 0.0739, |
| "step": 19380 |
| }, |
| { |
| "epoch": 0.03526424312140384, |
| "grad_norm": 0.1788545697927475, |
| "learning_rate": 0.0002, |
| "loss": 0.0576, |
| "step": 19390 |
| }, |
| { |
| "epoch": 0.035282429940961035, |
| "grad_norm": 0.03644658252596855, |
| "learning_rate": 0.0002, |
| "loss": 0.0143, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.03530061676051823, |
| "grad_norm": 0.3140568137168884, |
| "learning_rate": 0.0002, |
| "loss": 0.1498, |
| "step": 19410 |
| }, |
| { |
| "epoch": 0.03531880358007543, |
| "grad_norm": 0.14550529420375824, |
| "learning_rate": 0.0002, |
| "loss": 0.0748, |
| "step": 19420 |
| }, |
| { |
| "epoch": 0.035336990399632624, |
| "grad_norm": 0.10995481163263321, |
| "learning_rate": 0.0002, |
| "loss": 0.0801, |
| "step": 19430 |
| }, |
| { |
| "epoch": 0.03535517721918982, |
| "grad_norm": 0.17238560318946838, |
| "learning_rate": 0.0002, |
| "loss": 0.0608, |
| "step": 19440 |
| }, |
| { |
| "epoch": 0.03537336403874702, |
| "grad_norm": 0.031363293528556824, |
| "learning_rate": 0.0002, |
| "loss": 0.0154, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.03539155085830421, |
| "grad_norm": 0.14145390689373016, |
| "learning_rate": 0.0002, |
| "loss": 0.1511, |
| "step": 19460 |
| }, |
| { |
| "epoch": 0.03540973767786141, |
| "grad_norm": 0.19073855876922607, |
| "learning_rate": 0.0002, |
| "loss": 0.0725, |
| "step": 19470 |
| }, |
| { |
| "epoch": 0.035427924497418606, |
| "grad_norm": 0.15639430284500122, |
| "learning_rate": 0.0002, |
| "loss": 0.0836, |
| "step": 19480 |
| }, |
| { |
| "epoch": 0.0354461113169758, |
| "grad_norm": 0.2566238045692444, |
| "learning_rate": 0.0002, |
| "loss": 0.0617, |
| "step": 19490 |
| }, |
| { |
| "epoch": 0.035464298136533, |
| "grad_norm": 0.055755820125341415, |
| "learning_rate": 0.0002, |
| "loss": 0.0178, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.035482484956090195, |
| "grad_norm": 0.2835562527179718, |
| "learning_rate": 0.0002, |
| "loss": 0.1306, |
| "step": 19510 |
| }, |
| { |
| "epoch": 0.03550067177564739, |
| "grad_norm": 0.2310812920331955, |
| "learning_rate": 0.0002, |
| "loss": 0.0766, |
| "step": 19520 |
| }, |
| { |
| "epoch": 0.03551885859520459, |
| "grad_norm": 0.1287071257829666, |
| "learning_rate": 0.0002, |
| "loss": 0.0791, |
| "step": 19530 |
| }, |
| { |
| "epoch": 0.035537045414761784, |
| "grad_norm": 0.21308869123458862, |
| "learning_rate": 0.0002, |
| "loss": 0.0584, |
| "step": 19540 |
| }, |
| { |
| "epoch": 0.03555523223431898, |
| "grad_norm": 0.0662735179066658, |
| "learning_rate": 0.0002, |
| "loss": 0.0207, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.035573419053876176, |
| "grad_norm": 0.21706523001194, |
| "learning_rate": 0.0002, |
| "loss": 0.1308, |
| "step": 19560 |
| }, |
| { |
| "epoch": 0.03559160587343337, |
| "grad_norm": 0.09376335144042969, |
| "learning_rate": 0.0002, |
| "loss": 0.0677, |
| "step": 19570 |
| }, |
| { |
| "epoch": 0.035609792692990576, |
| "grad_norm": 0.1093437597155571, |
| "learning_rate": 0.0002, |
| "loss": 0.0741, |
| "step": 19580 |
| }, |
| { |
| "epoch": 0.03562797951254777, |
| "grad_norm": 0.21057911217212677, |
| "learning_rate": 0.0002, |
| "loss": 0.0637, |
| "step": 19590 |
| }, |
| { |
| "epoch": 0.03564616633210497, |
| "grad_norm": 0.04383830726146698, |
| "learning_rate": 0.0002, |
| "loss": 0.019, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.035664353151662165, |
| "grad_norm": 0.3657427132129669, |
| "learning_rate": 0.0002, |
| "loss": 0.1421, |
| "step": 19610 |
| }, |
| { |
| "epoch": 0.03568253997121936, |
| "grad_norm": 0.17154265940189362, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 19620 |
| }, |
| { |
| "epoch": 0.03570072679077656, |
| "grad_norm": 0.041993435472249985, |
| "learning_rate": 0.0002, |
| "loss": 0.0768, |
| "step": 19630 |
| }, |
| { |
| "epoch": 0.035718913610333754, |
| "grad_norm": 0.1658252775669098, |
| "learning_rate": 0.0002, |
| "loss": 0.0602, |
| "step": 19640 |
| }, |
| { |
| "epoch": 0.03573710042989095, |
| "grad_norm": 0.028523078188300133, |
| "learning_rate": 0.0002, |
| "loss": 0.0151, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.035755287249448146, |
| "grad_norm": 0.2624453902244568, |
| "learning_rate": 0.0002, |
| "loss": 0.1355, |
| "step": 19660 |
| }, |
| { |
| "epoch": 0.03577347406900534, |
| "grad_norm": 0.12055794149637222, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 19670 |
| }, |
| { |
| "epoch": 0.03579166088856254, |
| "grad_norm": 0.043441224843263626, |
| "learning_rate": 0.0002, |
| "loss": 0.0722, |
| "step": 19680 |
| }, |
| { |
| "epoch": 0.035809847708119735, |
| "grad_norm": 0.2464340627193451, |
| "learning_rate": 0.0002, |
| "loss": 0.0673, |
| "step": 19690 |
| }, |
| { |
| "epoch": 0.03582803452767693, |
| "grad_norm": 0.04004153981804848, |
| "learning_rate": 0.0002, |
| "loss": 0.0212, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.03584622134723413, |
| "grad_norm": 0.3159453570842743, |
| "learning_rate": 0.0002, |
| "loss": 0.1806, |
| "step": 19710 |
| }, |
| { |
| "epoch": 0.035864408166791324, |
| "grad_norm": 0.11327318102121353, |
| "learning_rate": 0.0002, |
| "loss": 0.0748, |
| "step": 19720 |
| }, |
| { |
| "epoch": 0.03588259498634852, |
| "grad_norm": 0.0980909988284111, |
| "learning_rate": 0.0002, |
| "loss": 0.0807, |
| "step": 19730 |
| }, |
| { |
| "epoch": 0.03590078180590572, |
| "grad_norm": 0.15508098900318146, |
| "learning_rate": 0.0002, |
| "loss": 0.0576, |
| "step": 19740 |
| }, |
| { |
| "epoch": 0.03591896862546291, |
| "grad_norm": 0.019624806940555573, |
| "learning_rate": 0.0002, |
| "loss": 0.0135, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.03593715544502011, |
| "grad_norm": 0.20336109399795532, |
| "learning_rate": 0.0002, |
| "loss": 0.1702, |
| "step": 19760 |
| }, |
| { |
| "epoch": 0.035955342264577306, |
| "grad_norm": 0.12767620384693146, |
| "learning_rate": 0.0002, |
| "loss": 0.0776, |
| "step": 19770 |
| }, |
| { |
| "epoch": 0.0359735290841345, |
| "grad_norm": 0.19050805270671844, |
| "learning_rate": 0.0002, |
| "loss": 0.0838, |
| "step": 19780 |
| }, |
| { |
| "epoch": 0.0359917159036917, |
| "grad_norm": 0.17471866309642792, |
| "learning_rate": 0.0002, |
| "loss": 0.0561, |
| "step": 19790 |
| }, |
| { |
| "epoch": 0.036009902723248895, |
| "grad_norm": 0.044348277151584625, |
| "learning_rate": 0.0002, |
| "loss": 0.0159, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.03602808954280609, |
| "grad_norm": 0.30847081542015076, |
| "learning_rate": 0.0002, |
| "loss": 0.1686, |
| "step": 19810 |
| }, |
| { |
| "epoch": 0.03604627636236329, |
| "grad_norm": 0.08963622897863388, |
| "learning_rate": 0.0002, |
| "loss": 0.078, |
| "step": 19820 |
| }, |
| { |
| "epoch": 0.036064463181920484, |
| "grad_norm": 0.0580587275326252, |
| "learning_rate": 0.0002, |
| "loss": 0.0741, |
| "step": 19830 |
| }, |
| { |
| "epoch": 0.03608265000147768, |
| "grad_norm": 0.1698184460401535, |
| "learning_rate": 0.0002, |
| "loss": 0.0631, |
| "step": 19840 |
| }, |
| { |
| "epoch": 0.036100836821034876, |
| "grad_norm": 0.025531867519021034, |
| "learning_rate": 0.0002, |
| "loss": 0.0166, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.03611902364059207, |
| "grad_norm": 0.3544731140136719, |
| "learning_rate": 0.0002, |
| "loss": 0.1886, |
| "step": 19860 |
| }, |
| { |
| "epoch": 0.03613721046014927, |
| "grad_norm": 0.2552841901779175, |
| "learning_rate": 0.0002, |
| "loss": 0.0859, |
| "step": 19870 |
| }, |
| { |
| "epoch": 0.036155397279706465, |
| "grad_norm": 0.07771942019462585, |
| "learning_rate": 0.0002, |
| "loss": 0.0859, |
| "step": 19880 |
| }, |
| { |
| "epoch": 0.03617358409926366, |
| "grad_norm": 0.15945585072040558, |
| "learning_rate": 0.0002, |
| "loss": 0.0609, |
| "step": 19890 |
| }, |
| { |
| "epoch": 0.03619177091882086, |
| "grad_norm": 0.04583865404129028, |
| "learning_rate": 0.0002, |
| "loss": 0.0196, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.036209957738378054, |
| "grad_norm": 0.2110920548439026, |
| "learning_rate": 0.0002, |
| "loss": 0.1305, |
| "step": 19910 |
| }, |
| { |
| "epoch": 0.03622814455793525, |
| "grad_norm": 0.22165755927562714, |
| "learning_rate": 0.0002, |
| "loss": 0.0767, |
| "step": 19920 |
| }, |
| { |
| "epoch": 0.03624633137749245, |
| "grad_norm": 0.0866742879152298, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 19930 |
| }, |
| { |
| "epoch": 0.03626451819704964, |
| "grad_norm": 0.19838224351406097, |
| "learning_rate": 0.0002, |
| "loss": 0.0663, |
| "step": 19940 |
| }, |
| { |
| "epoch": 0.03628270501660684, |
| "grad_norm": 0.05543521046638489, |
| "learning_rate": 0.0002, |
| "loss": 0.023, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.036300891836164036, |
| "grad_norm": 0.20800183713436127, |
| "learning_rate": 0.0002, |
| "loss": 0.1468, |
| "step": 19960 |
| }, |
| { |
| "epoch": 0.03631907865572123, |
| "grad_norm": 0.14951092004776, |
| "learning_rate": 0.0002, |
| "loss": 0.0698, |
| "step": 19970 |
| }, |
| { |
| "epoch": 0.03633726547527843, |
| "grad_norm": 0.10162603855133057, |
| "learning_rate": 0.0002, |
| "loss": 0.0841, |
| "step": 19980 |
| }, |
| { |
| "epoch": 0.036355452294835625, |
| "grad_norm": 0.24774019420146942, |
| "learning_rate": 0.0002, |
| "loss": 0.0658, |
| "step": 19990 |
| }, |
| { |
| "epoch": 0.03637363911439282, |
| "grad_norm": 0.02705777995288372, |
| "learning_rate": 0.0002, |
| "loss": 0.02, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.03639182593395002, |
| "grad_norm": 0.2509992718696594, |
| "learning_rate": 0.0002, |
| "loss": 0.1529, |
| "step": 20010 |
| }, |
| { |
| "epoch": 0.036410012753507214, |
| "grad_norm": 0.2126697599887848, |
| "learning_rate": 0.0002, |
| "loss": 0.0716, |
| "step": 20020 |
| }, |
| { |
| "epoch": 0.03642819957306441, |
| "grad_norm": 0.1463591754436493, |
| "learning_rate": 0.0002, |
| "loss": 0.076, |
| "step": 20030 |
| }, |
| { |
| "epoch": 0.036446386392621606, |
| "grad_norm": 0.21879518032073975, |
| "learning_rate": 0.0002, |
| "loss": 0.0677, |
| "step": 20040 |
| }, |
| { |
| "epoch": 0.0364645732121788, |
| "grad_norm": 0.028337355703115463, |
| "learning_rate": 0.0002, |
| "loss": 0.0131, |
| "step": 20050 |
| }, |
| { |
| "epoch": 0.036482760031736, |
| "grad_norm": 0.335788756608963, |
| "learning_rate": 0.0002, |
| "loss": 0.1693, |
| "step": 20060 |
| }, |
| { |
| "epoch": 0.036500946851293195, |
| "grad_norm": 0.17615728080272675, |
| "learning_rate": 0.0002, |
| "loss": 0.0791, |
| "step": 20070 |
| }, |
| { |
| "epoch": 0.03651913367085039, |
| "grad_norm": 0.034229181706905365, |
| "learning_rate": 0.0002, |
| "loss": 0.0774, |
| "step": 20080 |
| }, |
| { |
| "epoch": 0.03653732049040759, |
| "grad_norm": 0.20637790858745575, |
| "learning_rate": 0.0002, |
| "loss": 0.0544, |
| "step": 20090 |
| }, |
| { |
| "epoch": 0.036555507309964784, |
| "grad_norm": 0.033659741282463074, |
| "learning_rate": 0.0002, |
| "loss": 0.0128, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.03657369412952198, |
| "grad_norm": 0.18249601125717163, |
| "learning_rate": 0.0002, |
| "loss": 0.1939, |
| "step": 20110 |
| }, |
| { |
| "epoch": 0.03659188094907918, |
| "grad_norm": 0.18065877258777618, |
| "learning_rate": 0.0002, |
| "loss": 0.0816, |
| "step": 20120 |
| }, |
| { |
| "epoch": 0.03661006776863637, |
| "grad_norm": 0.4361811876296997, |
| "learning_rate": 0.0002, |
| "loss": 0.0978, |
| "step": 20130 |
| }, |
| { |
| "epoch": 0.03662825458819357, |
| "grad_norm": 0.24488002061843872, |
| "learning_rate": 0.0002, |
| "loss": 0.0742, |
| "step": 20140 |
| }, |
| { |
| "epoch": 0.036646441407750766, |
| "grad_norm": 0.023062752559781075, |
| "learning_rate": 0.0002, |
| "loss": 0.0196, |
| "step": 20150 |
| }, |
| { |
| "epoch": 0.03666462822730796, |
| "grad_norm": 0.22796255350112915, |
| "learning_rate": 0.0002, |
| "loss": 0.1457, |
| "step": 20160 |
| }, |
| { |
| "epoch": 0.03668281504686516, |
| "grad_norm": 0.16665758192539215, |
| "learning_rate": 0.0002, |
| "loss": 0.138, |
| "step": 20170 |
| }, |
| { |
| "epoch": 0.036701001866422354, |
| "grad_norm": 0.0503946952521801, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 20180 |
| }, |
| { |
| "epoch": 0.03671918868597955, |
| "grad_norm": 0.1672963798046112, |
| "learning_rate": 0.0002, |
| "loss": 0.0621, |
| "step": 20190 |
| }, |
| { |
| "epoch": 0.03673737550553675, |
| "grad_norm": 0.06765859574079514, |
| "learning_rate": 0.0002, |
| "loss": 0.0171, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.03675556232509394, |
| "grad_norm": 0.6076682806015015, |
| "learning_rate": 0.0002, |
| "loss": 0.6804, |
| "step": 20210 |
| }, |
| { |
| "epoch": 0.03677374914465114, |
| "grad_norm": 0.04764563590288162, |
| "learning_rate": 0.0002, |
| "loss": 0.0965, |
| "step": 20220 |
| }, |
| { |
| "epoch": 0.036791935964208336, |
| "grad_norm": 0.6847806572914124, |
| "learning_rate": 0.0002, |
| "loss": 0.0784, |
| "step": 20230 |
| }, |
| { |
| "epoch": 0.03681012278376553, |
| "grad_norm": 0.2678837478160858, |
| "learning_rate": 0.0002, |
| "loss": 0.069, |
| "step": 20240 |
| }, |
| { |
| "epoch": 0.03682830960332273, |
| "grad_norm": 0.039824239909648895, |
| "learning_rate": 0.0002, |
| "loss": 0.0206, |
| "step": 20250 |
| }, |
| { |
| "epoch": 0.036846496422879925, |
| "grad_norm": 0.19583609700202942, |
| "learning_rate": 0.0002, |
| "loss": 0.1588, |
| "step": 20260 |
| }, |
| { |
| "epoch": 0.03686468324243713, |
| "grad_norm": 0.08613055944442749, |
| "learning_rate": 0.0002, |
| "loss": 0.0777, |
| "step": 20270 |
| }, |
| { |
| "epoch": 0.036882870061994324, |
| "grad_norm": 0.028818165883421898, |
| "learning_rate": 0.0002, |
| "loss": 0.0704, |
| "step": 20280 |
| }, |
| { |
| "epoch": 0.03690105688155152, |
| "grad_norm": 0.19514115154743195, |
| "learning_rate": 0.0002, |
| "loss": 0.0654, |
| "step": 20290 |
| }, |
| { |
| "epoch": 0.03691924370110872, |
| "grad_norm": 0.043222617357969284, |
| "learning_rate": 0.0002, |
| "loss": 0.0216, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.03693743052066591, |
| "grad_norm": 0.2490546703338623, |
| "learning_rate": 0.0002, |
| "loss": 0.1472, |
| "step": 20310 |
| }, |
| { |
| "epoch": 0.03695561734022311, |
| "grad_norm": 0.16989269852638245, |
| "learning_rate": 0.0002, |
| "loss": 0.081, |
| "step": 20320 |
| }, |
| { |
| "epoch": 0.036973804159780306, |
| "grad_norm": 0.09191739559173584, |
| "learning_rate": 0.0002, |
| "loss": 0.0733, |
| "step": 20330 |
| }, |
| { |
| "epoch": 0.0369919909793375, |
| "grad_norm": 0.18435023725032806, |
| "learning_rate": 0.0002, |
| "loss": 0.0654, |
| "step": 20340 |
| }, |
| { |
| "epoch": 0.0370101777988947, |
| "grad_norm": 0.031144114211201668, |
| "learning_rate": 0.0002, |
| "loss": 0.0226, |
| "step": 20350 |
| }, |
| { |
| "epoch": 0.037028364618451895, |
| "grad_norm": 0.3244694769382477, |
| "learning_rate": 0.0002, |
| "loss": 0.1304, |
| "step": 20360 |
| }, |
| { |
| "epoch": 0.03704655143800909, |
| "grad_norm": 0.13787488639354706, |
| "learning_rate": 0.0002, |
| "loss": 0.0811, |
| "step": 20370 |
| }, |
| { |
| "epoch": 0.03706473825756629, |
| "grad_norm": 0.058523450046777725, |
| "learning_rate": 0.0002, |
| "loss": 0.0806, |
| "step": 20380 |
| }, |
| { |
| "epoch": 0.037082925077123484, |
| "grad_norm": 0.3001325726509094, |
| "learning_rate": 0.0002, |
| "loss": 0.0694, |
| "step": 20390 |
| }, |
| { |
| "epoch": 0.03710111189668068, |
| "grad_norm": 0.04447292909026146, |
| "learning_rate": 0.0002, |
| "loss": 0.0218, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.037119298716237877, |
| "grad_norm": 0.25786396861076355, |
| "learning_rate": 0.0002, |
| "loss": 0.1499, |
| "step": 20410 |
| }, |
| { |
| "epoch": 0.03713748553579507, |
| "grad_norm": 0.11381134390830994, |
| "learning_rate": 0.0002, |
| "loss": 0.0822, |
| "step": 20420 |
| }, |
| { |
| "epoch": 0.03715567235535227, |
| "grad_norm": 0.022713568061590195, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 20430 |
| }, |
| { |
| "epoch": 0.037173859174909465, |
| "grad_norm": 0.15770909190177917, |
| "learning_rate": 0.0002, |
| "loss": 0.0625, |
| "step": 20440 |
| }, |
| { |
| "epoch": 0.03719204599446666, |
| "grad_norm": 0.021412041038274765, |
| "learning_rate": 0.0002, |
| "loss": 0.0126, |
| "step": 20450 |
| }, |
| { |
| "epoch": 0.03721023281402386, |
| "grad_norm": 0.24260753393173218, |
| "learning_rate": 0.0002, |
| "loss": 0.1777, |
| "step": 20460 |
| }, |
| { |
| "epoch": 0.037228419633581054, |
| "grad_norm": 0.10953031480312347, |
| "learning_rate": 0.0002, |
| "loss": 0.073, |
| "step": 20470 |
| }, |
| { |
| "epoch": 0.03724660645313825, |
| "grad_norm": 0.03975062072277069, |
| "learning_rate": 0.0002, |
| "loss": 0.0907, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.03726479327269545, |
| "grad_norm": 0.2025018036365509, |
| "learning_rate": 0.0002, |
| "loss": 0.0631, |
| "step": 20490 |
| }, |
| { |
| "epoch": 0.03728298009225264, |
| "grad_norm": 0.031849734485149384, |
| "learning_rate": 0.0002, |
| "loss": 0.0156, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.03730116691180984, |
| "grad_norm": 0.2650098502635956, |
| "learning_rate": 0.0002, |
| "loss": 0.1569, |
| "step": 20510 |
| }, |
| { |
| "epoch": 0.037319353731367036, |
| "grad_norm": 0.14113937318325043, |
| "learning_rate": 0.0002, |
| "loss": 0.0824, |
| "step": 20520 |
| }, |
| { |
| "epoch": 0.03733754055092423, |
| "grad_norm": 0.10276420414447784, |
| "learning_rate": 0.0002, |
| "loss": 0.0797, |
| "step": 20530 |
| }, |
| { |
| "epoch": 0.03735572737048143, |
| "grad_norm": 0.2258286476135254, |
| "learning_rate": 0.0002, |
| "loss": 0.0671, |
| "step": 20540 |
| }, |
| { |
| "epoch": 0.037373914190038625, |
| "grad_norm": 0.10343242436647415, |
| "learning_rate": 0.0002, |
| "loss": 0.0178, |
| "step": 20550 |
| }, |
| { |
| "epoch": 0.03739210100959582, |
| "grad_norm": 0.19423982501029968, |
| "learning_rate": 0.0002, |
| "loss": 0.1423, |
| "step": 20560 |
| }, |
| { |
| "epoch": 0.03741028782915302, |
| "grad_norm": 0.12046124786138535, |
| "learning_rate": 0.0002, |
| "loss": 0.0827, |
| "step": 20570 |
| }, |
| { |
| "epoch": 0.037428474648710214, |
| "grad_norm": 0.026751041412353516, |
| "learning_rate": 0.0002, |
| "loss": 0.0743, |
| "step": 20580 |
| }, |
| { |
| "epoch": 0.03744666146826741, |
| "grad_norm": 0.23576834797859192, |
| "learning_rate": 0.0002, |
| "loss": 0.0629, |
| "step": 20590 |
| }, |
| { |
| "epoch": 0.037464848287824606, |
| "grad_norm": 0.05146399885416031, |
| "learning_rate": 0.0002, |
| "loss": 0.0205, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.0374830351073818, |
| "grad_norm": 0.21750135719776154, |
| "learning_rate": 0.0002, |
| "loss": 0.1397, |
| "step": 20610 |
| }, |
| { |
| "epoch": 0.037501221926939, |
| "grad_norm": 0.08351115882396698, |
| "learning_rate": 0.0002, |
| "loss": 0.0801, |
| "step": 20620 |
| }, |
| { |
| "epoch": 0.037519408746496195, |
| "grad_norm": 0.07272092998027802, |
| "learning_rate": 0.0002, |
| "loss": 0.0881, |
| "step": 20630 |
| }, |
| { |
| "epoch": 0.03753759556605339, |
| "grad_norm": 0.23707769811153412, |
| "learning_rate": 0.0002, |
| "loss": 0.0706, |
| "step": 20640 |
| }, |
| { |
| "epoch": 0.03755578238561059, |
| "grad_norm": 0.05208323150873184, |
| "learning_rate": 0.0002, |
| "loss": 0.024, |
| "step": 20650 |
| }, |
| { |
| "epoch": 0.037573969205167784, |
| "grad_norm": 0.4163022041320801, |
| "learning_rate": 0.0002, |
| "loss": 0.159, |
| "step": 20660 |
| }, |
| { |
| "epoch": 0.03759215602472498, |
| "grad_norm": 0.1036575511097908, |
| "learning_rate": 0.0002, |
| "loss": 0.0814, |
| "step": 20670 |
| }, |
| { |
| "epoch": 0.03761034284428218, |
| "grad_norm": 0.09861626476049423, |
| "learning_rate": 0.0002, |
| "loss": 0.0828, |
| "step": 20680 |
| }, |
| { |
| "epoch": 0.03762852966383937, |
| "grad_norm": 0.1685744971036911, |
| "learning_rate": 0.0002, |
| "loss": 0.0597, |
| "step": 20690 |
| }, |
| { |
| "epoch": 0.03764671648339657, |
| "grad_norm": 0.02716050110757351, |
| "learning_rate": 0.0002, |
| "loss": 0.0164, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.037664903302953766, |
| "grad_norm": 0.46858713030815125, |
| "learning_rate": 0.0002, |
| "loss": 0.1596, |
| "step": 20710 |
| }, |
| { |
| "epoch": 0.03768309012251096, |
| "grad_norm": 0.15260715782642365, |
| "learning_rate": 0.0002, |
| "loss": 0.0835, |
| "step": 20720 |
| }, |
| { |
| "epoch": 0.03770127694206816, |
| "grad_norm": 0.2063397914171219, |
| "learning_rate": 0.0002, |
| "loss": 0.0845, |
| "step": 20730 |
| }, |
| { |
| "epoch": 0.037719463761625355, |
| "grad_norm": 0.16447599232196808, |
| "learning_rate": 0.0002, |
| "loss": 0.0595, |
| "step": 20740 |
| }, |
| { |
| "epoch": 0.03773765058118255, |
| "grad_norm": 0.020755184814333916, |
| "learning_rate": 0.0002, |
| "loss": 0.0164, |
| "step": 20750 |
| }, |
| { |
| "epoch": 0.03775583740073975, |
| "grad_norm": 0.23675021529197693, |
| "learning_rate": 0.0002, |
| "loss": 0.1634, |
| "step": 20760 |
| }, |
| { |
| "epoch": 0.037774024220296944, |
| "grad_norm": 0.08625516295433044, |
| "learning_rate": 0.0002, |
| "loss": 0.0685, |
| "step": 20770 |
| }, |
| { |
| "epoch": 0.03779221103985414, |
| "grad_norm": 0.043796882033348083, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 20780 |
| }, |
| { |
| "epoch": 0.037810397859411336, |
| "grad_norm": 0.20600435137748718, |
| "learning_rate": 0.0002, |
| "loss": 0.0651, |
| "step": 20790 |
| }, |
| { |
| "epoch": 0.03782858467896853, |
| "grad_norm": 0.04963940382003784, |
| "learning_rate": 0.0002, |
| "loss": 0.0202, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.03784677149852573, |
| "grad_norm": 0.34920167922973633, |
| "learning_rate": 0.0002, |
| "loss": 0.1494, |
| "step": 20810 |
| }, |
| { |
| "epoch": 0.037864958318082925, |
| "grad_norm": 0.18662041425704956, |
| "learning_rate": 0.0002, |
| "loss": 0.0823, |
| "step": 20820 |
| }, |
| { |
| "epoch": 0.03788314513764012, |
| "grad_norm": 0.12615887820720673, |
| "learning_rate": 0.0002, |
| "loss": 0.0856, |
| "step": 20830 |
| }, |
| { |
| "epoch": 0.03790133195719732, |
| "grad_norm": 0.1857282668352127, |
| "learning_rate": 0.0002, |
| "loss": 0.0676, |
| "step": 20840 |
| }, |
| { |
| "epoch": 0.037919518776754514, |
| "grad_norm": 0.05569197237491608, |
| "learning_rate": 0.0002, |
| "loss": 0.0181, |
| "step": 20850 |
| }, |
| { |
| "epoch": 0.03793770559631171, |
| "grad_norm": 0.29011765122413635, |
| "learning_rate": 0.0002, |
| "loss": 0.1418, |
| "step": 20860 |
| }, |
| { |
| "epoch": 0.03795589241586891, |
| "grad_norm": 0.14119744300842285, |
| "learning_rate": 0.0002, |
| "loss": 0.0812, |
| "step": 20870 |
| }, |
| { |
| "epoch": 0.0379740792354261, |
| "grad_norm": 0.039884984493255615, |
| "learning_rate": 0.0002, |
| "loss": 0.0781, |
| "step": 20880 |
| }, |
| { |
| "epoch": 0.0379922660549833, |
| "grad_norm": 0.23705685138702393, |
| "learning_rate": 0.0002, |
| "loss": 0.0621, |
| "step": 20890 |
| }, |
| { |
| "epoch": 0.038010452874540496, |
| "grad_norm": 0.07462739199399948, |
| "learning_rate": 0.0002, |
| "loss": 0.022, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.03802863969409769, |
| "grad_norm": 0.2610052824020386, |
| "learning_rate": 0.0002, |
| "loss": 0.1517, |
| "step": 20910 |
| }, |
| { |
| "epoch": 0.03804682651365489, |
| "grad_norm": 0.12775090336799622, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 20920 |
| }, |
| { |
| "epoch": 0.038065013333212085, |
| "grad_norm": 0.03661905974149704, |
| "learning_rate": 0.0002, |
| "loss": 0.0738, |
| "step": 20930 |
| }, |
| { |
| "epoch": 0.03808320015276928, |
| "grad_norm": 0.20907218754291534, |
| "learning_rate": 0.0002, |
| "loss": 0.0627, |
| "step": 20940 |
| }, |
| { |
| "epoch": 0.03810138697232648, |
| "grad_norm": 0.022804679349064827, |
| "learning_rate": 0.0002, |
| "loss": 0.0205, |
| "step": 20950 |
| }, |
| { |
| "epoch": 0.03811957379188368, |
| "grad_norm": 0.258284330368042, |
| "learning_rate": 0.0002, |
| "loss": 0.1428, |
| "step": 20960 |
| }, |
| { |
| "epoch": 0.03813776061144088, |
| "grad_norm": 0.1477317065000534, |
| "learning_rate": 0.0002, |
| "loss": 0.0789, |
| "step": 20970 |
| }, |
| { |
| "epoch": 0.03815594743099807, |
| "grad_norm": 0.0610325001180172, |
| "learning_rate": 0.0002, |
| "loss": 0.0836, |
| "step": 20980 |
| }, |
| { |
| "epoch": 0.03817413425055527, |
| "grad_norm": 0.18825507164001465, |
| "learning_rate": 0.0002, |
| "loss": 0.0621, |
| "step": 20990 |
| }, |
| { |
| "epoch": 0.038192321070112466, |
| "grad_norm": 0.03943372145295143, |
| "learning_rate": 0.0002, |
| "loss": 0.0185, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.03821050788966966, |
| "grad_norm": 0.34519344568252563, |
| "learning_rate": 0.0002, |
| "loss": 0.1345, |
| "step": 21010 |
| }, |
| { |
| "epoch": 0.03822869470922686, |
| "grad_norm": 0.09635084867477417, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 21020 |
| }, |
| { |
| "epoch": 0.038246881528784055, |
| "grad_norm": 0.032520972192287445, |
| "learning_rate": 0.0002, |
| "loss": 0.082, |
| "step": 21030 |
| }, |
| { |
| "epoch": 0.03826506834834125, |
| "grad_norm": 0.18068930506706238, |
| "learning_rate": 0.0002, |
| "loss": 0.0609, |
| "step": 21040 |
| }, |
| { |
| "epoch": 0.03828325516789845, |
| "grad_norm": 0.05550973862409592, |
| "learning_rate": 0.0002, |
| "loss": 0.0241, |
| "step": 21050 |
| }, |
| { |
| "epoch": 0.038301441987455644, |
| "grad_norm": 0.19561107456684113, |
| "learning_rate": 0.0002, |
| "loss": 0.1337, |
| "step": 21060 |
| }, |
| { |
| "epoch": 0.03831962880701284, |
| "grad_norm": 0.1852179914712906, |
| "learning_rate": 0.0002, |
| "loss": 0.0724, |
| "step": 21070 |
| }, |
| { |
| "epoch": 0.038337815626570036, |
| "grad_norm": 0.11915116757154465, |
| "learning_rate": 0.0002, |
| "loss": 0.0836, |
| "step": 21080 |
| }, |
| { |
| "epoch": 0.03835600244612723, |
| "grad_norm": 0.21116836369037628, |
| "learning_rate": 0.0002, |
| "loss": 0.0628, |
| "step": 21090 |
| }, |
| { |
| "epoch": 0.03837418926568443, |
| "grad_norm": 0.042745884507894516, |
| "learning_rate": 0.0002, |
| "loss": 0.0214, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.038392376085241625, |
| "grad_norm": 0.43089792132377625, |
| "learning_rate": 0.0002, |
| "loss": 0.1351, |
| "step": 21110 |
| }, |
| { |
| "epoch": 0.03841056290479882, |
| "grad_norm": 0.09607810527086258, |
| "learning_rate": 0.0002, |
| "loss": 0.0778, |
| "step": 21120 |
| }, |
| { |
| "epoch": 0.03842874972435602, |
| "grad_norm": 0.13603460788726807, |
| "learning_rate": 0.0002, |
| "loss": 0.0787, |
| "step": 21130 |
| }, |
| { |
| "epoch": 0.038446936543913214, |
| "grad_norm": 0.20110103487968445, |
| "learning_rate": 0.0002, |
| "loss": 0.067, |
| "step": 21140 |
| }, |
| { |
| "epoch": 0.03846512336347041, |
| "grad_norm": 0.042503997683525085, |
| "learning_rate": 0.0002, |
| "loss": 0.0194, |
| "step": 21150 |
| }, |
| { |
| "epoch": 0.03848331018302761, |
| "grad_norm": 0.2605084478855133, |
| "learning_rate": 0.0002, |
| "loss": 0.1374, |
| "step": 21160 |
| }, |
| { |
| "epoch": 0.0385014970025848, |
| "grad_norm": 0.09476794302463531, |
| "learning_rate": 0.0002, |
| "loss": 0.078, |
| "step": 21170 |
| }, |
| { |
| "epoch": 0.038519683822142, |
| "grad_norm": 0.03458428382873535, |
| "learning_rate": 0.0002, |
| "loss": 0.08, |
| "step": 21180 |
| }, |
| { |
| "epoch": 0.038537870641699196, |
| "grad_norm": 0.31196194887161255, |
| "learning_rate": 0.0002, |
| "loss": 0.0664, |
| "step": 21190 |
| }, |
| { |
| "epoch": 0.03855605746125639, |
| "grad_norm": 0.037113118916749954, |
| "learning_rate": 0.0002, |
| "loss": 0.0221, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.03857424428081359, |
| "grad_norm": 0.3699415922164917, |
| "learning_rate": 0.0002, |
| "loss": 0.1534, |
| "step": 21210 |
| }, |
| { |
| "epoch": 0.038592431100370785, |
| "grad_norm": 0.06454256922006607, |
| "learning_rate": 0.0002, |
| "loss": 0.0762, |
| "step": 21220 |
| }, |
| { |
| "epoch": 0.03861061791992798, |
| "grad_norm": 0.09858033806085587, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 21230 |
| }, |
| { |
| "epoch": 0.03862880473948518, |
| "grad_norm": 0.1482791304588318, |
| "learning_rate": 0.0002, |
| "loss": 0.062, |
| "step": 21240 |
| }, |
| { |
| "epoch": 0.038646991559042373, |
| "grad_norm": 0.031473588198423386, |
| "learning_rate": 0.0002, |
| "loss": 0.0163, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.03866517837859957, |
| "grad_norm": 0.09360513091087341, |
| "learning_rate": 0.0002, |
| "loss": 0.1397, |
| "step": 21260 |
| }, |
| { |
| "epoch": 0.038683365198156766, |
| "grad_norm": 0.10830901563167572, |
| "learning_rate": 0.0002, |
| "loss": 0.0789, |
| "step": 21270 |
| }, |
| { |
| "epoch": 0.03870155201771396, |
| "grad_norm": 0.08910014480352402, |
| "learning_rate": 0.0002, |
| "loss": 0.0758, |
| "step": 21280 |
| }, |
| { |
| "epoch": 0.03871973883727116, |
| "grad_norm": 0.21524523198604584, |
| "learning_rate": 0.0002, |
| "loss": 0.0628, |
| "step": 21290 |
| }, |
| { |
| "epoch": 0.038737925656828355, |
| "grad_norm": 0.03794678673148155, |
| "learning_rate": 0.0002, |
| "loss": 0.0229, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.03875611247638555, |
| "grad_norm": 0.46754345297813416, |
| "learning_rate": 0.0002, |
| "loss": 0.1291, |
| "step": 21310 |
| }, |
| { |
| "epoch": 0.03877429929594275, |
| "grad_norm": 0.07472983002662659, |
| "learning_rate": 0.0002, |
| "loss": 0.076, |
| "step": 21320 |
| }, |
| { |
| "epoch": 0.038792486115499944, |
| "grad_norm": 0.11820811778306961, |
| "learning_rate": 0.0002, |
| "loss": 0.0772, |
| "step": 21330 |
| }, |
| { |
| "epoch": 0.03881067293505714, |
| "grad_norm": 0.21140390634536743, |
| "learning_rate": 0.0002, |
| "loss": 0.0539, |
| "step": 21340 |
| }, |
| { |
| "epoch": 0.03882885975461434, |
| "grad_norm": 0.044819217175245285, |
| "learning_rate": 0.0002, |
| "loss": 0.0228, |
| "step": 21350 |
| }, |
| { |
| "epoch": 0.03884704657417153, |
| "grad_norm": 0.2267816811800003, |
| "learning_rate": 0.0002, |
| "loss": 0.1462, |
| "step": 21360 |
| }, |
| { |
| "epoch": 0.03886523339372873, |
| "grad_norm": 0.10087496787309647, |
| "learning_rate": 0.0002, |
| "loss": 0.0766, |
| "step": 21370 |
| }, |
| { |
| "epoch": 0.038883420213285926, |
| "grad_norm": 0.09982341527938843, |
| "learning_rate": 0.0002, |
| "loss": 0.0798, |
| "step": 21380 |
| }, |
| { |
| "epoch": 0.03890160703284312, |
| "grad_norm": 0.21729151904582977, |
| "learning_rate": 0.0002, |
| "loss": 0.0586, |
| "step": 21390 |
| }, |
| { |
| "epoch": 0.03891979385240032, |
| "grad_norm": 0.020691821351647377, |
| "learning_rate": 0.0002, |
| "loss": 0.0175, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.038937980671957514, |
| "grad_norm": 0.33531665802001953, |
| "learning_rate": 0.0002, |
| "loss": 0.149, |
| "step": 21410 |
| }, |
| { |
| "epoch": 0.03895616749151471, |
| "grad_norm": 0.11777795851230621, |
| "learning_rate": 0.0002, |
| "loss": 0.0736, |
| "step": 21420 |
| }, |
| { |
| "epoch": 0.03897435431107191, |
| "grad_norm": 0.07860718667507172, |
| "learning_rate": 0.0002, |
| "loss": 0.0788, |
| "step": 21430 |
| }, |
| { |
| "epoch": 0.0389925411306291, |
| "grad_norm": 0.16030597686767578, |
| "learning_rate": 0.0002, |
| "loss": 0.0581, |
| "step": 21440 |
| }, |
| { |
| "epoch": 0.0390107279501863, |
| "grad_norm": 0.01747356541454792, |
| "learning_rate": 0.0002, |
| "loss": 0.0185, |
| "step": 21450 |
| }, |
| { |
| "epoch": 0.039028914769743496, |
| "grad_norm": 0.2313859909772873, |
| "learning_rate": 0.0002, |
| "loss": 0.1383, |
| "step": 21460 |
| }, |
| { |
| "epoch": 0.03904710158930069, |
| "grad_norm": 0.14510080218315125, |
| "learning_rate": 0.0002, |
| "loss": 0.0805, |
| "step": 21470 |
| }, |
| { |
| "epoch": 0.03906528840885789, |
| "grad_norm": 0.04511871561408043, |
| "learning_rate": 0.0002, |
| "loss": 0.0793, |
| "step": 21480 |
| }, |
| { |
| "epoch": 0.039083475228415085, |
| "grad_norm": 0.24205265939235687, |
| "learning_rate": 0.0002, |
| "loss": 0.0624, |
| "step": 21490 |
| }, |
| { |
| "epoch": 0.03910166204797228, |
| "grad_norm": 0.08096791058778763, |
| "learning_rate": 0.0002, |
| "loss": 0.0208, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.03911984886752948, |
| "grad_norm": 0.14405490458011627, |
| "learning_rate": 0.0002, |
| "loss": 0.1189, |
| "step": 21510 |
| }, |
| { |
| "epoch": 0.039138035687086674, |
| "grad_norm": 0.06753374636173248, |
| "learning_rate": 0.0002, |
| "loss": 0.0772, |
| "step": 21520 |
| }, |
| { |
| "epoch": 0.03915622250664387, |
| "grad_norm": 0.029025042429566383, |
| "learning_rate": 0.0002, |
| "loss": 0.0761, |
| "step": 21530 |
| }, |
| { |
| "epoch": 0.039174409326201066, |
| "grad_norm": 0.2987070381641388, |
| "learning_rate": 0.0002, |
| "loss": 0.0656, |
| "step": 21540 |
| }, |
| { |
| "epoch": 0.03919259614575826, |
| "grad_norm": 0.04445091262459755, |
| "learning_rate": 0.0002, |
| "loss": 0.0241, |
| "step": 21550 |
| }, |
| { |
| "epoch": 0.03921078296531546, |
| "grad_norm": 0.34976306557655334, |
| "learning_rate": 0.0002, |
| "loss": 0.138, |
| "step": 21560 |
| }, |
| { |
| "epoch": 0.039228969784872655, |
| "grad_norm": 0.07521916925907135, |
| "learning_rate": 0.0002, |
| "loss": 0.0774, |
| "step": 21570 |
| }, |
| { |
| "epoch": 0.03924715660442985, |
| "grad_norm": 0.1445412039756775, |
| "learning_rate": 0.0002, |
| "loss": 0.087, |
| "step": 21580 |
| }, |
| { |
| "epoch": 0.03926534342398705, |
| "grad_norm": 0.2688128352165222, |
| "learning_rate": 0.0002, |
| "loss": 0.0712, |
| "step": 21590 |
| }, |
| { |
| "epoch": 0.039283530243544244, |
| "grad_norm": 0.05321233719587326, |
| "learning_rate": 0.0002, |
| "loss": 0.0245, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.03930171706310144, |
| "grad_norm": 0.44459134340286255, |
| "learning_rate": 0.0002, |
| "loss": 0.1524, |
| "step": 21610 |
| }, |
| { |
| "epoch": 0.03931990388265864, |
| "grad_norm": 0.13169553875923157, |
| "learning_rate": 0.0002, |
| "loss": 0.0726, |
| "step": 21620 |
| }, |
| { |
| "epoch": 0.03933809070221583, |
| "grad_norm": 0.0908237174153328, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 21630 |
| }, |
| { |
| "epoch": 0.03935627752177303, |
| "grad_norm": 0.18110623955726624, |
| "learning_rate": 0.0002, |
| "loss": 0.0606, |
| "step": 21640 |
| }, |
| { |
| "epoch": 0.03937446434133023, |
| "grad_norm": 0.021362677216529846, |
| "learning_rate": 0.0002, |
| "loss": 0.0175, |
| "step": 21650 |
| }, |
| { |
| "epoch": 0.03939265116088743, |
| "grad_norm": 0.27973899245262146, |
| "learning_rate": 0.0002, |
| "loss": 0.1641, |
| "step": 21660 |
| }, |
| { |
| "epoch": 0.039410837980444625, |
| "grad_norm": 0.09090718626976013, |
| "learning_rate": 0.0002, |
| "loss": 0.073, |
| "step": 21670 |
| }, |
| { |
| "epoch": 0.03942902480000182, |
| "grad_norm": 0.13408254086971283, |
| "learning_rate": 0.0002, |
| "loss": 0.0769, |
| "step": 21680 |
| }, |
| { |
| "epoch": 0.03944721161955902, |
| "grad_norm": 0.2530055046081543, |
| "learning_rate": 0.0002, |
| "loss": 0.0729, |
| "step": 21690 |
| }, |
| { |
| "epoch": 0.039465398439116214, |
| "grad_norm": 0.027523871511220932, |
| "learning_rate": 0.0002, |
| "loss": 0.017, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.03948358525867341, |
| "grad_norm": 0.2520642578601837, |
| "learning_rate": 0.0002, |
| "loss": 0.1804, |
| "step": 21710 |
| }, |
| { |
| "epoch": 0.03950177207823061, |
| "grad_norm": 0.11017465591430664, |
| "learning_rate": 0.0002, |
| "loss": 0.0767, |
| "step": 21720 |
| }, |
| { |
| "epoch": 0.0395199588977878, |
| "grad_norm": 0.05129052326083183, |
| "learning_rate": 0.0002, |
| "loss": 0.0723, |
| "step": 21730 |
| }, |
| { |
| "epoch": 0.039538145717345, |
| "grad_norm": 0.1846659779548645, |
| "learning_rate": 0.0002, |
| "loss": 0.0619, |
| "step": 21740 |
| }, |
| { |
| "epoch": 0.039556332536902196, |
| "grad_norm": 0.014305013231933117, |
| "learning_rate": 0.0002, |
| "loss": 0.0171, |
| "step": 21750 |
| }, |
| { |
| "epoch": 0.03957451935645939, |
| "grad_norm": 0.21667814254760742, |
| "learning_rate": 0.0002, |
| "loss": 0.157, |
| "step": 21760 |
| }, |
| { |
| "epoch": 0.03959270617601659, |
| "grad_norm": 0.21456903219223022, |
| "learning_rate": 0.0002, |
| "loss": 0.0803, |
| "step": 21770 |
| }, |
| { |
| "epoch": 0.039610892995573785, |
| "grad_norm": 0.03621416166424751, |
| "learning_rate": 0.0002, |
| "loss": 0.0796, |
| "step": 21780 |
| }, |
| { |
| "epoch": 0.03962907981513098, |
| "grad_norm": 0.20819205045700073, |
| "learning_rate": 0.0002, |
| "loss": 0.0633, |
| "step": 21790 |
| }, |
| { |
| "epoch": 0.03964726663468818, |
| "grad_norm": 0.06860963255167007, |
| "learning_rate": 0.0002, |
| "loss": 0.0172, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.039665453454245374, |
| "grad_norm": 0.2568039894104004, |
| "learning_rate": 0.0002, |
| "loss": 0.134, |
| "step": 21810 |
| }, |
| { |
| "epoch": 0.03968364027380257, |
| "grad_norm": 0.08747372031211853, |
| "learning_rate": 0.0002, |
| "loss": 0.0753, |
| "step": 21820 |
| }, |
| { |
| "epoch": 0.039701827093359766, |
| "grad_norm": 0.13403570652008057, |
| "learning_rate": 0.0002, |
| "loss": 0.0807, |
| "step": 21830 |
| }, |
| { |
| "epoch": 0.03972001391291696, |
| "grad_norm": 0.20756667852401733, |
| "learning_rate": 0.0002, |
| "loss": 0.0625, |
| "step": 21840 |
| }, |
| { |
| "epoch": 0.03973820073247416, |
| "grad_norm": 0.03678170591592789, |
| "learning_rate": 0.0002, |
| "loss": 0.019, |
| "step": 21850 |
| }, |
| { |
| "epoch": 0.039756387552031355, |
| "grad_norm": 0.1847693920135498, |
| "learning_rate": 0.0002, |
| "loss": 0.1385, |
| "step": 21860 |
| }, |
| { |
| "epoch": 0.03977457437158855, |
| "grad_norm": 0.1627635508775711, |
| "learning_rate": 0.0002, |
| "loss": 0.0765, |
| "step": 21870 |
| }, |
| { |
| "epoch": 0.03979276119114575, |
| "grad_norm": 0.0535571426153183, |
| "learning_rate": 0.0002, |
| "loss": 0.0741, |
| "step": 21880 |
| }, |
| { |
| "epoch": 0.039810948010702944, |
| "grad_norm": 0.3128276765346527, |
| "learning_rate": 0.0002, |
| "loss": 0.0598, |
| "step": 21890 |
| }, |
| { |
| "epoch": 0.03982913483026014, |
| "grad_norm": 0.03369860351085663, |
| "learning_rate": 0.0002, |
| "loss": 0.0217, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.03984732164981734, |
| "grad_norm": 0.1962599903345108, |
| "learning_rate": 0.0002, |
| "loss": 0.1319, |
| "step": 21910 |
| }, |
| { |
| "epoch": 0.03986550846937453, |
| "grad_norm": 0.1397421509027481, |
| "learning_rate": 0.0002, |
| "loss": 0.068, |
| "step": 21920 |
| }, |
| { |
| "epoch": 0.03988369528893173, |
| "grad_norm": 0.10252605378627777, |
| "learning_rate": 0.0002, |
| "loss": 0.0736, |
| "step": 21930 |
| }, |
| { |
| "epoch": 0.039901882108488926, |
| "grad_norm": 0.22179432213306427, |
| "learning_rate": 0.0002, |
| "loss": 0.0625, |
| "step": 21940 |
| }, |
| { |
| "epoch": 0.03992006892804612, |
| "grad_norm": 0.06068069487810135, |
| "learning_rate": 0.0002, |
| "loss": 0.0242, |
| "step": 21950 |
| }, |
| { |
| "epoch": 0.03993825574760332, |
| "grad_norm": 0.20243950188159943, |
| "learning_rate": 0.0002, |
| "loss": 0.143, |
| "step": 21960 |
| }, |
| { |
| "epoch": 0.039956442567160515, |
| "grad_norm": 0.11786511540412903, |
| "learning_rate": 0.0002, |
| "loss": 0.0779, |
| "step": 21970 |
| }, |
| { |
| "epoch": 0.03997462938671771, |
| "grad_norm": 0.08299421519041061, |
| "learning_rate": 0.0002, |
| "loss": 0.0774, |
| "step": 21980 |
| }, |
| { |
| "epoch": 0.03999281620627491, |
| "grad_norm": 0.2844075858592987, |
| "learning_rate": 0.0002, |
| "loss": 0.0711, |
| "step": 21990 |
| }, |
| { |
| "epoch": 0.040011003025832104, |
| "grad_norm": 0.034433312714099884, |
| "learning_rate": 0.0002, |
| "loss": 0.0217, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.0400291898453893, |
| "grad_norm": 0.3878481388092041, |
| "learning_rate": 0.0002, |
| "loss": 0.1525, |
| "step": 22010 |
| }, |
| { |
| "epoch": 0.040047376664946496, |
| "grad_norm": 0.16157971322536469, |
| "learning_rate": 0.0002, |
| "loss": 0.0788, |
| "step": 22020 |
| }, |
| { |
| "epoch": 0.04006556348450369, |
| "grad_norm": 0.10347063094377518, |
| "learning_rate": 0.0002, |
| "loss": 0.0809, |
| "step": 22030 |
| }, |
| { |
| "epoch": 0.04008375030406089, |
| "grad_norm": 0.20982638001441956, |
| "learning_rate": 0.0002, |
| "loss": 0.0662, |
| "step": 22040 |
| }, |
| { |
| "epoch": 0.040101937123618085, |
| "grad_norm": 5.856126308441162, |
| "learning_rate": 0.0002, |
| "loss": 0.0578, |
| "step": 22050 |
| }, |
| { |
| "epoch": 0.04012012394317528, |
| "grad_norm": 0.21289357542991638, |
| "learning_rate": 0.0002, |
| "loss": 0.1257, |
| "step": 22060 |
| }, |
| { |
| "epoch": 0.04013831076273248, |
| "grad_norm": 0.040848907083272934, |
| "learning_rate": 0.0002, |
| "loss": 0.0783, |
| "step": 22070 |
| }, |
| { |
| "epoch": 0.040156497582289674, |
| "grad_norm": 0.056517478078603745, |
| "learning_rate": 0.0002, |
| "loss": 0.0693, |
| "step": 22080 |
| }, |
| { |
| "epoch": 0.04017468440184687, |
| "grad_norm": 0.274312287569046, |
| "learning_rate": 0.0002, |
| "loss": 0.0685, |
| "step": 22090 |
| }, |
| { |
| "epoch": 0.04019287122140407, |
| "grad_norm": 0.06353340297937393, |
| "learning_rate": 0.0002, |
| "loss": 0.0263, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.04021105804096126, |
| "grad_norm": 0.287201464176178, |
| "learning_rate": 0.0002, |
| "loss": 0.1425, |
| "step": 22110 |
| }, |
| { |
| "epoch": 0.04022924486051846, |
| "grad_norm": 0.0990116223692894, |
| "learning_rate": 0.0002, |
| "loss": 0.0732, |
| "step": 22120 |
| }, |
| { |
| "epoch": 0.040247431680075656, |
| "grad_norm": 0.03471527248620987, |
| "learning_rate": 0.0002, |
| "loss": 0.0806, |
| "step": 22130 |
| }, |
| { |
| "epoch": 0.04026561849963285, |
| "grad_norm": 0.16411902010440826, |
| "learning_rate": 0.0002, |
| "loss": 0.0646, |
| "step": 22140 |
| }, |
| { |
| "epoch": 0.04028380531919005, |
| "grad_norm": 0.032927367836236954, |
| "learning_rate": 0.0002, |
| "loss": 0.0225, |
| "step": 22150 |
| }, |
| { |
| "epoch": 0.040301992138747245, |
| "grad_norm": 0.31128716468811035, |
| "learning_rate": 0.0002, |
| "loss": 0.1227, |
| "step": 22160 |
| }, |
| { |
| "epoch": 0.04032017895830444, |
| "grad_norm": 0.14056596159934998, |
| "learning_rate": 0.0002, |
| "loss": 0.0866, |
| "step": 22170 |
| }, |
| { |
| "epoch": 0.04033836577786164, |
| "grad_norm": 0.10555677115917206, |
| "learning_rate": 0.0002, |
| "loss": 0.0785, |
| "step": 22180 |
| }, |
| { |
| "epoch": 0.040356552597418834, |
| "grad_norm": 0.25597816705703735, |
| "learning_rate": 0.0002, |
| "loss": 0.0667, |
| "step": 22190 |
| }, |
| { |
| "epoch": 0.04037473941697603, |
| "grad_norm": 0.04694845899939537, |
| "learning_rate": 0.0002, |
| "loss": 0.021, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.040392926236533226, |
| "grad_norm": 0.2536766529083252, |
| "learning_rate": 0.0002, |
| "loss": 0.1485, |
| "step": 22210 |
| }, |
| { |
| "epoch": 0.04041111305609042, |
| "grad_norm": 0.0536673367023468, |
| "learning_rate": 0.0002, |
| "loss": 0.0737, |
| "step": 22220 |
| }, |
| { |
| "epoch": 0.04042929987564762, |
| "grad_norm": 0.13121111690998077, |
| "learning_rate": 0.0002, |
| "loss": 0.0793, |
| "step": 22230 |
| }, |
| { |
| "epoch": 0.040447486695204815, |
| "grad_norm": 0.23850645124912262, |
| "learning_rate": 0.0002, |
| "loss": 0.0698, |
| "step": 22240 |
| }, |
| { |
| "epoch": 0.04046567351476201, |
| "grad_norm": 0.04178560525178909, |
| "learning_rate": 0.0002, |
| "loss": 0.0212, |
| "step": 22250 |
| }, |
| { |
| "epoch": 0.04048386033431921, |
| "grad_norm": 0.42834579944610596, |
| "learning_rate": 0.0002, |
| "loss": 0.1352, |
| "step": 22260 |
| }, |
| { |
| "epoch": 0.040502047153876404, |
| "grad_norm": 0.050178542733192444, |
| "learning_rate": 0.0002, |
| "loss": 0.0853, |
| "step": 22270 |
| }, |
| { |
| "epoch": 0.0405202339734336, |
| "grad_norm": 0.042758237570524216, |
| "learning_rate": 0.0002, |
| "loss": 0.0709, |
| "step": 22280 |
| }, |
| { |
| "epoch": 0.0405384207929908, |
| "grad_norm": 0.2604416012763977, |
| "learning_rate": 0.0002, |
| "loss": 0.0643, |
| "step": 22290 |
| }, |
| { |
| "epoch": 0.04055660761254799, |
| "grad_norm": 0.06166388466954231, |
| "learning_rate": 0.0002, |
| "loss": 0.0236, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.04057479443210519, |
| "grad_norm": 0.2337518334388733, |
| "learning_rate": 0.0002, |
| "loss": 0.132, |
| "step": 22310 |
| }, |
| { |
| "epoch": 0.040592981251662386, |
| "grad_norm": 0.15794694423675537, |
| "learning_rate": 0.0002, |
| "loss": 0.0739, |
| "step": 22320 |
| }, |
| { |
| "epoch": 0.04061116807121959, |
| "grad_norm": 0.12059915065765381, |
| "learning_rate": 0.0002, |
| "loss": 0.0743, |
| "step": 22330 |
| }, |
| { |
| "epoch": 0.040629354890776785, |
| "grad_norm": 0.25351977348327637, |
| "learning_rate": 0.0002, |
| "loss": 0.065, |
| "step": 22340 |
| }, |
| { |
| "epoch": 0.04064754171033398, |
| "grad_norm": 0.03265364468097687, |
| "learning_rate": 0.0002, |
| "loss": 0.02, |
| "step": 22350 |
| }, |
| { |
| "epoch": 0.04066572852989118, |
| "grad_norm": 0.22959749400615692, |
| "learning_rate": 0.0002, |
| "loss": 0.1278, |
| "step": 22360 |
| }, |
| { |
| "epoch": 0.040683915349448374, |
| "grad_norm": 0.11381889134645462, |
| "learning_rate": 0.0002, |
| "loss": 0.0823, |
| "step": 22370 |
| }, |
| { |
| "epoch": 0.04070210216900557, |
| "grad_norm": 0.03541165217757225, |
| "learning_rate": 0.0002, |
| "loss": 0.0809, |
| "step": 22380 |
| }, |
| { |
| "epoch": 0.04072028898856277, |
| "grad_norm": 0.20604047179222107, |
| "learning_rate": 0.0002, |
| "loss": 0.0693, |
| "step": 22390 |
| }, |
| { |
| "epoch": 0.04073847580811996, |
| "grad_norm": 0.051576532423496246, |
| "learning_rate": 0.0002, |
| "loss": 0.0213, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.04075666262767716, |
| "grad_norm": 0.208265483379364, |
| "learning_rate": 0.0002, |
| "loss": 0.1203, |
| "step": 22410 |
| }, |
| { |
| "epoch": 0.040774849447234356, |
| "grad_norm": 0.14376410841941833, |
| "learning_rate": 0.0002, |
| "loss": 0.0832, |
| "step": 22420 |
| }, |
| { |
| "epoch": 0.04079303626679155, |
| "grad_norm": 0.0634629875421524, |
| "learning_rate": 0.0002, |
| "loss": 0.0797, |
| "step": 22430 |
| }, |
| { |
| "epoch": 0.04081122308634875, |
| "grad_norm": 0.22782418131828308, |
| "learning_rate": 0.0002, |
| "loss": 0.0594, |
| "step": 22440 |
| }, |
| { |
| "epoch": 0.040829409905905945, |
| "grad_norm": 0.034153662621974945, |
| "learning_rate": 0.0002, |
| "loss": 0.0197, |
| "step": 22450 |
| }, |
| { |
| "epoch": 0.04084759672546314, |
| "grad_norm": 0.22994177043437958, |
| "learning_rate": 0.0002, |
| "loss": 0.1276, |
| "step": 22460 |
| }, |
| { |
| "epoch": 0.04086578354502034, |
| "grad_norm": 0.37397289276123047, |
| "learning_rate": 0.0002, |
| "loss": 0.0794, |
| "step": 22470 |
| }, |
| { |
| "epoch": 0.040883970364577533, |
| "grad_norm": 0.03585643321275711, |
| "learning_rate": 0.0002, |
| "loss": 0.0765, |
| "step": 22480 |
| }, |
| { |
| "epoch": 0.04090215718413473, |
| "grad_norm": 0.2266087681055069, |
| "learning_rate": 0.0002, |
| "loss": 0.0661, |
| "step": 22490 |
| }, |
| { |
| "epoch": 0.040920344003691926, |
| "grad_norm": 0.03867397829890251, |
| "learning_rate": 0.0002, |
| "loss": 0.0241, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.04093853082324912, |
| "grad_norm": 0.23483702540397644, |
| "learning_rate": 0.0002, |
| "loss": 0.1442, |
| "step": 22510 |
| }, |
| { |
| "epoch": 0.04095671764280632, |
| "grad_norm": 0.11447428911924362, |
| "learning_rate": 0.0002, |
| "loss": 0.0759, |
| "step": 22520 |
| }, |
| { |
| "epoch": 0.040974904462363515, |
| "grad_norm": 0.1060417965054512, |
| "learning_rate": 0.0002, |
| "loss": 0.0792, |
| "step": 22530 |
| }, |
| { |
| "epoch": 0.04099309128192071, |
| "grad_norm": 0.1915966123342514, |
| "learning_rate": 0.0002, |
| "loss": 0.0679, |
| "step": 22540 |
| }, |
| { |
| "epoch": 0.04101127810147791, |
| "grad_norm": 0.05328527092933655, |
| "learning_rate": 0.0002, |
| "loss": 0.0211, |
| "step": 22550 |
| }, |
| { |
| "epoch": 0.041029464921035104, |
| "grad_norm": 0.31612515449523926, |
| "learning_rate": 0.0002, |
| "loss": 0.1395, |
| "step": 22560 |
| }, |
| { |
| "epoch": 0.0410476517405923, |
| "grad_norm": 0.1860841065645218, |
| "learning_rate": 0.0002, |
| "loss": 0.0704, |
| "step": 22570 |
| }, |
| { |
| "epoch": 0.0410658385601495, |
| "grad_norm": 0.11183702945709229, |
| "learning_rate": 0.0002, |
| "loss": 0.073, |
| "step": 22580 |
| }, |
| { |
| "epoch": 0.04108402537970669, |
| "grad_norm": 0.2028307020664215, |
| "learning_rate": 0.0002, |
| "loss": 0.0592, |
| "step": 22590 |
| }, |
| { |
| "epoch": 0.04110221219926389, |
| "grad_norm": 0.032915905117988586, |
| "learning_rate": 0.0002, |
| "loss": 0.0211, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.041120399018821086, |
| "grad_norm": 0.2932131588459015, |
| "learning_rate": 0.0002, |
| "loss": 0.1542, |
| "step": 22610 |
| }, |
| { |
| "epoch": 0.04113858583837828, |
| "grad_norm": 0.08883325010538101, |
| "learning_rate": 0.0002, |
| "loss": 0.079, |
| "step": 22620 |
| }, |
| { |
| "epoch": 0.04115677265793548, |
| "grad_norm": 0.07874555885791779, |
| "learning_rate": 0.0002, |
| "loss": 0.0801, |
| "step": 22630 |
| }, |
| { |
| "epoch": 0.041174959477492674, |
| "grad_norm": 0.13785040378570557, |
| "learning_rate": 0.0002, |
| "loss": 0.0636, |
| "step": 22640 |
| }, |
| { |
| "epoch": 0.04119314629704987, |
| "grad_norm": 0.0321812778711319, |
| "learning_rate": 0.0002, |
| "loss": 0.0208, |
| "step": 22650 |
| }, |
| { |
| "epoch": 0.04121133311660707, |
| "grad_norm": 0.142785906791687, |
| "learning_rate": 0.0002, |
| "loss": 0.1292, |
| "step": 22660 |
| }, |
| { |
| "epoch": 0.04122951993616426, |
| "grad_norm": 0.15572668612003326, |
| "learning_rate": 0.0002, |
| "loss": 0.0774, |
| "step": 22670 |
| }, |
| { |
| "epoch": 0.04124770675572146, |
| "grad_norm": 0.033191781491041183, |
| "learning_rate": 0.0002, |
| "loss": 0.0805, |
| "step": 22680 |
| }, |
| { |
| "epoch": 0.041265893575278656, |
| "grad_norm": 0.23840776085853577, |
| "learning_rate": 0.0002, |
| "loss": 0.06, |
| "step": 22690 |
| }, |
| { |
| "epoch": 0.04128408039483585, |
| "grad_norm": 0.05943412706255913, |
| "learning_rate": 0.0002, |
| "loss": 0.0215, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.04130226721439305, |
| "grad_norm": 0.05142183229327202, |
| "learning_rate": 0.0002, |
| "loss": 0.1181, |
| "step": 22710 |
| }, |
| { |
| "epoch": 0.041320454033950245, |
| "grad_norm": 0.1583058387041092, |
| "learning_rate": 0.0002, |
| "loss": 0.0822, |
| "step": 22720 |
| }, |
| { |
| "epoch": 0.04133864085350744, |
| "grad_norm": 0.035809941589832306, |
| "learning_rate": 0.0002, |
| "loss": 0.0723, |
| "step": 22730 |
| }, |
| { |
| "epoch": 0.04135682767306464, |
| "grad_norm": 0.24066607654094696, |
| "learning_rate": 0.0002, |
| "loss": 0.0621, |
| "step": 22740 |
| }, |
| { |
| "epoch": 0.041375014492621834, |
| "grad_norm": 0.0327225998044014, |
| "learning_rate": 0.0002, |
| "loss": 0.0264, |
| "step": 22750 |
| }, |
| { |
| "epoch": 0.04139320131217903, |
| "grad_norm": 0.16599033772945404, |
| "learning_rate": 0.0002, |
| "loss": 0.1082, |
| "step": 22760 |
| }, |
| { |
| "epoch": 0.041411388131736226, |
| "grad_norm": 0.18834830820560455, |
| "learning_rate": 0.0002, |
| "loss": 0.0767, |
| "step": 22770 |
| }, |
| { |
| "epoch": 0.04142957495129342, |
| "grad_norm": 0.04162973538041115, |
| "learning_rate": 0.0002, |
| "loss": 0.0821, |
| "step": 22780 |
| }, |
| { |
| "epoch": 0.04144776177085062, |
| "grad_norm": 0.21065399050712585, |
| "learning_rate": 0.0002, |
| "loss": 0.0591, |
| "step": 22790 |
| }, |
| { |
| "epoch": 0.041465948590407815, |
| "grad_norm": 0.03744394704699516, |
| "learning_rate": 0.0002, |
| "loss": 0.0199, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.04148413540996501, |
| "grad_norm": 0.30440911650657654, |
| "learning_rate": 0.0002, |
| "loss": 0.1321, |
| "step": 22810 |
| }, |
| { |
| "epoch": 0.04150232222952221, |
| "grad_norm": 0.07215052098035812, |
| "learning_rate": 0.0002, |
| "loss": 0.077, |
| "step": 22820 |
| }, |
| { |
| "epoch": 0.041520509049079404, |
| "grad_norm": 0.0822744220495224, |
| "learning_rate": 0.0002, |
| "loss": 0.0695, |
| "step": 22830 |
| }, |
| { |
| "epoch": 0.0415386958686366, |
| "grad_norm": 0.20610104501247406, |
| "learning_rate": 0.0002, |
| "loss": 0.0668, |
| "step": 22840 |
| }, |
| { |
| "epoch": 0.0415568826881938, |
| "grad_norm": 0.05089128017425537, |
| "learning_rate": 0.0002, |
| "loss": 0.0275, |
| "step": 22850 |
| }, |
| { |
| "epoch": 0.04157506950775099, |
| "grad_norm": 0.23365797102451324, |
| "learning_rate": 0.0002, |
| "loss": 0.1308, |
| "step": 22860 |
| }, |
| { |
| "epoch": 0.04159325632730819, |
| "grad_norm": 0.03983612358570099, |
| "learning_rate": 0.0002, |
| "loss": 0.0738, |
| "step": 22870 |
| }, |
| { |
| "epoch": 0.041611443146865386, |
| "grad_norm": 0.12472117692232132, |
| "learning_rate": 0.0002, |
| "loss": 0.082, |
| "step": 22880 |
| }, |
| { |
| "epoch": 0.04162962996642258, |
| "grad_norm": 0.19599118828773499, |
| "learning_rate": 0.0002, |
| "loss": 0.0614, |
| "step": 22890 |
| }, |
| { |
| "epoch": 0.04164781678597978, |
| "grad_norm": 0.04077763110399246, |
| "learning_rate": 0.0002, |
| "loss": 0.0298, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.041666003605536975, |
| "grad_norm": 0.3027828633785248, |
| "learning_rate": 0.0002, |
| "loss": 0.1294, |
| "step": 22910 |
| }, |
| { |
| "epoch": 0.04168419042509417, |
| "grad_norm": 0.1551598757505417, |
| "learning_rate": 0.0002, |
| "loss": 0.0716, |
| "step": 22920 |
| }, |
| { |
| "epoch": 0.04170237724465137, |
| "grad_norm": 0.06512947380542755, |
| "learning_rate": 0.0002, |
| "loss": 0.071, |
| "step": 22930 |
| }, |
| { |
| "epoch": 0.041720564064208564, |
| "grad_norm": 0.2486017346382141, |
| "learning_rate": 0.0002, |
| "loss": 0.0726, |
| "step": 22940 |
| }, |
| { |
| "epoch": 0.04173875088376576, |
| "grad_norm": 0.0658118799328804, |
| "learning_rate": 0.0002, |
| "loss": 0.0211, |
| "step": 22950 |
| }, |
| { |
| "epoch": 0.041756937703322956, |
| "grad_norm": 0.18327641487121582, |
| "learning_rate": 0.0002, |
| "loss": 0.1307, |
| "step": 22960 |
| }, |
| { |
| "epoch": 0.04177512452288015, |
| "grad_norm": 0.06218123063445091, |
| "learning_rate": 0.0002, |
| "loss": 0.0839, |
| "step": 22970 |
| }, |
| { |
| "epoch": 0.04179331134243735, |
| "grad_norm": 0.07085203379392624, |
| "learning_rate": 0.0002, |
| "loss": 0.0786, |
| "step": 22980 |
| }, |
| { |
| "epoch": 0.041811498161994545, |
| "grad_norm": 0.19552426040172577, |
| "learning_rate": 0.0002, |
| "loss": 0.0646, |
| "step": 22990 |
| }, |
| { |
| "epoch": 0.04182968498155174, |
| "grad_norm": 0.06710335612297058, |
| "learning_rate": 0.0002, |
| "loss": 0.0252, |
| "step": 23000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0398767809662812e+19, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|