| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 6.476683937823834, |
| "eval_steps": 500, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006476683937823834, |
| "grad_norm": 12.527985572814941, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.244, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012953367875647668, |
| "grad_norm": 19.27385139465332, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.2301, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.019430051813471502, |
| "grad_norm": 6.885942459106445, |
| "learning_rate": 6e-06, |
| "loss": 1.169, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.025906735751295335, |
| "grad_norm": 5.766233921051025, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.831, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03238341968911917, |
| "grad_norm": 3.2696096897125244, |
| "learning_rate": 1e-05, |
| "loss": 0.5349, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.038860103626943004, |
| "grad_norm": 3.4554696083068848, |
| "learning_rate": 1.2e-05, |
| "loss": 0.4294, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04533678756476684, |
| "grad_norm": 2.4693260192871094, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.3355, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05181347150259067, |
| "grad_norm": 1.4052163362503052, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.3158, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05829015544041451, |
| "grad_norm": 2.1825428009033203, |
| "learning_rate": 1.8e-05, |
| "loss": 0.2853, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06476683937823834, |
| "grad_norm": 1.7514649629592896, |
| "learning_rate": 2e-05, |
| "loss": 0.2427, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07124352331606218, |
| "grad_norm": 4.749204158782959, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.2211, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07772020725388601, |
| "grad_norm": 2.262394428253174, |
| "learning_rate": 2.4e-05, |
| "loss": 0.2275, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08419689119170984, |
| "grad_norm": 2.971313238143921, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.1767, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09067357512953368, |
| "grad_norm": 1.2210947275161743, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.167, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.09715025906735751, |
| "grad_norm": 6.283907890319824, |
| "learning_rate": 3e-05, |
| "loss": 0.1677, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10362694300518134, |
| "grad_norm": 0.9573088884353638, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.1766, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11010362694300518, |
| "grad_norm": 1.4948713779449463, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.149, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.11658031088082901, |
| "grad_norm": 1.1040873527526855, |
| "learning_rate": 3.6e-05, |
| "loss": 0.1549, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.12305699481865284, |
| "grad_norm": 0.8945522904396057, |
| "learning_rate": 3.8e-05, |
| "loss": 0.1424, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12953367875647667, |
| "grad_norm": 1.265764594078064, |
| "learning_rate": 4e-05, |
| "loss": 0.1503, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13601036269430053, |
| "grad_norm": 1.3545417785644531, |
| "learning_rate": 4.2e-05, |
| "loss": 0.1511, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.14248704663212436, |
| "grad_norm": 2.2851409912109375, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.1424, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1489637305699482, |
| "grad_norm": 1.2114957571029663, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.1295, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.15544041450777202, |
| "grad_norm": 0.7779485583305359, |
| "learning_rate": 4.8e-05, |
| "loss": 0.1347, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.16191709844559585, |
| "grad_norm": 0.9195041060447693, |
| "learning_rate": 5e-05, |
| "loss": 0.1205, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.16839378238341968, |
| "grad_norm": 1.378207802772522, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 0.128, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.17487046632124353, |
| "grad_norm": 0.9869899153709412, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 0.1199, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.18134715025906736, |
| "grad_norm": 2.6648809909820557, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 0.1442, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1878238341968912, |
| "grad_norm": 1.7314049005508423, |
| "learning_rate": 5.8e-05, |
| "loss": 0.1287, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.19430051813471502, |
| "grad_norm": 1.1444119215011597, |
| "learning_rate": 6e-05, |
| "loss": 0.1206, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.20077720207253885, |
| "grad_norm": 0.8097096085548401, |
| "learning_rate": 6.2e-05, |
| "loss": 0.1139, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.20725388601036268, |
| "grad_norm": 1.2885841131210327, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.1244, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.21373056994818654, |
| "grad_norm": 0.8578065037727356, |
| "learning_rate": 6.6e-05, |
| "loss": 0.1112, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.22020725388601037, |
| "grad_norm": 1.3751784563064575, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 0.1033, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2266839378238342, |
| "grad_norm": 1.1707627773284912, |
| "learning_rate": 7e-05, |
| "loss": 0.1162, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.23316062176165803, |
| "grad_norm": 1.7474905252456665, |
| "learning_rate": 7.2e-05, |
| "loss": 0.1038, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23963730569948186, |
| "grad_norm": 0.5757717490196228, |
| "learning_rate": 7.4e-05, |
| "loss": 0.0896, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.24611398963730569, |
| "grad_norm": 1.0151946544647217, |
| "learning_rate": 7.6e-05, |
| "loss": 0.1136, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.25259067357512954, |
| "grad_norm": 0.9157730937004089, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 0.1207, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.25906735751295334, |
| "grad_norm": 0.9437503218650818, |
| "learning_rate": 8e-05, |
| "loss": 0.1058, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2655440414507772, |
| "grad_norm": 0.8575088381767273, |
| "learning_rate": 8.2e-05, |
| "loss": 0.1019, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.27202072538860106, |
| "grad_norm": 1.1373648643493652, |
| "learning_rate": 8.4e-05, |
| "loss": 0.1168, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.27849740932642486, |
| "grad_norm": 0.9613192677497864, |
| "learning_rate": 8.6e-05, |
| "loss": 0.1048, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2849740932642487, |
| "grad_norm": 1.4302594661712646, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 0.1037, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2914507772020725, |
| "grad_norm": 1.0947346687316895, |
| "learning_rate": 9e-05, |
| "loss": 0.1023, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2979274611398964, |
| "grad_norm": 1.21216881275177, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 0.0955, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.30440414507772023, |
| "grad_norm": 1.4514696598052979, |
| "learning_rate": 9.4e-05, |
| "loss": 0.1054, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.31088082901554404, |
| "grad_norm": 1.2861961126327515, |
| "learning_rate": 9.6e-05, |
| "loss": 0.1016, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3173575129533679, |
| "grad_norm": 0.5643912553787231, |
| "learning_rate": 9.8e-05, |
| "loss": 0.1003, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3238341968911917, |
| "grad_norm": 0.9163880348205566, |
| "learning_rate": 0.0001, |
| "loss": 0.0849, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.33031088082901555, |
| "grad_norm": 1.2243982553482056, |
| "learning_rate": 9.999972660400536e-05, |
| "loss": 0.0903, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.33678756476683935, |
| "grad_norm": 0.9769583940505981, |
| "learning_rate": 9.999890641901125e-05, |
| "loss": 0.0926, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3432642487046632, |
| "grad_norm": 1.0890876054763794, |
| "learning_rate": 9.999753945398704e-05, |
| "loss": 0.09, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.34974093264248707, |
| "grad_norm": 0.701673150062561, |
| "learning_rate": 9.99956257238817e-05, |
| "loss": 0.105, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.35621761658031087, |
| "grad_norm": 0.6724784970283508, |
| "learning_rate": 9.999316524962345e-05, |
| "loss": 0.0862, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3626943005181347, |
| "grad_norm": 0.6740795373916626, |
| "learning_rate": 9.999015805811965e-05, |
| "loss": 0.0933, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.36917098445595853, |
| "grad_norm": 0.9289199113845825, |
| "learning_rate": 9.998660418225645e-05, |
| "loss": 0.1064, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3756476683937824, |
| "grad_norm": 0.9368308186531067, |
| "learning_rate": 9.998250366089848e-05, |
| "loss": 0.1032, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.38212435233160624, |
| "grad_norm": 0.688762903213501, |
| "learning_rate": 9.997785653888835e-05, |
| "loss": 0.0899, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.38860103626943004, |
| "grad_norm": 0.9816915988922119, |
| "learning_rate": 9.997266286704631e-05, |
| "loss": 0.0998, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3950777202072539, |
| "grad_norm": 0.8907963037490845, |
| "learning_rate": 9.996692270216947e-05, |
| "loss": 0.0987, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4015544041450777, |
| "grad_norm": 1.1411789655685425, |
| "learning_rate": 9.996063610703137e-05, |
| "loss": 0.0936, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.40803108808290156, |
| "grad_norm": 0.7415697574615479, |
| "learning_rate": 9.995380315038119e-05, |
| "loss": 0.084, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.41450777202072536, |
| "grad_norm": 0.856239378452301, |
| "learning_rate": 9.994642390694308e-05, |
| "loss": 0.0881, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4209844559585492, |
| "grad_norm": 0.8964262008666992, |
| "learning_rate": 9.993849845741524e-05, |
| "loss": 0.0872, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4274611398963731, |
| "grad_norm": 1.0198777914047241, |
| "learning_rate": 9.993002688846913e-05, |
| "loss": 0.086, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4339378238341969, |
| "grad_norm": 0.7242151498794556, |
| "learning_rate": 9.992100929274846e-05, |
| "loss": 0.0695, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.44041450777202074, |
| "grad_norm": 0.6688233017921448, |
| "learning_rate": 9.991144576886823e-05, |
| "loss": 0.0869, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.44689119170984454, |
| "grad_norm": 1.0634338855743408, |
| "learning_rate": 9.990133642141359e-05, |
| "loss": 0.074, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.4533678756476684, |
| "grad_norm": 0.9007896184921265, |
| "learning_rate": 9.989068136093873e-05, |
| "loss": 0.0795, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.45984455958549225, |
| "grad_norm": 0.9645273685455322, |
| "learning_rate": 9.987948070396571e-05, |
| "loss": 0.0748, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.46632124352331605, |
| "grad_norm": 0.6668915748596191, |
| "learning_rate": 9.986773457298311e-05, |
| "loss": 0.0724, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4727979274611399, |
| "grad_norm": 0.6029163599014282, |
| "learning_rate": 9.985544309644475e-05, |
| "loss": 0.0914, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4792746113989637, |
| "grad_norm": 0.4224916398525238, |
| "learning_rate": 9.984260640876821e-05, |
| "loss": 0.0698, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.48575129533678757, |
| "grad_norm": 0.5059609413146973, |
| "learning_rate": 9.98292246503335e-05, |
| "loss": 0.0735, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.49222797927461137, |
| "grad_norm": 0.6006156802177429, |
| "learning_rate": 9.981529796748134e-05, |
| "loss": 0.0738, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.49870466321243523, |
| "grad_norm": 0.6188303828239441, |
| "learning_rate": 9.980082651251175e-05, |
| "loss": 0.0757, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5051813471502591, |
| "grad_norm": 0.5524353981018066, |
| "learning_rate": 9.97858104436822e-05, |
| "loss": 0.0759, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5116580310880829, |
| "grad_norm": 0.7834250330924988, |
| "learning_rate": 9.977024992520602e-05, |
| "loss": 0.0798, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5181347150259067, |
| "grad_norm": 0.5550430417060852, |
| "learning_rate": 9.975414512725057e-05, |
| "loss": 0.0829, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5246113989637305, |
| "grad_norm": 0.4284016788005829, |
| "learning_rate": 9.973749622593534e-05, |
| "loss": 0.0737, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5310880829015544, |
| "grad_norm": 0.6256512403488159, |
| "learning_rate": 9.972030340333001e-05, |
| "loss": 0.0911, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5375647668393783, |
| "grad_norm": 0.7751624584197998, |
| "learning_rate": 9.970256684745258e-05, |
| "loss": 0.0744, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5440414507772021, |
| "grad_norm": 0.9001058340072632, |
| "learning_rate": 9.968428675226714e-05, |
| "loss": 0.0587, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5505181347150259, |
| "grad_norm": 0.46805211901664734, |
| "learning_rate": 9.966546331768191e-05, |
| "loss": 0.0704, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5569948186528497, |
| "grad_norm": 0.6288490891456604, |
| "learning_rate": 9.964609674954696e-05, |
| "loss": 0.0778, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5634715025906736, |
| "grad_norm": 0.6661942601203918, |
| "learning_rate": 9.962618725965196e-05, |
| "loss": 0.0722, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5699481865284974, |
| "grad_norm": 1.02120041847229, |
| "learning_rate": 9.96057350657239e-05, |
| "loss": 0.0733, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5764248704663213, |
| "grad_norm": 0.5569030046463013, |
| "learning_rate": 9.95847403914247e-05, |
| "loss": 0.0744, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.582901554404145, |
| "grad_norm": 0.6581366062164307, |
| "learning_rate": 9.956320346634876e-05, |
| "loss": 0.0641, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5893782383419689, |
| "grad_norm": 0.8849700689315796, |
| "learning_rate": 9.954112452602045e-05, |
| "loss": 0.0638, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5958549222797928, |
| "grad_norm": 0.6778038740158081, |
| "learning_rate": 9.95185038118915e-05, |
| "loss": 0.074, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6023316062176166, |
| "grad_norm": 0.8255287408828735, |
| "learning_rate": 9.949534157133844e-05, |
| "loss": 0.0886, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6088082901554405, |
| "grad_norm": 0.6942656636238098, |
| "learning_rate": 9.94716380576598e-05, |
| "loss": 0.0708, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6152849740932642, |
| "grad_norm": 0.5593329071998596, |
| "learning_rate": 9.944739353007344e-05, |
| "loss": 0.071, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6217616580310881, |
| "grad_norm": 0.9311866164207458, |
| "learning_rate": 9.942260825371358e-05, |
| "loss": 0.0684, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6282383419689119, |
| "grad_norm": 0.5894584655761719, |
| "learning_rate": 9.939728249962807e-05, |
| "loss": 0.0676, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6347150259067358, |
| "grad_norm": 0.40771910548210144, |
| "learning_rate": 9.937141654477528e-05, |
| "loss": 0.0638, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6411917098445595, |
| "grad_norm": 0.4833804666996002, |
| "learning_rate": 9.934501067202117e-05, |
| "loss": 0.0656, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6476683937823834, |
| "grad_norm": 0.5921794772148132, |
| "learning_rate": 9.931806517013612e-05, |
| "loss": 0.0591, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6541450777202072, |
| "grad_norm": 0.40994754433631897, |
| "learning_rate": 9.929058033379181e-05, |
| "loss": 0.077, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6606217616580311, |
| "grad_norm": 1.0852352380752563, |
| "learning_rate": 9.926255646355804e-05, |
| "loss": 0.0573, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.667098445595855, |
| "grad_norm": 0.833574116230011, |
| "learning_rate": 9.923399386589933e-05, |
| "loss": 0.0586, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6735751295336787, |
| "grad_norm": 0.9025760889053345, |
| "learning_rate": 9.92048928531717e-05, |
| "loss": 0.0627, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6800518134715026, |
| "grad_norm": 0.525789737701416, |
| "learning_rate": 9.917525374361912e-05, |
| "loss": 0.0599, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6865284974093264, |
| "grad_norm": 0.4664829969406128, |
| "learning_rate": 9.914507686137019e-05, |
| "loss": 0.0635, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6930051813471503, |
| "grad_norm": 0.29409900307655334, |
| "learning_rate": 9.911436253643445e-05, |
| "loss": 0.0528, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6994818652849741, |
| "grad_norm": 0.7681676149368286, |
| "learning_rate": 9.90831111046988e-05, |
| "loss": 0.062, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7059585492227979, |
| "grad_norm": 0.741635799407959, |
| "learning_rate": 9.905132290792394e-05, |
| "loss": 0.0615, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7124352331606217, |
| "grad_norm": 0.4648561477661133, |
| "learning_rate": 9.901899829374047e-05, |
| "loss": 0.0627, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7189119170984456, |
| "grad_norm": 0.40356361865997314, |
| "learning_rate": 9.89861376156452e-05, |
| "loss": 0.0577, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7253886010362695, |
| "grad_norm": 0.3577727675437927, |
| "learning_rate": 9.895274123299723e-05, |
| "loss": 0.0538, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7318652849740933, |
| "grad_norm": 0.3680018186569214, |
| "learning_rate": 9.891880951101407e-05, |
| "loss": 0.0538, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.7383419689119171, |
| "grad_norm": 0.5400322675704956, |
| "learning_rate": 9.888434282076758e-05, |
| "loss": 0.0541, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7448186528497409, |
| "grad_norm": 0.4998588562011719, |
| "learning_rate": 9.884934153917997e-05, |
| "loss": 0.0582, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7512953367875648, |
| "grad_norm": 0.6761271953582764, |
| "learning_rate": 9.881380604901964e-05, |
| "loss": 0.0557, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7577720207253886, |
| "grad_norm": 0.751621425151825, |
| "learning_rate": 9.877773673889701e-05, |
| "loss": 0.0627, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7642487046632125, |
| "grad_norm": 0.563669741153717, |
| "learning_rate": 9.87411340032603e-05, |
| "loss": 0.0572, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7707253886010362, |
| "grad_norm": 0.4242180585861206, |
| "learning_rate": 9.870399824239117e-05, |
| "loss": 0.0572, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.7772020725388601, |
| "grad_norm": 0.37374168634414673, |
| "learning_rate": 9.86663298624003e-05, |
| "loss": 0.058, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.783678756476684, |
| "grad_norm": 0.4612903296947479, |
| "learning_rate": 9.862812927522309e-05, |
| "loss": 0.0657, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7901554404145078, |
| "grad_norm": 0.6304490566253662, |
| "learning_rate": 9.858939689861506e-05, |
| "loss": 0.0573, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7966321243523317, |
| "grad_norm": 0.5008482336997986, |
| "learning_rate": 9.855013315614725e-05, |
| "loss": 0.0564, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.8031088082901554, |
| "grad_norm": 0.6502981185913086, |
| "learning_rate": 9.851033847720166e-05, |
| "loss": 0.0688, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8095854922279793, |
| "grad_norm": 0.6085125803947449, |
| "learning_rate": 9.847001329696653e-05, |
| "loss": 0.0539, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8160621761658031, |
| "grad_norm": 0.4931461215019226, |
| "learning_rate": 9.842915805643155e-05, |
| "loss": 0.0491, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.822538860103627, |
| "grad_norm": 0.6507974863052368, |
| "learning_rate": 9.838777320238312e-05, |
| "loss": 0.0514, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.8290155440414507, |
| "grad_norm": 0.753237783908844, |
| "learning_rate": 9.834585918739936e-05, |
| "loss": 0.0632, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8354922279792746, |
| "grad_norm": 0.5048700571060181, |
| "learning_rate": 9.830341646984521e-05, |
| "loss": 0.0604, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.8419689119170984, |
| "grad_norm": 0.5395866632461548, |
| "learning_rate": 9.826044551386744e-05, |
| "loss": 0.0596, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8484455958549223, |
| "grad_norm": 0.3940522074699402, |
| "learning_rate": 9.821694678938953e-05, |
| "loss": 0.0547, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.8549222797927462, |
| "grad_norm": 0.4183802902698517, |
| "learning_rate": 9.817292077210659e-05, |
| "loss": 0.0542, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.8613989637305699, |
| "grad_norm": 0.6100337505340576, |
| "learning_rate": 9.812836794348004e-05, |
| "loss": 0.0688, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.8678756476683938, |
| "grad_norm": 0.5487945675849915, |
| "learning_rate": 9.808328879073251e-05, |
| "loss": 0.0597, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.8743523316062176, |
| "grad_norm": 0.6972073912620544, |
| "learning_rate": 9.803768380684242e-05, |
| "loss": 0.0526, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8808290155440415, |
| "grad_norm": 0.6903045773506165, |
| "learning_rate": 9.799155349053851e-05, |
| "loss": 0.0608, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8873056994818653, |
| "grad_norm": 0.5447720885276794, |
| "learning_rate": 9.794489834629455e-05, |
| "loss": 0.047, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.8937823834196891, |
| "grad_norm": 0.6312441825866699, |
| "learning_rate": 9.789771888432375e-05, |
| "loss": 0.0508, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.9002590673575129, |
| "grad_norm": 0.6527279615402222, |
| "learning_rate": 9.785001562057309e-05, |
| "loss": 0.06, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.9067357512953368, |
| "grad_norm": 0.5341177582740784, |
| "learning_rate": 9.780178907671789e-05, |
| "loss": 0.055, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9132124352331606, |
| "grad_norm": 0.7597373723983765, |
| "learning_rate": 9.775303978015585e-05, |
| "loss": 0.0677, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.9196891191709845, |
| "grad_norm": 0.6934833526611328, |
| "learning_rate": 9.77037682640015e-05, |
| "loss": 0.0528, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9261658031088082, |
| "grad_norm": 0.6315605640411377, |
| "learning_rate": 9.765397506708023e-05, |
| "loss": 0.0607, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.9326424870466321, |
| "grad_norm": 0.5957879424095154, |
| "learning_rate": 9.760366073392246e-05, |
| "loss": 0.0428, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.939119170984456, |
| "grad_norm": 0.4082203209400177, |
| "learning_rate": 9.755282581475769e-05, |
| "loss": 0.0534, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.9455958549222798, |
| "grad_norm": 0.5609003305435181, |
| "learning_rate": 9.750147086550844e-05, |
| "loss": 0.0489, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.9520725388601037, |
| "grad_norm": 0.5083820819854736, |
| "learning_rate": 9.744959644778422e-05, |
| "loss": 0.0452, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.9585492227979274, |
| "grad_norm": 0.48094549775123596, |
| "learning_rate": 9.739720312887535e-05, |
| "loss": 0.0551, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.9650259067357513, |
| "grad_norm": 0.46845394372940063, |
| "learning_rate": 9.734429148174675e-05, |
| "loss": 0.0459, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.9715025906735751, |
| "grad_norm": 0.46226462721824646, |
| "learning_rate": 9.729086208503174e-05, |
| "loss": 0.0565, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.977979274611399, |
| "grad_norm": 0.3774145543575287, |
| "learning_rate": 9.723691552302562e-05, |
| "loss": 0.0445, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.9844559585492227, |
| "grad_norm": 0.6989986300468445, |
| "learning_rate": 9.718245238567939e-05, |
| "loss": 0.0513, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9909326424870466, |
| "grad_norm": 0.31146273016929626, |
| "learning_rate": 9.712747326859315e-05, |
| "loss": 0.0415, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.9974093264248705, |
| "grad_norm": 0.5306687951087952, |
| "learning_rate": 9.707197877300974e-05, |
| "loss": 0.043, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.0038860103626943, |
| "grad_norm": 0.4852505922317505, |
| "learning_rate": 9.701596950580806e-05, |
| "loss": 0.0519, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.0103626943005182, |
| "grad_norm": 0.38807374238967896, |
| "learning_rate": 9.695944607949649e-05, |
| "loss": 0.0486, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.016839378238342, |
| "grad_norm": 0.4936634600162506, |
| "learning_rate": 9.690240911220618e-05, |
| "loss": 0.0459, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.0233160621761659, |
| "grad_norm": 0.5796962976455688, |
| "learning_rate": 9.684485922768422e-05, |
| "loss": 0.0515, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.0297927461139897, |
| "grad_norm": 0.38532519340515137, |
| "learning_rate": 9.6786797055287e-05, |
| "loss": 0.0498, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.0362694300518134, |
| "grad_norm": 0.45021045207977295, |
| "learning_rate": 9.672822322997305e-05, |
| "loss": 0.0497, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.0427461139896372, |
| "grad_norm": 0.5176680684089661, |
| "learning_rate": 9.66691383922964e-05, |
| "loss": 0.0556, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.049222797927461, |
| "grad_norm": 0.4210398495197296, |
| "learning_rate": 9.660954318839933e-05, |
| "loss": 0.0531, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.055699481865285, |
| "grad_norm": 0.4386137127876282, |
| "learning_rate": 9.654943827000548e-05, |
| "loss": 0.0497, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.0621761658031088, |
| "grad_norm": 0.37542179226875305, |
| "learning_rate": 9.648882429441257e-05, |
| "loss": 0.0471, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.0686528497409327, |
| "grad_norm": 0.5773810744285583, |
| "learning_rate": 9.642770192448536e-05, |
| "loss": 0.0387, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.0751295336787565, |
| "grad_norm": 0.6515969038009644, |
| "learning_rate": 9.636607182864827e-05, |
| "loss": 0.0575, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.0816062176165804, |
| "grad_norm": 0.28263774514198303, |
| "learning_rate": 9.630393468087818e-05, |
| "loss": 0.0435, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.0880829015544042, |
| "grad_norm": 0.49530646204948425, |
| "learning_rate": 9.624129116069694e-05, |
| "loss": 0.0452, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.0945595854922279, |
| "grad_norm": 0.5361151695251465, |
| "learning_rate": 9.617814195316411e-05, |
| "loss": 0.0685, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.1010362694300517, |
| "grad_norm": 0.9678163528442383, |
| "learning_rate": 9.611448774886924e-05, |
| "loss": 0.048, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.1075129533678756, |
| "grad_norm": 0.3357471823692322, |
| "learning_rate": 9.605032924392457e-05, |
| "loss": 0.0492, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.1139896373056994, |
| "grad_norm": 0.7007777094841003, |
| "learning_rate": 9.598566713995718e-05, |
| "loss": 0.0489, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.1204663212435233, |
| "grad_norm": 0.2847372591495514, |
| "learning_rate": 9.59205021441015e-05, |
| "loss": 0.0519, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.1269430051813472, |
| "grad_norm": 0.41937583684921265, |
| "learning_rate": 9.58548349689915e-05, |
| "loss": 0.0533, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.133419689119171, |
| "grad_norm": 0.6489237546920776, |
| "learning_rate": 9.578866633275288e-05, |
| "loss": 0.0455, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.1398963730569949, |
| "grad_norm": 0.5252217054367065, |
| "learning_rate": 9.572199695899522e-05, |
| "loss": 0.0512, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.1463730569948187, |
| "grad_norm": 0.44682297110557556, |
| "learning_rate": 9.565482757680415e-05, |
| "loss": 0.0479, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.1528497409326426, |
| "grad_norm": 0.7223442196846008, |
| "learning_rate": 9.558715892073323e-05, |
| "loss": 0.0427, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.1593264248704664, |
| "grad_norm": 0.4472532570362091, |
| "learning_rate": 9.551899173079607e-05, |
| "loss": 0.0549, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.16580310880829, |
| "grad_norm": 0.4524969160556793, |
| "learning_rate": 9.545032675245813e-05, |
| "loss": 0.0543, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.172279792746114, |
| "grad_norm": 0.5706340670585632, |
| "learning_rate": 9.538116473662861e-05, |
| "loss": 0.0442, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.1787564766839378, |
| "grad_norm": 0.655547559261322, |
| "learning_rate": 9.531150643965223e-05, |
| "loss": 0.0355, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.1852331606217616, |
| "grad_norm": 0.3323609530925751, |
| "learning_rate": 9.524135262330098e-05, |
| "loss": 0.0508, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.1917098445595855, |
| "grad_norm": 0.5258129239082336, |
| "learning_rate": 9.517070405476575e-05, |
| "loss": 0.0491, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.1981865284974094, |
| "grad_norm": 0.4815446734428406, |
| "learning_rate": 9.509956150664796e-05, |
| "loss": 0.0492, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.2046632124352332, |
| "grad_norm": 0.3571079969406128, |
| "learning_rate": 9.502792575695112e-05, |
| "loss": 0.0478, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.211139896373057, |
| "grad_norm": 0.5302177667617798, |
| "learning_rate": 9.49557975890723e-05, |
| "loss": 0.0405, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.2176165803108807, |
| "grad_norm": 0.3246991038322449, |
| "learning_rate": 9.488317779179361e-05, |
| "loss": 0.0413, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.2240932642487046, |
| "grad_norm": 0.5374770760536194, |
| "learning_rate": 9.481006715927351e-05, |
| "loss": 0.0502, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.2305699481865284, |
| "grad_norm": 0.410244882106781, |
| "learning_rate": 9.473646649103818e-05, |
| "loss": 0.0502, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.2370466321243523, |
| "grad_norm": 0.5318041443824768, |
| "learning_rate": 9.46623765919727e-05, |
| "loss": 0.0429, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.2435233160621761, |
| "grad_norm": 0.42394232749938965, |
| "learning_rate": 9.458779827231237e-05, |
| "loss": 0.0394, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.4311422109603882, |
| "learning_rate": 9.451273234763371e-05, |
| "loss": 0.0438, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.2564766839378239, |
| "grad_norm": 0.5800412893295288, |
| "learning_rate": 9.443717963884569e-05, |
| "loss": 0.0403, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.2629533678756477, |
| "grad_norm": 0.48299023509025574, |
| "learning_rate": 9.43611409721806e-05, |
| "loss": 0.0549, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.2694300518134716, |
| "grad_norm": 0.5357002019882202, |
| "learning_rate": 9.428461717918511e-05, |
| "loss": 0.0403, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.2759067357512954, |
| "grad_norm": 0.43265822529792786, |
| "learning_rate": 9.420760909671118e-05, |
| "loss": 0.0529, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.2823834196891193, |
| "grad_norm": 0.36043480038642883, |
| "learning_rate": 9.413011756690685e-05, |
| "loss": 0.0476, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.2888601036269431, |
| "grad_norm": 0.43068262934684753, |
| "learning_rate": 9.405214343720707e-05, |
| "loss": 0.0443, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.2953367875647668, |
| "grad_norm": 0.5082911849021912, |
| "learning_rate": 9.397368756032445e-05, |
| "loss": 0.0466, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.3018134715025906, |
| "grad_norm": 0.5428886413574219, |
| "learning_rate": 9.389475079423988e-05, |
| "loss": 0.0441, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.3082901554404145, |
| "grad_norm": 0.4764600098133087, |
| "learning_rate": 9.381533400219318e-05, |
| "loss": 0.041, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.3147668393782384, |
| "grad_norm": 0.40374669432640076, |
| "learning_rate": 9.373543805267368e-05, |
| "loss": 0.0446, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.3212435233160622, |
| "grad_norm": 0.31766676902770996, |
| "learning_rate": 9.365506381941066e-05, |
| "loss": 0.0416, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.327720207253886, |
| "grad_norm": 0.4188934862613678, |
| "learning_rate": 9.357421218136386e-05, |
| "loss": 0.0381, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.33419689119171, |
| "grad_norm": 0.3089483380317688, |
| "learning_rate": 9.349288402271388e-05, |
| "loss": 0.044, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.3406735751295336, |
| "grad_norm": 0.2659463882446289, |
| "learning_rate": 9.341108023285238e-05, |
| "loss": 0.0487, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.3471502590673574, |
| "grad_norm": 0.589297354221344, |
| "learning_rate": 9.332880170637252e-05, |
| "loss": 0.0472, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.3536269430051813, |
| "grad_norm": 0.39319437742233276, |
| "learning_rate": 9.32460493430591e-05, |
| "loss": 0.0377, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.3601036269430051, |
| "grad_norm": 0.3916621506214142, |
| "learning_rate": 9.316282404787871e-05, |
| "loss": 0.0404, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.366580310880829, |
| "grad_norm": 0.6271021366119385, |
| "learning_rate": 9.30791267309698e-05, |
| "loss": 0.0467, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.3730569948186528, |
| "grad_norm": 0.44995781779289246, |
| "learning_rate": 9.299495830763286e-05, |
| "loss": 0.0521, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.3795336787564767, |
| "grad_norm": 0.46163469552993774, |
| "learning_rate": 9.291031969832026e-05, |
| "loss": 0.0477, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.3860103626943006, |
| "grad_norm": 0.43207502365112305, |
| "learning_rate": 9.282521182862629e-05, |
| "loss": 0.0383, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.3924870466321244, |
| "grad_norm": 0.2974700331687927, |
| "learning_rate": 9.273963562927695e-05, |
| "loss": 0.038, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.3989637305699483, |
| "grad_norm": 0.45704299211502075, |
| "learning_rate": 9.265359203611987e-05, |
| "loss": 0.0553, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.4054404145077721, |
| "grad_norm": 0.4036669135093689, |
| "learning_rate": 9.256708199011401e-05, |
| "loss": 0.0366, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.411917098445596, |
| "grad_norm": 0.44508373737335205, |
| "learning_rate": 9.248010643731935e-05, |
| "loss": 0.0307, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.4183937823834196, |
| "grad_norm": 0.31299716234207153, |
| "learning_rate": 9.239266632888659e-05, |
| "loss": 0.0363, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.4248704663212435, |
| "grad_norm": 0.3911212086677551, |
| "learning_rate": 9.230476262104677e-05, |
| "loss": 0.0377, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.4313471502590673, |
| "grad_norm": 0.4919986128807068, |
| "learning_rate": 9.221639627510076e-05, |
| "loss": 0.0417, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.4378238341968912, |
| "grad_norm": 0.5026757717132568, |
| "learning_rate": 9.212756825740873e-05, |
| "loss": 0.0426, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.444300518134715, |
| "grad_norm": 0.4060062766075134, |
| "learning_rate": 9.20382795393797e-05, |
| "loss": 0.0518, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.450777202072539, |
| "grad_norm": 0.7231489419937134, |
| "learning_rate": 9.194853109746074e-05, |
| "loss": 0.0405, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.4572538860103628, |
| "grad_norm": 0.42704254388809204, |
| "learning_rate": 9.185832391312644e-05, |
| "loss": 0.042, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.4637305699481864, |
| "grad_norm": 0.5011823773384094, |
| "learning_rate": 9.176765897286813e-05, |
| "loss": 0.055, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.4702072538860103, |
| "grad_norm": 0.49078866839408875, |
| "learning_rate": 9.167653726818305e-05, |
| "loss": 0.0464, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.4766839378238341, |
| "grad_norm": 0.37776052951812744, |
| "learning_rate": 9.158495979556358e-05, |
| "loss": 0.0435, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.483160621761658, |
| "grad_norm": 0.3427102267742157, |
| "learning_rate": 9.14929275564863e-05, |
| "loss": 0.0433, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.4896373056994818, |
| "grad_norm": 0.3482416570186615, |
| "learning_rate": 9.140044155740101e-05, |
| "loss": 0.0398, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.4961139896373057, |
| "grad_norm": 0.36297425627708435, |
| "learning_rate": 9.130750280971978e-05, |
| "loss": 0.0412, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.5025906735751295, |
| "grad_norm": 0.6208764910697937, |
| "learning_rate": 9.121411232980588e-05, |
| "loss": 0.0494, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.5090673575129534, |
| "grad_norm": 0.3590516149997711, |
| "learning_rate": 9.112027113896262e-05, |
| "loss": 0.0495, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.5155440414507773, |
| "grad_norm": 0.4894999861717224, |
| "learning_rate": 9.102598026342222e-05, |
| "loss": 0.0384, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.5220207253886011, |
| "grad_norm": 0.34248238801956177, |
| "learning_rate": 9.093124073433463e-05, |
| "loss": 0.0417, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.528497409326425, |
| "grad_norm": 0.333857923746109, |
| "learning_rate": 9.083605358775612e-05, |
| "loss": 0.0398, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.5349740932642488, |
| "grad_norm": 0.3952425420284271, |
| "learning_rate": 9.074041986463808e-05, |
| "loss": 0.0416, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.5414507772020727, |
| "grad_norm": 0.5747725963592529, |
| "learning_rate": 9.064434061081562e-05, |
| "loss": 0.0383, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.5479274611398963, |
| "grad_norm": 0.49283695220947266, |
| "learning_rate": 9.0547816876996e-05, |
| "loss": 0.045, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.5544041450777202, |
| "grad_norm": 0.3009427487850189, |
| "learning_rate": 9.045084971874738e-05, |
| "loss": 0.0387, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.560880829015544, |
| "grad_norm": 0.37806493043899536, |
| "learning_rate": 9.035344019648702e-05, |
| "loss": 0.0399, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.567357512953368, |
| "grad_norm": 0.425051748752594, |
| "learning_rate": 9.025558937546988e-05, |
| "loss": 0.0418, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.5738341968911918, |
| "grad_norm": 0.24157755076885223, |
| "learning_rate": 9.015729832577681e-05, |
| "loss": 0.0344, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.5803108808290154, |
| "grad_norm": 0.2987273335456848, |
| "learning_rate": 9.005856812230304e-05, |
| "loss": 0.0323, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.5867875647668392, |
| "grad_norm": 0.27894994616508484, |
| "learning_rate": 8.995939984474624e-05, |
| "loss": 0.0467, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.593264248704663, |
| "grad_norm": 0.30715203285217285, |
| "learning_rate": 8.98597945775948e-05, |
| "loss": 0.0447, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.599740932642487, |
| "grad_norm": 0.4254259467124939, |
| "learning_rate": 8.975975341011596e-05, |
| "loss": 0.0401, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.6062176165803108, |
| "grad_norm": 0.4341532289981842, |
| "learning_rate": 8.965927743634391e-05, |
| "loss": 0.0435, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.6126943005181347, |
| "grad_norm": 0.4242204427719116, |
| "learning_rate": 8.955836775506776e-05, |
| "loss": 0.0437, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.6191709844559585, |
| "grad_norm": 0.4756164252758026, |
| "learning_rate": 8.945702546981969e-05, |
| "loss": 0.0428, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.6256476683937824, |
| "grad_norm": 0.6672074794769287, |
| "learning_rate": 8.935525168886262e-05, |
| "loss": 0.0448, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.6321243523316062, |
| "grad_norm": 0.38436517119407654, |
| "learning_rate": 8.92530475251784e-05, |
| "loss": 0.0357, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.63860103626943, |
| "grad_norm": 0.5012999773025513, |
| "learning_rate": 8.91504140964553e-05, |
| "loss": 0.046, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.645077720207254, |
| "grad_norm": 0.4836990237236023, |
| "learning_rate": 8.90473525250761e-05, |
| "loss": 0.0497, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.6515544041450778, |
| "grad_norm": 0.318345844745636, |
| "learning_rate": 8.894386393810563e-05, |
| "loss": 0.0362, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.6580310880829017, |
| "grad_norm": 0.43974965810775757, |
| "learning_rate": 8.883994946727849e-05, |
| "loss": 0.0463, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.6645077720207255, |
| "grad_norm": 0.5346915125846863, |
| "learning_rate": 8.873561024898668e-05, |
| "loss": 0.0447, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.6709844559585494, |
| "grad_norm": 0.45229285955429077, |
| "learning_rate": 8.863084742426719e-05, |
| "loss": 0.0431, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.677461139896373, |
| "grad_norm": 0.41755497455596924, |
| "learning_rate": 8.852566213878947e-05, |
| "loss": 0.0321, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.6839378238341969, |
| "grad_norm": 0.25371554493904114, |
| "learning_rate": 8.842005554284296e-05, |
| "loss": 0.0462, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.6904145077720207, |
| "grad_norm": 0.40556564927101135, |
| "learning_rate": 8.831402879132446e-05, |
| "loss": 0.0377, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.6968911917098446, |
| "grad_norm": 0.3499738574028015, |
| "learning_rate": 8.820758304372557e-05, |
| "loss": 0.0371, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.7033678756476682, |
| "grad_norm": 0.4621606171131134, |
| "learning_rate": 8.810071946411989e-05, |
| "loss": 0.0339, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.709844559585492, |
| "grad_norm": 0.2640065848827362, |
| "learning_rate": 8.799343922115044e-05, |
| "loss": 0.04, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.716321243523316, |
| "grad_norm": 0.33107438683509827, |
| "learning_rate": 8.788574348801675e-05, |
| "loss": 0.0433, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.7227979274611398, |
| "grad_norm": 0.3660358786582947, |
| "learning_rate": 8.77776334424621e-05, |
| "loss": 0.0331, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.7292746113989637, |
| "grad_norm": 0.40852832794189453, |
| "learning_rate": 8.766911026676064e-05, |
| "loss": 0.0404, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.7357512953367875, |
| "grad_norm": 0.33427226543426514, |
| "learning_rate": 8.756017514770443e-05, |
| "loss": 0.037, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.7422279792746114, |
| "grad_norm": 0.2343767136335373, |
| "learning_rate": 8.745082927659047e-05, |
| "loss": 0.0334, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.7487046632124352, |
| "grad_norm": 0.3142069876194, |
| "learning_rate": 8.73410738492077e-05, |
| "loss": 0.0349, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.755181347150259, |
| "grad_norm": 0.43764960765838623, |
| "learning_rate": 8.723091006582389e-05, |
| "loss": 0.0318, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.761658031088083, |
| "grad_norm": 0.509016752243042, |
| "learning_rate": 8.71203391311725e-05, |
| "loss": 0.0462, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.7681347150259068, |
| "grad_norm": 0.29676973819732666, |
| "learning_rate": 8.700936225443959e-05, |
| "loss": 0.0277, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.7746113989637307, |
| "grad_norm": 0.3989731967449188, |
| "learning_rate": 8.689798064925049e-05, |
| "loss": 0.0285, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.7810880829015545, |
| "grad_norm": 0.3644524812698364, |
| "learning_rate": 8.678619553365659e-05, |
| "loss": 0.0372, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.7875647668393784, |
| "grad_norm": 0.4097837209701538, |
| "learning_rate": 8.6674008130122e-05, |
| "loss": 0.038, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.7940414507772022, |
| "grad_norm": 0.2878129482269287, |
| "learning_rate": 8.656141966551019e-05, |
| "loss": 0.0331, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.8005181347150259, |
| "grad_norm": 0.2862109839916229, |
| "learning_rate": 8.644843137107059e-05, |
| "loss": 0.0335, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.8069948186528497, |
| "grad_norm": 0.43468615412712097, |
| "learning_rate": 8.633504448242505e-05, |
| "loss": 0.0348, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.8134715025906736, |
| "grad_norm": 0.5068714022636414, |
| "learning_rate": 8.622126023955446e-05, |
| "loss": 0.0402, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.8199481865284974, |
| "grad_norm": 0.5587580800056458, |
| "learning_rate": 8.610707988678503e-05, |
| "loss": 0.0344, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.8264248704663213, |
| "grad_norm": 0.6059616804122925, |
| "learning_rate": 8.599250467277483e-05, |
| "loss": 0.0369, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.832901554404145, |
| "grad_norm": 0.35330525040626526, |
| "learning_rate": 8.587753585050004e-05, |
| "loss": 0.0379, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.8393782383419688, |
| "grad_norm": 0.41709282994270325, |
| "learning_rate": 8.576217467724128e-05, |
| "loss": 0.0395, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.8458549222797926, |
| "grad_norm": 0.43537795543670654, |
| "learning_rate": 8.564642241456986e-05, |
| "loss": 0.035, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.8523316062176165, |
| "grad_norm": 0.4171069264411926, |
| "learning_rate": 8.553028032833397e-05, |
| "loss": 0.0376, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.8588082901554404, |
| "grad_norm": 0.4365420341491699, |
| "learning_rate": 8.541374968864487e-05, |
| "loss": 0.0403, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.8652849740932642, |
| "grad_norm": 0.4330903887748718, |
| "learning_rate": 8.529683176986295e-05, |
| "loss": 0.0356, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.871761658031088, |
| "grad_norm": 0.28101474046707153, |
| "learning_rate": 8.517952785058385e-05, |
| "loss": 0.0401, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.878238341968912, |
| "grad_norm": 0.45936867594718933, |
| "learning_rate": 8.506183921362443e-05, |
| "loss": 0.0354, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.8847150259067358, |
| "grad_norm": 0.35999923944473267, |
| "learning_rate": 8.494376714600878e-05, |
| "loss": 0.0384, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.8911917098445596, |
| "grad_norm": 0.36796870827674866, |
| "learning_rate": 8.482531293895412e-05, |
| "loss": 0.0399, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.8976683937823835, |
| "grad_norm": 0.443658709526062, |
| "learning_rate": 8.470647788785665e-05, |
| "loss": 0.0345, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.9041450777202074, |
| "grad_norm": 0.4713057279586792, |
| "learning_rate": 8.458726329227747e-05, |
| "loss": 0.0409, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.9106217616580312, |
| "grad_norm": 0.540421724319458, |
| "learning_rate": 8.44676704559283e-05, |
| "loss": 0.0502, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.917098445595855, |
| "grad_norm": 0.4227132499217987, |
| "learning_rate": 8.434770068665723e-05, |
| "loss": 0.0405, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.9235751295336787, |
| "grad_norm": 0.40846768021583557, |
| "learning_rate": 8.422735529643444e-05, |
| "loss": 0.0412, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.9300518134715026, |
| "grad_norm": 0.3132789134979248, |
| "learning_rate": 8.410663560133784e-05, |
| "loss": 0.0286, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.9365284974093264, |
| "grad_norm": 0.3604018986225128, |
| "learning_rate": 8.398554292153866e-05, |
| "loss": 0.0414, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.9430051813471503, |
| "grad_norm": 0.6764439940452576, |
| "learning_rate": 8.386407858128706e-05, |
| "loss": 0.0541, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.9494818652849741, |
| "grad_norm": 0.3984512388706207, |
| "learning_rate": 8.37422439088976e-05, |
| "loss": 0.039, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.9559585492227978, |
| "grad_norm": 0.31599175930023193, |
| "learning_rate": 8.362004023673474e-05, |
| "loss": 0.0412, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.9624352331606216, |
| "grad_norm": 0.34263965487480164, |
| "learning_rate": 8.349746890119826e-05, |
| "loss": 0.0343, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.9689119170984455, |
| "grad_norm": 0.2634800672531128, |
| "learning_rate": 8.337453124270863e-05, |
| "loss": 0.0374, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.9753886010362693, |
| "grad_norm": 0.2721804976463318, |
| "learning_rate": 8.32512286056924e-05, |
| "loss": 0.0463, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.9818652849740932, |
| "grad_norm": 0.37321698665618896, |
| "learning_rate": 8.31275623385675e-05, |
| "loss": 0.0363, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.988341968911917, |
| "grad_norm": 0.40666913986206055, |
| "learning_rate": 8.300353379372834e-05, |
| "loss": 0.0326, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.994818652849741, |
| "grad_norm": 0.32317647337913513, |
| "learning_rate": 8.287914432753123e-05, |
| "loss": 0.0496, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.0012953367875648, |
| "grad_norm": 0.23770615458488464, |
| "learning_rate": 8.275439530027948e-05, |
| "loss": 0.0421, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.0077720207253886, |
| "grad_norm": 0.31836286187171936, |
| "learning_rate": 8.262928807620843e-05, |
| "loss": 0.0399, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.0142487046632125, |
| "grad_norm": 0.3128102719783783, |
| "learning_rate": 8.250382402347065e-05, |
| "loss": 0.0338, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.0207253886010363, |
| "grad_norm": 0.45987972617149353, |
| "learning_rate": 8.237800451412095e-05, |
| "loss": 0.035, |
| "step": 3120 |
| }, |
| { |
| "epoch": 2.02720207253886, |
| "grad_norm": 0.35997146368026733, |
| "learning_rate": 8.225183092410128e-05, |
| "loss": 0.0318, |
| "step": 3130 |
| }, |
| { |
| "epoch": 2.033678756476684, |
| "grad_norm": 0.5645427703857422, |
| "learning_rate": 8.212530463322583e-05, |
| "loss": 0.0329, |
| "step": 3140 |
| }, |
| { |
| "epoch": 2.040155440414508, |
| "grad_norm": 0.5178828239440918, |
| "learning_rate": 8.199842702516583e-05, |
| "loss": 0.0354, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.0466321243523318, |
| "grad_norm": 0.48487406969070435, |
| "learning_rate": 8.18711994874345e-05, |
| "loss": 0.0288, |
| "step": 3160 |
| }, |
| { |
| "epoch": 2.0531088082901556, |
| "grad_norm": 0.4110161066055298, |
| "learning_rate": 8.174362341137177e-05, |
| "loss": 0.0348, |
| "step": 3170 |
| }, |
| { |
| "epoch": 2.0595854922279795, |
| "grad_norm": 0.26325592398643494, |
| "learning_rate": 8.161570019212921e-05, |
| "loss": 0.0344, |
| "step": 3180 |
| }, |
| { |
| "epoch": 2.066062176165803, |
| "grad_norm": 0.2679133713245392, |
| "learning_rate": 8.148743122865463e-05, |
| "loss": 0.0263, |
| "step": 3190 |
| }, |
| { |
| "epoch": 2.0725388601036268, |
| "grad_norm": 0.33095425367355347, |
| "learning_rate": 8.135881792367686e-05, |
| "loss": 0.0349, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.0790155440414506, |
| "grad_norm": 0.3910488784313202, |
| "learning_rate": 8.12298616836904e-05, |
| "loss": 0.0313, |
| "step": 3210 |
| }, |
| { |
| "epoch": 2.0854922279792745, |
| "grad_norm": 0.378434956073761, |
| "learning_rate": 8.110056391894005e-05, |
| "loss": 0.026, |
| "step": 3220 |
| }, |
| { |
| "epoch": 2.0919689119170983, |
| "grad_norm": 0.3993297815322876, |
| "learning_rate": 8.097092604340542e-05, |
| "loss": 0.0304, |
| "step": 3230 |
| }, |
| { |
| "epoch": 2.098445595854922, |
| "grad_norm": 0.5249981880187988, |
| "learning_rate": 8.084094947478556e-05, |
| "loss": 0.0283, |
| "step": 3240 |
| }, |
| { |
| "epoch": 2.104922279792746, |
| "grad_norm": 0.2571522295475006, |
| "learning_rate": 8.07106356344834e-05, |
| "loss": 0.034, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.11139896373057, |
| "grad_norm": 0.5822983384132385, |
| "learning_rate": 8.057998594759022e-05, |
| "loss": 0.0274, |
| "step": 3260 |
| }, |
| { |
| "epoch": 2.1178756476683938, |
| "grad_norm": 0.40026816725730896, |
| "learning_rate": 8.044900184287007e-05, |
| "loss": 0.0348, |
| "step": 3270 |
| }, |
| { |
| "epoch": 2.1243523316062176, |
| "grad_norm": 0.29010850191116333, |
| "learning_rate": 8.031768475274413e-05, |
| "loss": 0.0305, |
| "step": 3280 |
| }, |
| { |
| "epoch": 2.1308290155440415, |
| "grad_norm": 0.4505520164966583, |
| "learning_rate": 8.018603611327504e-05, |
| "loss": 0.0353, |
| "step": 3290 |
| }, |
| { |
| "epoch": 2.1373056994818653, |
| "grad_norm": 0.49468860030174255, |
| "learning_rate": 8.005405736415126e-05, |
| "loss": 0.0385, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.143782383419689, |
| "grad_norm": 0.27332326769828796, |
| "learning_rate": 7.992174994867123e-05, |
| "loss": 0.0319, |
| "step": 3310 |
| }, |
| { |
| "epoch": 2.150259067357513, |
| "grad_norm": 0.30603963136672974, |
| "learning_rate": 7.978911531372765e-05, |
| "loss": 0.0351, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.156735751295337, |
| "grad_norm": 0.4821016490459442, |
| "learning_rate": 7.965615490979163e-05, |
| "loss": 0.0343, |
| "step": 3330 |
| }, |
| { |
| "epoch": 2.1632124352331608, |
| "grad_norm": 0.2607490122318268, |
| "learning_rate": 7.952287019089685e-05, |
| "loss": 0.025, |
| "step": 3340 |
| }, |
| { |
| "epoch": 2.1696891191709846, |
| "grad_norm": 0.44392070174217224, |
| "learning_rate": 7.938926261462366e-05, |
| "loss": 0.0318, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.1761658031088085, |
| "grad_norm": 0.5164742469787598, |
| "learning_rate": 7.925533364208309e-05, |
| "loss": 0.0386, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.1826424870466323, |
| "grad_norm": 0.41299721598625183, |
| "learning_rate": 7.912108473790092e-05, |
| "loss": 0.0374, |
| "step": 3370 |
| }, |
| { |
| "epoch": 2.1891191709844557, |
| "grad_norm": 0.5575032234191895, |
| "learning_rate": 7.898651737020166e-05, |
| "loss": 0.0312, |
| "step": 3380 |
| }, |
| { |
| "epoch": 2.1955958549222796, |
| "grad_norm": 0.27284616231918335, |
| "learning_rate": 7.88516330105925e-05, |
| "loss": 0.0273, |
| "step": 3390 |
| }, |
| { |
| "epoch": 2.2020725388601035, |
| "grad_norm": 0.39631810784339905, |
| "learning_rate": 7.871643313414718e-05, |
| "loss": 0.0313, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.2085492227979273, |
| "grad_norm": 0.2716298997402191, |
| "learning_rate": 7.858091921938988e-05, |
| "loss": 0.0332, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.215025906735751, |
| "grad_norm": 0.35479772090911865, |
| "learning_rate": 7.844509274827907e-05, |
| "loss": 0.0343, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.221502590673575, |
| "grad_norm": 0.3278186321258545, |
| "learning_rate": 7.830895520619128e-05, |
| "loss": 0.0295, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.227979274611399, |
| "grad_norm": 0.36920130252838135, |
| "learning_rate": 7.817250808190483e-05, |
| "loss": 0.0411, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.2344559585492227, |
| "grad_norm": 0.49877047538757324, |
| "learning_rate": 7.803575286758364e-05, |
| "loss": 0.0338, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.2409326424870466, |
| "grad_norm": 0.3202963173389435, |
| "learning_rate": 7.789869105876083e-05, |
| "loss": 0.028, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.2474093264248705, |
| "grad_norm": 0.33412453532218933, |
| "learning_rate": 7.776132415432234e-05, |
| "loss": 0.0386, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.2538860103626943, |
| "grad_norm": 0.46674206852912903, |
| "learning_rate": 7.762365365649067e-05, |
| "loss": 0.0272, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.260362694300518, |
| "grad_norm": 0.2830505073070526, |
| "learning_rate": 7.748568107080832e-05, |
| "loss": 0.0319, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.266839378238342, |
| "grad_norm": 0.4614030122756958, |
| "learning_rate": 7.734740790612136e-05, |
| "loss": 0.0376, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.273316062176166, |
| "grad_norm": 0.4553893804550171, |
| "learning_rate": 7.720883567456298e-05, |
| "loss": 0.0362, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.2797927461139897, |
| "grad_norm": 0.36776071786880493, |
| "learning_rate": 7.70699658915369e-05, |
| "loss": 0.0392, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.2862694300518136, |
| "grad_norm": 0.6267045736312866, |
| "learning_rate": 7.693080007570084e-05, |
| "loss": 0.0256, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.2927461139896375, |
| "grad_norm": 0.36080533266067505, |
| "learning_rate": 7.679133974894983e-05, |
| "loss": 0.0374, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.2992227979274613, |
| "grad_norm": 0.32267022132873535, |
| "learning_rate": 7.66515864363997e-05, |
| "loss": 0.04, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.305699481865285, |
| "grad_norm": 0.344371497631073, |
| "learning_rate": 7.651154166637025e-05, |
| "loss": 0.0338, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.312176165803109, |
| "grad_norm": 0.43632611632347107, |
| "learning_rate": 7.637120697036866e-05, |
| "loss": 0.0351, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.318652849740933, |
| "grad_norm": 0.35309430956840515, |
| "learning_rate": 7.623058388307269e-05, |
| "loss": 0.036, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.3251295336787563, |
| "grad_norm": 0.301886647939682, |
| "learning_rate": 7.608967394231387e-05, |
| "loss": 0.0319, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.33160621761658, |
| "grad_norm": 0.31548261642456055, |
| "learning_rate": 7.594847868906076e-05, |
| "loss": 0.0329, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.338082901554404, |
| "grad_norm": 0.3690776526927948, |
| "learning_rate": 7.580699966740201e-05, |
| "loss": 0.0331, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.344559585492228, |
| "grad_norm": 0.3857821226119995, |
| "learning_rate": 7.566523842452958e-05, |
| "loss": 0.0329, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.3510362694300517, |
| "grad_norm": 0.3122609555721283, |
| "learning_rate": 7.552319651072164e-05, |
| "loss": 0.0351, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.3575129533678756, |
| "grad_norm": 0.23658838868141174, |
| "learning_rate": 7.538087547932585e-05, |
| "loss": 0.0246, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.3639896373056994, |
| "grad_norm": 0.40285107493400574, |
| "learning_rate": 7.52382768867422e-05, |
| "loss": 0.0349, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.3704663212435233, |
| "grad_norm": 0.17549413442611694, |
| "learning_rate": 7.509540229240601e-05, |
| "loss": 0.0226, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.376943005181347, |
| "grad_norm": 0.3721100091934204, |
| "learning_rate": 7.495225325877103e-05, |
| "loss": 0.0302, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.383419689119171, |
| "grad_norm": 0.24715149402618408, |
| "learning_rate": 7.480883135129211e-05, |
| "loss": 0.0313, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.389896373056995, |
| "grad_norm": 0.3196186423301697, |
| "learning_rate": 7.466513813840825e-05, |
| "loss": 0.0284, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.3963730569948187, |
| "grad_norm": 0.2939792275428772, |
| "learning_rate": 7.452117519152542e-05, |
| "loss": 0.0302, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.4028497409326426, |
| "grad_norm": 0.31848374009132385, |
| "learning_rate": 7.437694408499933e-05, |
| "loss": 0.0347, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.4093264248704664, |
| "grad_norm": 0.32931575179100037, |
| "learning_rate": 7.423244639611826e-05, |
| "loss": 0.0413, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.4158031088082903, |
| "grad_norm": 0.45900219678878784, |
| "learning_rate": 7.408768370508576e-05, |
| "loss": 0.0299, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.422279792746114, |
| "grad_norm": 0.35719117522239685, |
| "learning_rate": 7.394265759500348e-05, |
| "loss": 0.0345, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.428756476683938, |
| "grad_norm": 0.25340452790260315, |
| "learning_rate": 7.379736965185368e-05, |
| "loss": 0.0301, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.4352331606217614, |
| "grad_norm": 0.2732887268066406, |
| "learning_rate": 7.365182146448205e-05, |
| "loss": 0.0258, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.4417098445595853, |
| "grad_norm": 0.41965925693511963, |
| "learning_rate": 7.350601462458024e-05, |
| "loss": 0.0258, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.448186528497409, |
| "grad_norm": 0.22055207192897797, |
| "learning_rate": 7.335995072666848e-05, |
| "loss": 0.0279, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.454663212435233, |
| "grad_norm": 0.23329317569732666, |
| "learning_rate": 7.32136313680782e-05, |
| "loss": 0.0295, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.461139896373057, |
| "grad_norm": 0.4226686358451843, |
| "learning_rate": 7.30670581489344e-05, |
| "loss": 0.0421, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.4676165803108807, |
| "grad_norm": 0.2794199585914612, |
| "learning_rate": 7.292023267213835e-05, |
| "loss": 0.0327, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.4740932642487046, |
| "grad_norm": 0.45654261112213135, |
| "learning_rate": 7.277315654334997e-05, |
| "loss": 0.0312, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.4805699481865284, |
| "grad_norm": 0.5298384428024292, |
| "learning_rate": 7.262583137097018e-05, |
| "loss": 0.0298, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.4870466321243523, |
| "grad_norm": 0.4858406186103821, |
| "learning_rate": 7.247825876612353e-05, |
| "loss": 0.0376, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.493523316062176, |
| "grad_norm": 0.40908846259117126, |
| "learning_rate": 7.233044034264034e-05, |
| "loss": 0.0289, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.6489147543907166, |
| "learning_rate": 7.218237771703921e-05, |
| "loss": 0.0374, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.506476683937824, |
| "grad_norm": 0.5571417808532715, |
| "learning_rate": 7.203407250850928e-05, |
| "loss": 0.0313, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.5129533678756477, |
| "grad_norm": 0.4606262743473053, |
| "learning_rate": 7.188552633889259e-05, |
| "loss": 0.0362, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.5194300518134716, |
| "grad_norm": 0.45915576815605164, |
| "learning_rate": 7.173674083266624e-05, |
| "loss": 0.0344, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.5259067357512954, |
| "grad_norm": 0.312846839427948, |
| "learning_rate": 7.158771761692464e-05, |
| "loss": 0.0287, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.5323834196891193, |
| "grad_norm": 0.3359382748603821, |
| "learning_rate": 7.143845832136188e-05, |
| "loss": 0.0296, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.538860103626943, |
| "grad_norm": 0.33793967962265015, |
| "learning_rate": 7.128896457825364e-05, |
| "loss": 0.0249, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.545336787564767, |
| "grad_norm": 0.6286918520927429, |
| "learning_rate": 7.113923802243957e-05, |
| "loss": 0.0321, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.551813471502591, |
| "grad_norm": 0.27349719405174255, |
| "learning_rate": 7.09892802913053e-05, |
| "loss": 0.0387, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.5582901554404147, |
| "grad_norm": 0.20686767995357513, |
| "learning_rate": 7.083909302476453e-05, |
| "loss": 0.0406, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.5647668393782386, |
| "grad_norm": 0.491794615983963, |
| "learning_rate": 7.068867786524116e-05, |
| "loss": 0.0449, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.5712435233160624, |
| "grad_norm": 0.28735771775245667, |
| "learning_rate": 7.053803645765128e-05, |
| "loss": 0.0351, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.5777202072538863, |
| "grad_norm": 0.2941044270992279, |
| "learning_rate": 7.038717044938519e-05, |
| "loss": 0.0275, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.5841968911917097, |
| "grad_norm": 0.3536795973777771, |
| "learning_rate": 7.023608149028937e-05, |
| "loss": 0.0305, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.5906735751295336, |
| "grad_norm": 0.3308267593383789, |
| "learning_rate": 7.008477123264848e-05, |
| "loss": 0.0276, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.5971502590673574, |
| "grad_norm": 0.2571580410003662, |
| "learning_rate": 6.993324133116726e-05, |
| "loss": 0.0286, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.6036269430051813, |
| "grad_norm": 0.21292202174663544, |
| "learning_rate": 6.978149344295242e-05, |
| "loss": 0.0264, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.610103626943005, |
| "grad_norm": 0.27990859746932983, |
| "learning_rate": 6.962952922749457e-05, |
| "loss": 0.0327, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.616580310880829, |
| "grad_norm": 0.4610576629638672, |
| "learning_rate": 6.947735034665002e-05, |
| "loss": 0.033, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.623056994818653, |
| "grad_norm": 0.2786068618297577, |
| "learning_rate": 6.932495846462261e-05, |
| "loss": 0.0367, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.6295336787564767, |
| "grad_norm": 0.21466326713562012, |
| "learning_rate": 6.917235524794558e-05, |
| "loss": 0.0366, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.6360103626943006, |
| "grad_norm": 0.4376002252101898, |
| "learning_rate": 6.901954236546323e-05, |
| "loss": 0.0415, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.6424870466321244, |
| "grad_norm": 0.327411025762558, |
| "learning_rate": 6.886652148831279e-05, |
| "loss": 0.0307, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.6489637305699483, |
| "grad_norm": 0.32535767555236816, |
| "learning_rate": 6.871329428990602e-05, |
| "loss": 0.0327, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.655440414507772, |
| "grad_norm": 0.17626729607582092, |
| "learning_rate": 6.855986244591104e-05, |
| "loss": 0.0301, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.661917098445596, |
| "grad_norm": 0.3351651430130005, |
| "learning_rate": 6.840622763423391e-05, |
| "loss": 0.0304, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.66839378238342, |
| "grad_norm": 0.41209253668785095, |
| "learning_rate": 6.825239153500029e-05, |
| "loss": 0.0278, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.6748704663212433, |
| "grad_norm": 0.33279556035995483, |
| "learning_rate": 6.809835583053715e-05, |
| "loss": 0.0221, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.681347150259067, |
| "grad_norm": 0.3693098723888397, |
| "learning_rate": 6.794412220535426e-05, |
| "loss": 0.0312, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.687823834196891, |
| "grad_norm": 0.5105347037315369, |
| "learning_rate": 6.778969234612584e-05, |
| "loss": 0.0296, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.694300518134715, |
| "grad_norm": 0.32539427280426025, |
| "learning_rate": 6.763506794167208e-05, |
| "loss": 0.0298, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.7007772020725387, |
| "grad_norm": 0.455147922039032, |
| "learning_rate": 6.748025068294067e-05, |
| "loss": 0.0251, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.7072538860103625, |
| "grad_norm": 0.21031919121742249, |
| "learning_rate": 6.732524226298841e-05, |
| "loss": 0.0265, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.7137305699481864, |
| "grad_norm": 0.36876460909843445, |
| "learning_rate": 6.71700443769625e-05, |
| "loss": 0.0242, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.7202072538860103, |
| "grad_norm": 0.3641934096813202, |
| "learning_rate": 6.701465872208216e-05, |
| "loss": 0.0236, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.726683937823834, |
| "grad_norm": 0.3689476251602173, |
| "learning_rate": 6.685908699762002e-05, |
| "loss": 0.0261, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.733160621761658, |
| "grad_norm": 0.40867921710014343, |
| "learning_rate": 6.670333090488356e-05, |
| "loss": 0.0268, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.739637305699482, |
| "grad_norm": 0.29465481638908386, |
| "learning_rate": 6.654739214719641e-05, |
| "loss": 0.0245, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.7461139896373057, |
| "grad_norm": 0.33505406975746155, |
| "learning_rate": 6.639127242987988e-05, |
| "loss": 0.0249, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.7525906735751295, |
| "grad_norm": 0.21609602868556976, |
| "learning_rate": 6.623497346023418e-05, |
| "loss": 0.03, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.7590673575129534, |
| "grad_norm": 0.29220953583717346, |
| "learning_rate": 6.607849694751977e-05, |
| "loss": 0.0276, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.7655440414507773, |
| "grad_norm": 0.23505854606628418, |
| "learning_rate": 6.592184460293877e-05, |
| "loss": 0.0261, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.772020725388601, |
| "grad_norm": 0.2939005196094513, |
| "learning_rate": 6.576501813961609e-05, |
| "loss": 0.032, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.778497409326425, |
| "grad_norm": 0.24093054234981537, |
| "learning_rate": 6.56080192725808e-05, |
| "loss": 0.0398, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.784974093264249, |
| "grad_norm": 0.16263030469417572, |
| "learning_rate": 6.545084971874738e-05, |
| "loss": 0.0303, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.7914507772020727, |
| "grad_norm": 0.2163333296775818, |
| "learning_rate": 6.529351119689688e-05, |
| "loss": 0.0215, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.7979274611398965, |
| "grad_norm": 0.17953196167945862, |
| "learning_rate": 6.513600542765817e-05, |
| "loss": 0.0284, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.8044041450777204, |
| "grad_norm": 0.2565161883831024, |
| "learning_rate": 6.497833413348909e-05, |
| "loss": 0.0275, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.8108808290155443, |
| "grad_norm": 0.30938777327537537, |
| "learning_rate": 6.48204990386577e-05, |
| "loss": 0.0232, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.817357512953368, |
| "grad_norm": 0.4033896028995514, |
| "learning_rate": 6.466250186922325e-05, |
| "loss": 0.0245, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.823834196891192, |
| "grad_norm": 0.3601841628551483, |
| "learning_rate": 6.450434435301751e-05, |
| "loss": 0.0288, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.8303108808290154, |
| "grad_norm": 0.4404318332672119, |
| "learning_rate": 6.43460282196257e-05, |
| "loss": 0.032, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.8367875647668392, |
| "grad_norm": 0.37518399953842163, |
| "learning_rate": 6.418755520036775e-05, |
| "loss": 0.0301, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.843264248704663, |
| "grad_norm": 0.27603596448898315, |
| "learning_rate": 6.402892702827916e-05, |
| "loss": 0.023, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.849740932642487, |
| "grad_norm": 0.36747151613235474, |
| "learning_rate": 6.387014543809223e-05, |
| "loss": 0.0301, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.856217616580311, |
| "grad_norm": 0.28517892956733704, |
| "learning_rate": 6.371121216621698e-05, |
| "loss": 0.0239, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.8626943005181347, |
| "grad_norm": 0.5214961171150208, |
| "learning_rate": 6.355212895072223e-05, |
| "loss": 0.0268, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.8691709844559585, |
| "grad_norm": 0.35717862844467163, |
| "learning_rate": 6.339289753131649e-05, |
| "loss": 0.0253, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.8756476683937824, |
| "grad_norm": 0.23553046584129333, |
| "learning_rate": 6.323351964932908e-05, |
| "loss": 0.0291, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.8821243523316062, |
| "grad_norm": 0.3337346613407135, |
| "learning_rate": 6.307399704769099e-05, |
| "loss": 0.0265, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.88860103626943, |
| "grad_norm": 0.31009089946746826, |
| "learning_rate": 6.291433147091583e-05, |
| "loss": 0.022, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.895077720207254, |
| "grad_norm": 0.3828548789024353, |
| "learning_rate": 6.275452466508077e-05, |
| "loss": 0.0279, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.901554404145078, |
| "grad_norm": 0.28669974207878113, |
| "learning_rate": 6.259457837780742e-05, |
| "loss": 0.0225, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.9080310880829017, |
| "grad_norm": 0.31521838903427124, |
| "learning_rate": 6.243449435824276e-05, |
| "loss": 0.0255, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.9145077720207255, |
| "grad_norm": 0.308748334646225, |
| "learning_rate": 6.227427435703997e-05, |
| "loss": 0.0368, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.9209844559585494, |
| "grad_norm": 0.3998130261898041, |
| "learning_rate": 6.211392012633932e-05, |
| "loss": 0.0224, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.927461139896373, |
| "grad_norm": 0.440463662147522, |
| "learning_rate": 6.195343341974899e-05, |
| "loss": 0.0344, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.9339378238341967, |
| "grad_norm": 0.3381267786026001, |
| "learning_rate": 6.179281599232591e-05, |
| "loss": 0.0276, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.9404145077720205, |
| "grad_norm": 0.2546345293521881, |
| "learning_rate": 6.163206960055651e-05, |
| "loss": 0.021, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.9468911917098444, |
| "grad_norm": 0.49094951152801514, |
| "learning_rate": 6.147119600233758e-05, |
| "loss": 0.0307, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.9533678756476682, |
| "grad_norm": 0.506216824054718, |
| "learning_rate": 6.131019695695702e-05, |
| "loss": 0.0281, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.959844559585492, |
| "grad_norm": 0.33828750252723694, |
| "learning_rate": 6.11490742250746e-05, |
| "loss": 0.0285, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.966321243523316, |
| "grad_norm": 0.43117380142211914, |
| "learning_rate": 6.0987829568702656e-05, |
| "loss": 0.0275, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.97279792746114, |
| "grad_norm": 0.32091742753982544, |
| "learning_rate": 6.0826464751186994e-05, |
| "loss": 0.0235, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.9792746113989637, |
| "grad_norm": 0.3023090958595276, |
| "learning_rate": 6.066498153718735e-05, |
| "loss": 0.0222, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.9857512953367875, |
| "grad_norm": 0.3170013725757599, |
| "learning_rate": 6.05033816926583e-05, |
| "loss": 0.0357, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.9922279792746114, |
| "grad_norm": 0.39176446199417114, |
| "learning_rate": 6.034166698482984e-05, |
| "loss": 0.0222, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.9987046632124352, |
| "grad_norm": 0.3850434422492981, |
| "learning_rate": 6.017983918218812e-05, |
| "loss": 0.0268, |
| "step": 4630 |
| }, |
| { |
| "epoch": 3.005181347150259, |
| "grad_norm": 0.37918686866760254, |
| "learning_rate": 6.001790005445607e-05, |
| "loss": 0.0195, |
| "step": 4640 |
| }, |
| { |
| "epoch": 3.011658031088083, |
| "grad_norm": 0.21127471327781677, |
| "learning_rate": 5.985585137257401e-05, |
| "loss": 0.0311, |
| "step": 4650 |
| }, |
| { |
| "epoch": 3.018134715025907, |
| "grad_norm": 0.3583742082118988, |
| "learning_rate": 5.969369490868042e-05, |
| "loss": 0.0314, |
| "step": 4660 |
| }, |
| { |
| "epoch": 3.0246113989637307, |
| "grad_norm": 0.43034079670906067, |
| "learning_rate": 5.953143243609235e-05, |
| "loss": 0.0289, |
| "step": 4670 |
| }, |
| { |
| "epoch": 3.0310880829015545, |
| "grad_norm": 0.20105870068073273, |
| "learning_rate": 5.9369065729286245e-05, |
| "loss": 0.0239, |
| "step": 4680 |
| }, |
| { |
| "epoch": 3.0375647668393784, |
| "grad_norm": 0.4128076732158661, |
| "learning_rate": 5.9206596563878357e-05, |
| "loss": 0.0315, |
| "step": 4690 |
| }, |
| { |
| "epoch": 3.0440414507772022, |
| "grad_norm": 0.3665732145309448, |
| "learning_rate": 5.90440267166055e-05, |
| "loss": 0.0254, |
| "step": 4700 |
| }, |
| { |
| "epoch": 3.050518134715026, |
| "grad_norm": 0.3067231774330139, |
| "learning_rate": 5.888135796530544e-05, |
| "loss": 0.0287, |
| "step": 4710 |
| }, |
| { |
| "epoch": 3.05699481865285, |
| "grad_norm": 0.3856498897075653, |
| "learning_rate": 5.871859208889759e-05, |
| "loss": 0.0221, |
| "step": 4720 |
| }, |
| { |
| "epoch": 3.063471502590674, |
| "grad_norm": 0.31735512614250183, |
| "learning_rate": 5.85557308673635e-05, |
| "loss": 0.0266, |
| "step": 4730 |
| }, |
| { |
| "epoch": 3.069948186528497, |
| "grad_norm": 0.4213178753852844, |
| "learning_rate": 5.8392776081727385e-05, |
| "loss": 0.0279, |
| "step": 4740 |
| }, |
| { |
| "epoch": 3.076424870466321, |
| "grad_norm": 0.33281782269477844, |
| "learning_rate": 5.8229729514036705e-05, |
| "loss": 0.0196, |
| "step": 4750 |
| }, |
| { |
| "epoch": 3.082901554404145, |
| "grad_norm": 0.4126596748828888, |
| "learning_rate": 5.8066592947342555e-05, |
| "loss": 0.0202, |
| "step": 4760 |
| }, |
| { |
| "epoch": 3.089378238341969, |
| "grad_norm": 0.2282947152853012, |
| "learning_rate": 5.7903368165680327e-05, |
| "loss": 0.0226, |
| "step": 4770 |
| }, |
| { |
| "epoch": 3.0958549222797926, |
| "grad_norm": 0.44935131072998047, |
| "learning_rate": 5.7740056954050084e-05, |
| "loss": 0.0303, |
| "step": 4780 |
| }, |
| { |
| "epoch": 3.1023316062176165, |
| "grad_norm": 0.37293264269828796, |
| "learning_rate": 5.757666109839702e-05, |
| "loss": 0.0353, |
| "step": 4790 |
| }, |
| { |
| "epoch": 3.1088082901554404, |
| "grad_norm": 0.20915338397026062, |
| "learning_rate": 5.74131823855921e-05, |
| "loss": 0.0267, |
| "step": 4800 |
| }, |
| { |
| "epoch": 3.115284974093264, |
| "grad_norm": 0.26994702219963074, |
| "learning_rate": 5.72496226034123e-05, |
| "loss": 0.0247, |
| "step": 4810 |
| }, |
| { |
| "epoch": 3.121761658031088, |
| "grad_norm": 0.3534125089645386, |
| "learning_rate": 5.7085983540521216e-05, |
| "loss": 0.0231, |
| "step": 4820 |
| }, |
| { |
| "epoch": 3.128238341968912, |
| "grad_norm": 0.3730353116989136, |
| "learning_rate": 5.692226698644938e-05, |
| "loss": 0.0241, |
| "step": 4830 |
| }, |
| { |
| "epoch": 3.134715025906736, |
| "grad_norm": 0.25073689222335815, |
| "learning_rate": 5.675847473157485e-05, |
| "loss": 0.0228, |
| "step": 4840 |
| }, |
| { |
| "epoch": 3.1411917098445596, |
| "grad_norm": 0.23234787583351135, |
| "learning_rate": 5.6594608567103456e-05, |
| "loss": 0.0165, |
| "step": 4850 |
| }, |
| { |
| "epoch": 3.1476683937823835, |
| "grad_norm": 0.2170400619506836, |
| "learning_rate": 5.6430670285049314e-05, |
| "loss": 0.0188, |
| "step": 4860 |
| }, |
| { |
| "epoch": 3.1541450777202074, |
| "grad_norm": 0.38324370980262756, |
| "learning_rate": 5.6266661678215216e-05, |
| "loss": 0.0313, |
| "step": 4870 |
| }, |
| { |
| "epoch": 3.160621761658031, |
| "grad_norm": 0.3521007001399994, |
| "learning_rate": 5.6102584540173006e-05, |
| "loss": 0.0239, |
| "step": 4880 |
| }, |
| { |
| "epoch": 3.167098445595855, |
| "grad_norm": 0.2577424943447113, |
| "learning_rate": 5.5938440665244006e-05, |
| "loss": 0.0212, |
| "step": 4890 |
| }, |
| { |
| "epoch": 3.173575129533679, |
| "grad_norm": 0.36752060055732727, |
| "learning_rate": 5.577423184847932e-05, |
| "loss": 0.0226, |
| "step": 4900 |
| }, |
| { |
| "epoch": 3.180051813471503, |
| "grad_norm": 0.3478221595287323, |
| "learning_rate": 5.560995988564023e-05, |
| "loss": 0.024, |
| "step": 4910 |
| }, |
| { |
| "epoch": 3.186528497409326, |
| "grad_norm": 0.31752872467041016, |
| "learning_rate": 5.544562657317863e-05, |
| "loss": 0.0301, |
| "step": 4920 |
| }, |
| { |
| "epoch": 3.19300518134715, |
| "grad_norm": 0.22945484519004822, |
| "learning_rate": 5.52812337082173e-05, |
| "loss": 0.0319, |
| "step": 4930 |
| }, |
| { |
| "epoch": 3.199481865284974, |
| "grad_norm": 0.3376820385456085, |
| "learning_rate": 5.511678308853026e-05, |
| "loss": 0.0229, |
| "step": 4940 |
| }, |
| { |
| "epoch": 3.2059585492227978, |
| "grad_norm": 0.4235116243362427, |
| "learning_rate": 5.495227651252315e-05, |
| "loss": 0.0223, |
| "step": 4950 |
| }, |
| { |
| "epoch": 3.2124352331606216, |
| "grad_norm": 0.3130274713039398, |
| "learning_rate": 5.478771577921351e-05, |
| "loss": 0.0302, |
| "step": 4960 |
| }, |
| { |
| "epoch": 3.2189119170984455, |
| "grad_norm": 0.27809765934944153, |
| "learning_rate": 5.462310268821118e-05, |
| "loss": 0.0223, |
| "step": 4970 |
| }, |
| { |
| "epoch": 3.2253886010362693, |
| "grad_norm": 0.2564016580581665, |
| "learning_rate": 5.445843903969854e-05, |
| "loss": 0.0232, |
| "step": 4980 |
| }, |
| { |
| "epoch": 3.231865284974093, |
| "grad_norm": 0.28008344769477844, |
| "learning_rate": 5.4293726634410855e-05, |
| "loss": 0.0211, |
| "step": 4990 |
| }, |
| { |
| "epoch": 3.238341968911917, |
| "grad_norm": 0.19657611846923828, |
| "learning_rate": 5.4128967273616625e-05, |
| "loss": 0.0251, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.244818652849741, |
| "grad_norm": 0.2586984634399414, |
| "learning_rate": 5.396416275909779e-05, |
| "loss": 0.0249, |
| "step": 5010 |
| }, |
| { |
| "epoch": 3.2512953367875648, |
| "grad_norm": 0.474925696849823, |
| "learning_rate": 5.379931489313016e-05, |
| "loss": 0.0205, |
| "step": 5020 |
| }, |
| { |
| "epoch": 3.2577720207253886, |
| "grad_norm": 0.4486338198184967, |
| "learning_rate": 5.363442547846356e-05, |
| "loss": 0.0274, |
| "step": 5030 |
| }, |
| { |
| "epoch": 3.2642487046632125, |
| "grad_norm": 0.23376742005348206, |
| "learning_rate": 5.3469496318302204e-05, |
| "loss": 0.0199, |
| "step": 5040 |
| }, |
| { |
| "epoch": 3.2707253886010363, |
| "grad_norm": 0.19954350590705872, |
| "learning_rate": 5.330452921628497e-05, |
| "loss": 0.0203, |
| "step": 5050 |
| }, |
| { |
| "epoch": 3.27720207253886, |
| "grad_norm": 0.2920505404472351, |
| "learning_rate": 5.313952597646568e-05, |
| "loss": 0.0211, |
| "step": 5060 |
| }, |
| { |
| "epoch": 3.283678756476684, |
| "grad_norm": 0.4120335578918457, |
| "learning_rate": 5.297448840329329e-05, |
| "loss": 0.026, |
| "step": 5070 |
| }, |
| { |
| "epoch": 3.290155440414508, |
| "grad_norm": 0.3703533411026001, |
| "learning_rate": 5.280941830159227e-05, |
| "loss": 0.0249, |
| "step": 5080 |
| }, |
| { |
| "epoch": 3.2966321243523318, |
| "grad_norm": 0.342776894569397, |
| "learning_rate": 5.264431747654284e-05, |
| "loss": 0.0255, |
| "step": 5090 |
| }, |
| { |
| "epoch": 3.3031088082901556, |
| "grad_norm": 0.42008474469184875, |
| "learning_rate": 5.247918773366112e-05, |
| "loss": 0.0381, |
| "step": 5100 |
| }, |
| { |
| "epoch": 3.3095854922279795, |
| "grad_norm": 0.3160938620567322, |
| "learning_rate": 5.231403087877955e-05, |
| "loss": 0.0235, |
| "step": 5110 |
| }, |
| { |
| "epoch": 3.3160621761658033, |
| "grad_norm": 0.2879834473133087, |
| "learning_rate": 5.214884871802703e-05, |
| "loss": 0.0253, |
| "step": 5120 |
| }, |
| { |
| "epoch": 3.3225388601036268, |
| "grad_norm": 0.43620437383651733, |
| "learning_rate": 5.198364305780922e-05, |
| "loss": 0.0222, |
| "step": 5130 |
| }, |
| { |
| "epoch": 3.3290155440414506, |
| "grad_norm": 0.3343920111656189, |
| "learning_rate": 5.1818415704788725e-05, |
| "loss": 0.0233, |
| "step": 5140 |
| }, |
| { |
| "epoch": 3.3354922279792745, |
| "grad_norm": 0.2971336543560028, |
| "learning_rate": 5.165316846586541e-05, |
| "loss": 0.03, |
| "step": 5150 |
| }, |
| { |
| "epoch": 3.3419689119170983, |
| "grad_norm": 0.28571948409080505, |
| "learning_rate": 5.148790314815663e-05, |
| "loss": 0.021, |
| "step": 5160 |
| }, |
| { |
| "epoch": 3.348445595854922, |
| "grad_norm": 0.25450941920280457, |
| "learning_rate": 5.132262155897739e-05, |
| "loss": 0.0251, |
| "step": 5170 |
| }, |
| { |
| "epoch": 3.354922279792746, |
| "grad_norm": 0.3214353322982788, |
| "learning_rate": 5.1157325505820694e-05, |
| "loss": 0.0312, |
| "step": 5180 |
| }, |
| { |
| "epoch": 3.36139896373057, |
| "grad_norm": 0.20665358006954193, |
| "learning_rate": 5.0992016796337686e-05, |
| "loss": 0.0186, |
| "step": 5190 |
| }, |
| { |
| "epoch": 3.3678756476683938, |
| "grad_norm": 0.40207940340042114, |
| "learning_rate": 5.0826697238317935e-05, |
| "loss": 0.0267, |
| "step": 5200 |
| }, |
| { |
| "epoch": 3.3743523316062176, |
| "grad_norm": 0.3291640281677246, |
| "learning_rate": 5.066136863966963e-05, |
| "loss": 0.0237, |
| "step": 5210 |
| }, |
| { |
| "epoch": 3.3808290155440415, |
| "grad_norm": 0.2767675817012787, |
| "learning_rate": 5.0496032808399815e-05, |
| "loss": 0.0211, |
| "step": 5220 |
| }, |
| { |
| "epoch": 3.3873056994818653, |
| "grad_norm": 0.20625481009483337, |
| "learning_rate": 5.033069155259471e-05, |
| "loss": 0.0306, |
| "step": 5230 |
| }, |
| { |
| "epoch": 3.393782383419689, |
| "grad_norm": 0.39488154649734497, |
| "learning_rate": 5.016534668039976e-05, |
| "loss": 0.0219, |
| "step": 5240 |
| }, |
| { |
| "epoch": 3.400259067357513, |
| "grad_norm": 0.20591653883457184, |
| "learning_rate": 5e-05, |
| "loss": 0.0179, |
| "step": 5250 |
| }, |
| { |
| "epoch": 3.406735751295337, |
| "grad_norm": 0.26007452607154846, |
| "learning_rate": 4.9834653319600246e-05, |
| "loss": 0.0249, |
| "step": 5260 |
| }, |
| { |
| "epoch": 3.4132124352331608, |
| "grad_norm": 0.26599636673927307, |
| "learning_rate": 4.96693084474053e-05, |
| "loss": 0.0206, |
| "step": 5270 |
| }, |
| { |
| "epoch": 3.4196891191709846, |
| "grad_norm": 0.13836756348609924, |
| "learning_rate": 4.950396719160018e-05, |
| "loss": 0.0168, |
| "step": 5280 |
| }, |
| { |
| "epoch": 3.4261658031088085, |
| "grad_norm": 0.2556383013725281, |
| "learning_rate": 4.93386313603304e-05, |
| "loss": 0.0159, |
| "step": 5290 |
| }, |
| { |
| "epoch": 3.432642487046632, |
| "grad_norm": 0.23129259049892426, |
| "learning_rate": 4.917330276168208e-05, |
| "loss": 0.0231, |
| "step": 5300 |
| }, |
| { |
| "epoch": 3.4391191709844557, |
| "grad_norm": 0.2568337619304657, |
| "learning_rate": 4.9007983203662326e-05, |
| "loss": 0.0398, |
| "step": 5310 |
| }, |
| { |
| "epoch": 3.4455958549222796, |
| "grad_norm": 0.31978926062583923, |
| "learning_rate": 4.884267449417931e-05, |
| "loss": 0.0227, |
| "step": 5320 |
| }, |
| { |
| "epoch": 3.4520725388601035, |
| "grad_norm": 0.3868251442909241, |
| "learning_rate": 4.867737844102261e-05, |
| "loss": 0.0331, |
| "step": 5330 |
| }, |
| { |
| "epoch": 3.4585492227979273, |
| "grad_norm": 0.28642046451568604, |
| "learning_rate": 4.851209685184338e-05, |
| "loss": 0.0249, |
| "step": 5340 |
| }, |
| { |
| "epoch": 3.465025906735751, |
| "grad_norm": 0.3905954360961914, |
| "learning_rate": 4.834683153413459e-05, |
| "loss": 0.0211, |
| "step": 5350 |
| }, |
| { |
| "epoch": 3.471502590673575, |
| "grad_norm": 0.2978500425815582, |
| "learning_rate": 4.818158429521129e-05, |
| "loss": 0.0321, |
| "step": 5360 |
| }, |
| { |
| "epoch": 3.477979274611399, |
| "grad_norm": 0.40893664956092834, |
| "learning_rate": 4.801635694219079e-05, |
| "loss": 0.0187, |
| "step": 5370 |
| }, |
| { |
| "epoch": 3.4844559585492227, |
| "grad_norm": 0.33391204476356506, |
| "learning_rate": 4.785115128197298e-05, |
| "loss": 0.0272, |
| "step": 5380 |
| }, |
| { |
| "epoch": 3.4909326424870466, |
| "grad_norm": 0.4223330616950989, |
| "learning_rate": 4.7685969121220456e-05, |
| "loss": 0.0247, |
| "step": 5390 |
| }, |
| { |
| "epoch": 3.4974093264248705, |
| "grad_norm": 0.23924241960048676, |
| "learning_rate": 4.7520812266338885e-05, |
| "loss": 0.0256, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.5038860103626943, |
| "grad_norm": 0.21827708184719086, |
| "learning_rate": 4.735568252345718e-05, |
| "loss": 0.0241, |
| "step": 5410 |
| }, |
| { |
| "epoch": 3.510362694300518, |
| "grad_norm": 0.36227914690971375, |
| "learning_rate": 4.7190581698407725e-05, |
| "loss": 0.0276, |
| "step": 5420 |
| }, |
| { |
| "epoch": 3.516839378238342, |
| "grad_norm": 0.2625763416290283, |
| "learning_rate": 4.702551159670672e-05, |
| "loss": 0.0239, |
| "step": 5430 |
| }, |
| { |
| "epoch": 3.523316062176166, |
| "grad_norm": 0.2994100749492645, |
| "learning_rate": 4.6860474023534335e-05, |
| "loss": 0.0245, |
| "step": 5440 |
| }, |
| { |
| "epoch": 3.5297927461139897, |
| "grad_norm": 0.31954896450042725, |
| "learning_rate": 4.669547078371504e-05, |
| "loss": 0.023, |
| "step": 5450 |
| }, |
| { |
| "epoch": 3.5362694300518136, |
| "grad_norm": 0.22966216504573822, |
| "learning_rate": 4.65305036816978e-05, |
| "loss": 0.0181, |
| "step": 5460 |
| }, |
| { |
| "epoch": 3.5427461139896375, |
| "grad_norm": 0.35159385204315186, |
| "learning_rate": 4.6365574521536445e-05, |
| "loss": 0.0242, |
| "step": 5470 |
| }, |
| { |
| "epoch": 3.5492227979274613, |
| "grad_norm": 0.2604275047779083, |
| "learning_rate": 4.620068510686985e-05, |
| "loss": 0.021, |
| "step": 5480 |
| }, |
| { |
| "epoch": 3.555699481865285, |
| "grad_norm": 0.27099111676216125, |
| "learning_rate": 4.60358372409022e-05, |
| "loss": 0.0187, |
| "step": 5490 |
| }, |
| { |
| "epoch": 3.562176165803109, |
| "grad_norm": 0.36249786615371704, |
| "learning_rate": 4.5871032726383386e-05, |
| "loss": 0.0174, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.568652849740933, |
| "grad_norm": 0.20331737399101257, |
| "learning_rate": 4.570627336558915e-05, |
| "loss": 0.0215, |
| "step": 5510 |
| }, |
| { |
| "epoch": 3.5751295336787567, |
| "grad_norm": 0.1920534372329712, |
| "learning_rate": 4.554156096030149e-05, |
| "loss": 0.0199, |
| "step": 5520 |
| }, |
| { |
| "epoch": 3.58160621761658, |
| "grad_norm": 0.24783368408679962, |
| "learning_rate": 4.537689731178883e-05, |
| "loss": 0.0177, |
| "step": 5530 |
| }, |
| { |
| "epoch": 3.588082901554404, |
| "grad_norm": 0.41117021441459656, |
| "learning_rate": 4.5212284220786494e-05, |
| "loss": 0.0217, |
| "step": 5540 |
| }, |
| { |
| "epoch": 3.594559585492228, |
| "grad_norm": 0.28663378953933716, |
| "learning_rate": 4.504772348747687e-05, |
| "loss": 0.0195, |
| "step": 5550 |
| }, |
| { |
| "epoch": 3.6010362694300517, |
| "grad_norm": 0.1578565090894699, |
| "learning_rate": 4.488321691146975e-05, |
| "loss": 0.0245, |
| "step": 5560 |
| }, |
| { |
| "epoch": 3.6075129533678756, |
| "grad_norm": 0.2837083637714386, |
| "learning_rate": 4.471876629178273e-05, |
| "loss": 0.0244, |
| "step": 5570 |
| }, |
| { |
| "epoch": 3.6139896373056994, |
| "grad_norm": 0.41021156311035156, |
| "learning_rate": 4.4554373426821374e-05, |
| "loss": 0.0176, |
| "step": 5580 |
| }, |
| { |
| "epoch": 3.6204663212435233, |
| "grad_norm": 0.459164023399353, |
| "learning_rate": 4.439004011435979e-05, |
| "loss": 0.022, |
| "step": 5590 |
| }, |
| { |
| "epoch": 3.626943005181347, |
| "grad_norm": 0.3070141077041626, |
| "learning_rate": 4.4225768151520694e-05, |
| "loss": 0.0207, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.633419689119171, |
| "grad_norm": 0.2605834901332855, |
| "learning_rate": 4.406155933475599e-05, |
| "loss": 0.0201, |
| "step": 5610 |
| }, |
| { |
| "epoch": 3.639896373056995, |
| "grad_norm": 0.33735018968582153, |
| "learning_rate": 4.3897415459827e-05, |
| "loss": 0.0235, |
| "step": 5620 |
| }, |
| { |
| "epoch": 3.6463730569948187, |
| "grad_norm": 0.1962365359067917, |
| "learning_rate": 4.373333832178478e-05, |
| "loss": 0.0143, |
| "step": 5630 |
| }, |
| { |
| "epoch": 3.6528497409326426, |
| "grad_norm": 0.21969126164913177, |
| "learning_rate": 4.3569329714950704e-05, |
| "loss": 0.0201, |
| "step": 5640 |
| }, |
| { |
| "epoch": 3.6593264248704664, |
| "grad_norm": 0.255919486284256, |
| "learning_rate": 4.3405391432896555e-05, |
| "loss": 0.0237, |
| "step": 5650 |
| }, |
| { |
| "epoch": 3.6658031088082903, |
| "grad_norm": 0.21406243741512299, |
| "learning_rate": 4.324152526842517e-05, |
| "loss": 0.0213, |
| "step": 5660 |
| }, |
| { |
| "epoch": 3.6722797927461137, |
| "grad_norm": 0.18419137597084045, |
| "learning_rate": 4.307773301355062e-05, |
| "loss": 0.0192, |
| "step": 5670 |
| }, |
| { |
| "epoch": 3.6787564766839376, |
| "grad_norm": 0.28473156690597534, |
| "learning_rate": 4.291401645947879e-05, |
| "loss": 0.0305, |
| "step": 5680 |
| }, |
| { |
| "epoch": 3.6852331606217614, |
| "grad_norm": 0.16764795780181885, |
| "learning_rate": 4.275037739658771e-05, |
| "loss": 0.0162, |
| "step": 5690 |
| }, |
| { |
| "epoch": 3.6917098445595853, |
| "grad_norm": 0.1837482899427414, |
| "learning_rate": 4.2586817614407895e-05, |
| "loss": 0.0218, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.698186528497409, |
| "grad_norm": 0.3757379651069641, |
| "learning_rate": 4.2423338901602985e-05, |
| "loss": 0.0295, |
| "step": 5710 |
| }, |
| { |
| "epoch": 3.704663212435233, |
| "grad_norm": 0.284368097782135, |
| "learning_rate": 4.2259943045949934e-05, |
| "loss": 0.0203, |
| "step": 5720 |
| }, |
| { |
| "epoch": 3.711139896373057, |
| "grad_norm": 0.3066781759262085, |
| "learning_rate": 4.209663183431969e-05, |
| "loss": 0.0198, |
| "step": 5730 |
| }, |
| { |
| "epoch": 3.7176165803108807, |
| "grad_norm": 0.26480358839035034, |
| "learning_rate": 4.1933407052657456e-05, |
| "loss": 0.0204, |
| "step": 5740 |
| }, |
| { |
| "epoch": 3.7240932642487046, |
| "grad_norm": 0.3328205347061157, |
| "learning_rate": 4.17702704859633e-05, |
| "loss": 0.0213, |
| "step": 5750 |
| }, |
| { |
| "epoch": 3.7305699481865284, |
| "grad_norm": 0.3852354884147644, |
| "learning_rate": 4.160722391827262e-05, |
| "loss": 0.0206, |
| "step": 5760 |
| }, |
| { |
| "epoch": 3.7370466321243523, |
| "grad_norm": 0.1848895400762558, |
| "learning_rate": 4.14442691326365e-05, |
| "loss": 0.0156, |
| "step": 5770 |
| }, |
| { |
| "epoch": 3.743523316062176, |
| "grad_norm": 0.4053875207901001, |
| "learning_rate": 4.1281407911102425e-05, |
| "loss": 0.0242, |
| "step": 5780 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.21550576388835907, |
| "learning_rate": 4.111864203469457e-05, |
| "loss": 0.0179, |
| "step": 5790 |
| }, |
| { |
| "epoch": 3.756476683937824, |
| "grad_norm": 0.3576870262622833, |
| "learning_rate": 4.095597328339452e-05, |
| "loss": 0.0233, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.7629533678756477, |
| "grad_norm": 0.5374760627746582, |
| "learning_rate": 4.079340343612165e-05, |
| "loss": 0.0249, |
| "step": 5810 |
| }, |
| { |
| "epoch": 3.7694300518134716, |
| "grad_norm": 0.2569693922996521, |
| "learning_rate": 4.063093427071376e-05, |
| "loss": 0.029, |
| "step": 5820 |
| }, |
| { |
| "epoch": 3.7759067357512954, |
| "grad_norm": 0.2154322862625122, |
| "learning_rate": 4.046856756390767e-05, |
| "loss": 0.0232, |
| "step": 5830 |
| }, |
| { |
| "epoch": 3.7823834196891193, |
| "grad_norm": 0.22250936925411224, |
| "learning_rate": 4.0306305091319595e-05, |
| "loss": 0.019, |
| "step": 5840 |
| }, |
| { |
| "epoch": 3.788860103626943, |
| "grad_norm": 0.29183244705200195, |
| "learning_rate": 4.0144148627425993e-05, |
| "loss": 0.0203, |
| "step": 5850 |
| }, |
| { |
| "epoch": 3.795336787564767, |
| "grad_norm": 0.3605186939239502, |
| "learning_rate": 3.9982099945543945e-05, |
| "loss": 0.0202, |
| "step": 5860 |
| }, |
| { |
| "epoch": 3.801813471502591, |
| "grad_norm": 0.19650620222091675, |
| "learning_rate": 3.982016081781189e-05, |
| "loss": 0.0169, |
| "step": 5870 |
| }, |
| { |
| "epoch": 3.8082901554404147, |
| "grad_norm": 0.289876788854599, |
| "learning_rate": 3.965833301517017e-05, |
| "loss": 0.0272, |
| "step": 5880 |
| }, |
| { |
| "epoch": 3.8147668393782386, |
| "grad_norm": 0.291120320558548, |
| "learning_rate": 3.949661830734172e-05, |
| "loss": 0.0159, |
| "step": 5890 |
| }, |
| { |
| "epoch": 3.8212435233160624, |
| "grad_norm": 0.32480406761169434, |
| "learning_rate": 3.933501846281267e-05, |
| "loss": 0.0191, |
| "step": 5900 |
| }, |
| { |
| "epoch": 3.8277202072538863, |
| "grad_norm": 0.449545681476593, |
| "learning_rate": 3.917353524881302e-05, |
| "loss": 0.0297, |
| "step": 5910 |
| }, |
| { |
| "epoch": 3.8341968911917097, |
| "grad_norm": 0.26695263385772705, |
| "learning_rate": 3.901217043129735e-05, |
| "loss": 0.0255, |
| "step": 5920 |
| }, |
| { |
| "epoch": 3.8406735751295336, |
| "grad_norm": 0.4865301549434662, |
| "learning_rate": 3.8850925774925425e-05, |
| "loss": 0.0219, |
| "step": 5930 |
| }, |
| { |
| "epoch": 3.8471502590673574, |
| "grad_norm": 0.31623756885528564, |
| "learning_rate": 3.8689803043043e-05, |
| "loss": 0.0226, |
| "step": 5940 |
| }, |
| { |
| "epoch": 3.8536269430051813, |
| "grad_norm": 0.19429579377174377, |
| "learning_rate": 3.852880399766243e-05, |
| "loss": 0.0173, |
| "step": 5950 |
| }, |
| { |
| "epoch": 3.860103626943005, |
| "grad_norm": 0.24352151155471802, |
| "learning_rate": 3.836793039944349e-05, |
| "loss": 0.0211, |
| "step": 5960 |
| }, |
| { |
| "epoch": 3.866580310880829, |
| "grad_norm": 0.24858231842517853, |
| "learning_rate": 3.820718400767409e-05, |
| "loss": 0.02, |
| "step": 5970 |
| }, |
| { |
| "epoch": 3.873056994818653, |
| "grad_norm": 0.22467108070850372, |
| "learning_rate": 3.8046566580251e-05, |
| "loss": 0.0145, |
| "step": 5980 |
| }, |
| { |
| "epoch": 3.8795336787564767, |
| "grad_norm": 0.23540061712265015, |
| "learning_rate": 3.788607987366069e-05, |
| "loss": 0.0191, |
| "step": 5990 |
| }, |
| { |
| "epoch": 3.8860103626943006, |
| "grad_norm": 0.22473685443401337, |
| "learning_rate": 3.772572564296005e-05, |
| "loss": 0.0221, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.8924870466321244, |
| "grad_norm": 0.40158477425575256, |
| "learning_rate": 3.756550564175727e-05, |
| "loss": 0.0235, |
| "step": 6010 |
| }, |
| { |
| "epoch": 3.8989637305699483, |
| "grad_norm": 0.3086557686328888, |
| "learning_rate": 3.74054216221926e-05, |
| "loss": 0.0166, |
| "step": 6020 |
| }, |
| { |
| "epoch": 3.905440414507772, |
| "grad_norm": 0.3648836612701416, |
| "learning_rate": 3.7245475334919246e-05, |
| "loss": 0.0264, |
| "step": 6030 |
| }, |
| { |
| "epoch": 3.911917098445596, |
| "grad_norm": 0.12361598759889603, |
| "learning_rate": 3.7085668529084184e-05, |
| "loss": 0.026, |
| "step": 6040 |
| }, |
| { |
| "epoch": 3.91839378238342, |
| "grad_norm": 0.25430572032928467, |
| "learning_rate": 3.6926002952309016e-05, |
| "loss": 0.0198, |
| "step": 6050 |
| }, |
| { |
| "epoch": 3.9248704663212433, |
| "grad_norm": 0.2055133730173111, |
| "learning_rate": 3.676648035067093e-05, |
| "loss": 0.0211, |
| "step": 6060 |
| }, |
| { |
| "epoch": 3.931347150259067, |
| "grad_norm": 0.20963652431964874, |
| "learning_rate": 3.6607102468683526e-05, |
| "loss": 0.0213, |
| "step": 6070 |
| }, |
| { |
| "epoch": 3.937823834196891, |
| "grad_norm": 0.2894296646118164, |
| "learning_rate": 3.6447871049277796e-05, |
| "loss": 0.016, |
| "step": 6080 |
| }, |
| { |
| "epoch": 3.944300518134715, |
| "grad_norm": 0.31744205951690674, |
| "learning_rate": 3.628878783378302e-05, |
| "loss": 0.0177, |
| "step": 6090 |
| }, |
| { |
| "epoch": 3.9507772020725387, |
| "grad_norm": 0.2509874105453491, |
| "learning_rate": 3.612985456190778e-05, |
| "loss": 0.0282, |
| "step": 6100 |
| }, |
| { |
| "epoch": 3.9572538860103625, |
| "grad_norm": 0.22488081455230713, |
| "learning_rate": 3.597107297172084e-05, |
| "loss": 0.0259, |
| "step": 6110 |
| }, |
| { |
| "epoch": 3.9637305699481864, |
| "grad_norm": 0.255126953125, |
| "learning_rate": 3.581244479963225e-05, |
| "loss": 0.0209, |
| "step": 6120 |
| }, |
| { |
| "epoch": 3.9702072538860103, |
| "grad_norm": 0.2088916003704071, |
| "learning_rate": 3.5653971780374295e-05, |
| "loss": 0.0233, |
| "step": 6130 |
| }, |
| { |
| "epoch": 3.976683937823834, |
| "grad_norm": 0.24006441235542297, |
| "learning_rate": 3.5495655646982505e-05, |
| "loss": 0.025, |
| "step": 6140 |
| }, |
| { |
| "epoch": 3.983160621761658, |
| "grad_norm": 0.25505682826042175, |
| "learning_rate": 3.533749813077677e-05, |
| "loss": 0.0187, |
| "step": 6150 |
| }, |
| { |
| "epoch": 3.989637305699482, |
| "grad_norm": 0.3427753746509552, |
| "learning_rate": 3.517950096134232e-05, |
| "loss": 0.02, |
| "step": 6160 |
| }, |
| { |
| "epoch": 3.9961139896373057, |
| "grad_norm": 0.1884257197380066, |
| "learning_rate": 3.5021665866510925e-05, |
| "loss": 0.0175, |
| "step": 6170 |
| }, |
| { |
| "epoch": 4.0025906735751295, |
| "grad_norm": 0.45834600925445557, |
| "learning_rate": 3.4863994572341843e-05, |
| "loss": 0.0235, |
| "step": 6180 |
| }, |
| { |
| "epoch": 4.009067357512953, |
| "grad_norm": 0.3222751319408417, |
| "learning_rate": 3.470648880310313e-05, |
| "loss": 0.0142, |
| "step": 6190 |
| }, |
| { |
| "epoch": 4.015544041450777, |
| "grad_norm": 0.3138159215450287, |
| "learning_rate": 3.4549150281252636e-05, |
| "loss": 0.0215, |
| "step": 6200 |
| }, |
| { |
| "epoch": 4.022020725388601, |
| "grad_norm": 0.2751568853855133, |
| "learning_rate": 3.439198072741921e-05, |
| "loss": 0.0263, |
| "step": 6210 |
| }, |
| { |
| "epoch": 4.028497409326425, |
| "grad_norm": 0.3685873746871948, |
| "learning_rate": 3.423498186038393e-05, |
| "loss": 0.0241, |
| "step": 6220 |
| }, |
| { |
| "epoch": 4.034974093264249, |
| "grad_norm": 0.243320032954216, |
| "learning_rate": 3.407815539706124e-05, |
| "loss": 0.0201, |
| "step": 6230 |
| }, |
| { |
| "epoch": 4.041450777202073, |
| "grad_norm": 0.26391202211380005, |
| "learning_rate": 3.392150305248024e-05, |
| "loss": 0.0206, |
| "step": 6240 |
| }, |
| { |
| "epoch": 4.0479274611398965, |
| "grad_norm": 0.3051769733428955, |
| "learning_rate": 3.3765026539765834e-05, |
| "loss": 0.0174, |
| "step": 6250 |
| }, |
| { |
| "epoch": 4.05440414507772, |
| "grad_norm": 0.2602379620075226, |
| "learning_rate": 3.360872757012011e-05, |
| "loss": 0.0206, |
| "step": 6260 |
| }, |
| { |
| "epoch": 4.060880829015544, |
| "grad_norm": 0.37795671820640564, |
| "learning_rate": 3.3452607852803584e-05, |
| "loss": 0.0216, |
| "step": 6270 |
| }, |
| { |
| "epoch": 4.067357512953368, |
| "grad_norm": 0.20276519656181335, |
| "learning_rate": 3.329666909511645e-05, |
| "loss": 0.022, |
| "step": 6280 |
| }, |
| { |
| "epoch": 4.073834196891192, |
| "grad_norm": 0.2749174237251282, |
| "learning_rate": 3.3140913002379995e-05, |
| "loss": 0.019, |
| "step": 6290 |
| }, |
| { |
| "epoch": 4.080310880829016, |
| "grad_norm": 0.27063703536987305, |
| "learning_rate": 3.298534127791785e-05, |
| "loss": 0.0164, |
| "step": 6300 |
| }, |
| { |
| "epoch": 4.08678756476684, |
| "grad_norm": 0.2670196294784546, |
| "learning_rate": 3.282995562303754e-05, |
| "loss": 0.0178, |
| "step": 6310 |
| }, |
| { |
| "epoch": 4.0932642487046635, |
| "grad_norm": 0.24421292543411255, |
| "learning_rate": 3.267475773701161e-05, |
| "loss": 0.0149, |
| "step": 6320 |
| }, |
| { |
| "epoch": 4.099740932642487, |
| "grad_norm": 0.16270811855793, |
| "learning_rate": 3.251974931705933e-05, |
| "loss": 0.0202, |
| "step": 6330 |
| }, |
| { |
| "epoch": 4.106217616580311, |
| "grad_norm": 0.27116742730140686, |
| "learning_rate": 3.236493205832795e-05, |
| "loss": 0.0228, |
| "step": 6340 |
| }, |
| { |
| "epoch": 4.112694300518135, |
| "grad_norm": 0.29166123270988464, |
| "learning_rate": 3.221030765387417e-05, |
| "loss": 0.0225, |
| "step": 6350 |
| }, |
| { |
| "epoch": 4.119170984455959, |
| "grad_norm": 0.30937689542770386, |
| "learning_rate": 3.205587779464576e-05, |
| "loss": 0.0195, |
| "step": 6360 |
| }, |
| { |
| "epoch": 4.125647668393782, |
| "grad_norm": 0.32410162687301636, |
| "learning_rate": 3.190164416946285e-05, |
| "loss": 0.0154, |
| "step": 6370 |
| }, |
| { |
| "epoch": 4.132124352331606, |
| "grad_norm": 0.37122395634651184, |
| "learning_rate": 3.1747608464999725e-05, |
| "loss": 0.0197, |
| "step": 6380 |
| }, |
| { |
| "epoch": 4.13860103626943, |
| "grad_norm": 0.2892199754714966, |
| "learning_rate": 3.1593772365766105e-05, |
| "loss": 0.0253, |
| "step": 6390 |
| }, |
| { |
| "epoch": 4.1450777202072535, |
| "grad_norm": 0.20431406795978546, |
| "learning_rate": 3.144013755408895e-05, |
| "loss": 0.0181, |
| "step": 6400 |
| }, |
| { |
| "epoch": 4.151554404145077, |
| "grad_norm": 0.2995465099811554, |
| "learning_rate": 3.128670571009399e-05, |
| "loss": 0.0209, |
| "step": 6410 |
| }, |
| { |
| "epoch": 4.158031088082901, |
| "grad_norm": 0.24306261539459229, |
| "learning_rate": 3.113347851168721e-05, |
| "loss": 0.0207, |
| "step": 6420 |
| }, |
| { |
| "epoch": 4.164507772020725, |
| "grad_norm": 0.19058877229690552, |
| "learning_rate": 3.098045763453678e-05, |
| "loss": 0.0173, |
| "step": 6430 |
| }, |
| { |
| "epoch": 4.170984455958549, |
| "grad_norm": 0.15538224577903748, |
| "learning_rate": 3.082764475205442e-05, |
| "loss": 0.018, |
| "step": 6440 |
| }, |
| { |
| "epoch": 4.177461139896373, |
| "grad_norm": 0.3089154064655304, |
| "learning_rate": 3.0675041535377405e-05, |
| "loss": 0.0178, |
| "step": 6450 |
| }, |
| { |
| "epoch": 4.183937823834197, |
| "grad_norm": 0.19737383723258972, |
| "learning_rate": 3.052264965335e-05, |
| "loss": 0.015, |
| "step": 6460 |
| }, |
| { |
| "epoch": 4.1904145077720205, |
| "grad_norm": 0.25178325176239014, |
| "learning_rate": 3.0370470772505433e-05, |
| "loss": 0.0141, |
| "step": 6470 |
| }, |
| { |
| "epoch": 4.196891191709844, |
| "grad_norm": 0.16084274649620056, |
| "learning_rate": 3.0218506557047598e-05, |
| "loss": 0.0133, |
| "step": 6480 |
| }, |
| { |
| "epoch": 4.203367875647668, |
| "grad_norm": 0.18579982221126556, |
| "learning_rate": 3.006675866883275e-05, |
| "loss": 0.0147, |
| "step": 6490 |
| }, |
| { |
| "epoch": 4.209844559585492, |
| "grad_norm": 0.48777055740356445, |
| "learning_rate": 2.991522876735154e-05, |
| "loss": 0.0136, |
| "step": 6500 |
| }, |
| { |
| "epoch": 4.216321243523316, |
| "grad_norm": 0.30258408188819885, |
| "learning_rate": 2.976391850971065e-05, |
| "loss": 0.0269, |
| "step": 6510 |
| }, |
| { |
| "epoch": 4.22279792746114, |
| "grad_norm": 0.13562749326229095, |
| "learning_rate": 2.9612829550614836e-05, |
| "loss": 0.0258, |
| "step": 6520 |
| }, |
| { |
| "epoch": 4.229274611398964, |
| "grad_norm": 0.25811293721199036, |
| "learning_rate": 2.9461963542348737e-05, |
| "loss": 0.0143, |
| "step": 6530 |
| }, |
| { |
| "epoch": 4.2357512953367875, |
| "grad_norm": 0.32385483384132385, |
| "learning_rate": 2.931132213475884e-05, |
| "loss": 0.0241, |
| "step": 6540 |
| }, |
| { |
| "epoch": 4.242227979274611, |
| "grad_norm": 0.19972571730613708, |
| "learning_rate": 2.916090697523549e-05, |
| "loss": 0.0171, |
| "step": 6550 |
| }, |
| { |
| "epoch": 4.248704663212435, |
| "grad_norm": 0.21418355405330658, |
| "learning_rate": 2.9010719708694722e-05, |
| "loss": 0.0141, |
| "step": 6560 |
| }, |
| { |
| "epoch": 4.255181347150259, |
| "grad_norm": 0.430324524641037, |
| "learning_rate": 2.8860761977560436e-05, |
| "loss": 0.0191, |
| "step": 6570 |
| }, |
| { |
| "epoch": 4.261658031088083, |
| "grad_norm": 0.20250950753688812, |
| "learning_rate": 2.8711035421746367e-05, |
| "loss": 0.0195, |
| "step": 6580 |
| }, |
| { |
| "epoch": 4.268134715025907, |
| "grad_norm": 0.3518775701522827, |
| "learning_rate": 2.8561541678638142e-05, |
| "loss": 0.0195, |
| "step": 6590 |
| }, |
| { |
| "epoch": 4.274611398963731, |
| "grad_norm": 0.28056877851486206, |
| "learning_rate": 2.8412282383075363e-05, |
| "loss": 0.0145, |
| "step": 6600 |
| }, |
| { |
| "epoch": 4.2810880829015545, |
| "grad_norm": 0.17302846908569336, |
| "learning_rate": 2.8263259167333777e-05, |
| "loss": 0.014, |
| "step": 6610 |
| }, |
| { |
| "epoch": 4.287564766839378, |
| "grad_norm": 0.2156630903482437, |
| "learning_rate": 2.811447366110741e-05, |
| "loss": 0.0194, |
| "step": 6620 |
| }, |
| { |
| "epoch": 4.294041450777202, |
| "grad_norm": 0.20651696622371674, |
| "learning_rate": 2.7965927491490705e-05, |
| "loss": 0.0154, |
| "step": 6630 |
| }, |
| { |
| "epoch": 4.300518134715026, |
| "grad_norm": 0.24434152245521545, |
| "learning_rate": 2.7817622282960815e-05, |
| "loss": 0.0175, |
| "step": 6640 |
| }, |
| { |
| "epoch": 4.30699481865285, |
| "grad_norm": 0.3232467770576477, |
| "learning_rate": 2.766955965735968e-05, |
| "loss": 0.016, |
| "step": 6650 |
| }, |
| { |
| "epoch": 4.313471502590674, |
| "grad_norm": 0.2705603837966919, |
| "learning_rate": 2.7521741233876496e-05, |
| "loss": 0.0198, |
| "step": 6660 |
| }, |
| { |
| "epoch": 4.319948186528498, |
| "grad_norm": 0.302611380815506, |
| "learning_rate": 2.7374168629029813e-05, |
| "loss": 0.0221, |
| "step": 6670 |
| }, |
| { |
| "epoch": 4.3264248704663215, |
| "grad_norm": 0.23167212307453156, |
| "learning_rate": 2.7226843456650037e-05, |
| "loss": 0.0134, |
| "step": 6680 |
| }, |
| { |
| "epoch": 4.332901554404145, |
| "grad_norm": 0.21923419833183289, |
| "learning_rate": 2.707976732786166e-05, |
| "loss": 0.0194, |
| "step": 6690 |
| }, |
| { |
| "epoch": 4.339378238341969, |
| "grad_norm": 0.22099579870700836, |
| "learning_rate": 2.693294185106562e-05, |
| "loss": 0.0178, |
| "step": 6700 |
| }, |
| { |
| "epoch": 4.345854922279793, |
| "grad_norm": 0.2990250885486603, |
| "learning_rate": 2.6786368631921836e-05, |
| "loss": 0.0192, |
| "step": 6710 |
| }, |
| { |
| "epoch": 4.352331606217617, |
| "grad_norm": 0.32166576385498047, |
| "learning_rate": 2.6640049273331515e-05, |
| "loss": 0.0207, |
| "step": 6720 |
| }, |
| { |
| "epoch": 4.358808290155441, |
| "grad_norm": 0.24853219091892242, |
| "learning_rate": 2.6493985375419778e-05, |
| "loss": 0.0196, |
| "step": 6730 |
| }, |
| { |
| "epoch": 4.365284974093265, |
| "grad_norm": 0.223812997341156, |
| "learning_rate": 2.6348178535517966e-05, |
| "loss": 0.0244, |
| "step": 6740 |
| }, |
| { |
| "epoch": 4.3717616580310885, |
| "grad_norm": 0.1604139655828476, |
| "learning_rate": 2.6202630348146324e-05, |
| "loss": 0.0174, |
| "step": 6750 |
| }, |
| { |
| "epoch": 4.3782383419689115, |
| "grad_norm": 0.19458530843257904, |
| "learning_rate": 2.6057342404996522e-05, |
| "loss": 0.0176, |
| "step": 6760 |
| }, |
| { |
| "epoch": 4.384715025906735, |
| "grad_norm": 0.3442190885543823, |
| "learning_rate": 2.591231629491423e-05, |
| "loss": 0.0232, |
| "step": 6770 |
| }, |
| { |
| "epoch": 4.391191709844559, |
| "grad_norm": 0.2418828159570694, |
| "learning_rate": 2.5767553603881767e-05, |
| "loss": 0.0138, |
| "step": 6780 |
| }, |
| { |
| "epoch": 4.397668393782383, |
| "grad_norm": 0.28616422414779663, |
| "learning_rate": 2.562305591500069e-05, |
| "loss": 0.0182, |
| "step": 6790 |
| }, |
| { |
| "epoch": 4.404145077720207, |
| "grad_norm": 0.35665300488471985, |
| "learning_rate": 2.547882480847461e-05, |
| "loss": 0.0256, |
| "step": 6800 |
| }, |
| { |
| "epoch": 4.410621761658031, |
| "grad_norm": 0.28347641229629517, |
| "learning_rate": 2.5334861861591753e-05, |
| "loss": 0.0181, |
| "step": 6810 |
| }, |
| { |
| "epoch": 4.417098445595855, |
| "grad_norm": 0.2930223047733307, |
| "learning_rate": 2.5191168648707887e-05, |
| "loss": 0.0181, |
| "step": 6820 |
| }, |
| { |
| "epoch": 4.4235751295336785, |
| "grad_norm": 0.2513889968395233, |
| "learning_rate": 2.5047746741228978e-05, |
| "loss": 0.0209, |
| "step": 6830 |
| }, |
| { |
| "epoch": 4.430051813471502, |
| "grad_norm": 0.22858624160289764, |
| "learning_rate": 2.490459770759398e-05, |
| "loss": 0.0186, |
| "step": 6840 |
| }, |
| { |
| "epoch": 4.436528497409326, |
| "grad_norm": 0.2156023532152176, |
| "learning_rate": 2.476172311325783e-05, |
| "loss": 0.0154, |
| "step": 6850 |
| }, |
| { |
| "epoch": 4.44300518134715, |
| "grad_norm": 0.21967634558677673, |
| "learning_rate": 2.4619124520674146e-05, |
| "loss": 0.0192, |
| "step": 6860 |
| }, |
| { |
| "epoch": 4.449481865284974, |
| "grad_norm": 0.16934919357299805, |
| "learning_rate": 2.447680348927837e-05, |
| "loss": 0.013, |
| "step": 6870 |
| }, |
| { |
| "epoch": 4.455958549222798, |
| "grad_norm": 0.18204748630523682, |
| "learning_rate": 2.433476157547044e-05, |
| "loss": 0.0128, |
| "step": 6880 |
| }, |
| { |
| "epoch": 4.462435233160622, |
| "grad_norm": 0.2556453049182892, |
| "learning_rate": 2.419300033259798e-05, |
| "loss": 0.0242, |
| "step": 6890 |
| }, |
| { |
| "epoch": 4.4689119170984455, |
| "grad_norm": 0.30375412106513977, |
| "learning_rate": 2.405152131093926e-05, |
| "loss": 0.0123, |
| "step": 6900 |
| }, |
| { |
| "epoch": 4.475388601036269, |
| "grad_norm": 0.19570554792881012, |
| "learning_rate": 2.3910326057686127e-05, |
| "loss": 0.02, |
| "step": 6910 |
| }, |
| { |
| "epoch": 4.481865284974093, |
| "grad_norm": 0.20033107697963715, |
| "learning_rate": 2.3769416116927335e-05, |
| "loss": 0.0199, |
| "step": 6920 |
| }, |
| { |
| "epoch": 4.488341968911917, |
| "grad_norm": 0.22169610857963562, |
| "learning_rate": 2.362879302963135e-05, |
| "loss": 0.0155, |
| "step": 6930 |
| }, |
| { |
| "epoch": 4.494818652849741, |
| "grad_norm": 0.20770221948623657, |
| "learning_rate": 2.3488458333629777e-05, |
| "loss": 0.0183, |
| "step": 6940 |
| }, |
| { |
| "epoch": 4.501295336787565, |
| "grad_norm": 0.2045334279537201, |
| "learning_rate": 2.3348413563600325e-05, |
| "loss": 0.0137, |
| "step": 6950 |
| }, |
| { |
| "epoch": 4.507772020725389, |
| "grad_norm": 0.2747853696346283, |
| "learning_rate": 2.3208660251050158e-05, |
| "loss": 0.0139, |
| "step": 6960 |
| }, |
| { |
| "epoch": 4.5142487046632125, |
| "grad_norm": 0.2516135573387146, |
| "learning_rate": 2.3069199924299174e-05, |
| "loss": 0.0185, |
| "step": 6970 |
| }, |
| { |
| "epoch": 4.520725388601036, |
| "grad_norm": 0.28163227438926697, |
| "learning_rate": 2.29300341084631e-05, |
| "loss": 0.0194, |
| "step": 6980 |
| }, |
| { |
| "epoch": 4.52720207253886, |
| "grad_norm": 0.18591704964637756, |
| "learning_rate": 2.279116432543705e-05, |
| "loss": 0.026, |
| "step": 6990 |
| }, |
| { |
| "epoch": 4.533678756476684, |
| "grad_norm": 0.2261771857738495, |
| "learning_rate": 2.2652592093878666e-05, |
| "loss": 0.0208, |
| "step": 7000 |
| }, |
| { |
| "epoch": 4.540155440414508, |
| "grad_norm": 0.22614069283008575, |
| "learning_rate": 2.251431892919171e-05, |
| "loss": 0.0154, |
| "step": 7010 |
| }, |
| { |
| "epoch": 4.546632124352332, |
| "grad_norm": 0.2026568055152893, |
| "learning_rate": 2.237634634350934e-05, |
| "loss": 0.019, |
| "step": 7020 |
| }, |
| { |
| "epoch": 4.553108808290156, |
| "grad_norm": 0.16602849960327148, |
| "learning_rate": 2.2238675845677663e-05, |
| "loss": 0.0121, |
| "step": 7030 |
| }, |
| { |
| "epoch": 4.5595854922279795, |
| "grad_norm": 0.32185062766075134, |
| "learning_rate": 2.2101308941239203e-05, |
| "loss": 0.0189, |
| "step": 7040 |
| }, |
| { |
| "epoch": 4.566062176165803, |
| "grad_norm": 0.1680583357810974, |
| "learning_rate": 2.196424713241637e-05, |
| "loss": 0.013, |
| "step": 7050 |
| }, |
| { |
| "epoch": 4.572538860103627, |
| "grad_norm": 0.16130873560905457, |
| "learning_rate": 2.182749191809518e-05, |
| "loss": 0.0171, |
| "step": 7060 |
| }, |
| { |
| "epoch": 4.579015544041451, |
| "grad_norm": 0.276980459690094, |
| "learning_rate": 2.1691044793808734e-05, |
| "loss": 0.0154, |
| "step": 7070 |
| }, |
| { |
| "epoch": 4.585492227979275, |
| "grad_norm": 0.2915078401565552, |
| "learning_rate": 2.1554907251720945e-05, |
| "loss": 0.0162, |
| "step": 7080 |
| }, |
| { |
| "epoch": 4.591968911917099, |
| "grad_norm": 0.5446373224258423, |
| "learning_rate": 2.1419080780610123e-05, |
| "loss": 0.0263, |
| "step": 7090 |
| }, |
| { |
| "epoch": 4.598445595854923, |
| "grad_norm": 0.3362918794155121, |
| "learning_rate": 2.128356686585282e-05, |
| "loss": 0.0156, |
| "step": 7100 |
| }, |
| { |
| "epoch": 4.6049222797927465, |
| "grad_norm": 0.2678106427192688, |
| "learning_rate": 2.1148366989407496e-05, |
| "loss": 0.0197, |
| "step": 7110 |
| }, |
| { |
| "epoch": 4.61139896373057, |
| "grad_norm": 0.18777549266815186, |
| "learning_rate": 2.1013482629798333e-05, |
| "loss": 0.0167, |
| "step": 7120 |
| }, |
| { |
| "epoch": 4.617875647668393, |
| "grad_norm": 0.17594756186008453, |
| "learning_rate": 2.0878915262099098e-05, |
| "loss": 0.0215, |
| "step": 7130 |
| }, |
| { |
| "epoch": 4.624352331606218, |
| "grad_norm": 0.22304749488830566, |
| "learning_rate": 2.0744666357916925e-05, |
| "loss": 0.0226, |
| "step": 7140 |
| }, |
| { |
| "epoch": 4.630829015544041, |
| "grad_norm": 0.18945784866809845, |
| "learning_rate": 2.061073738537635e-05, |
| "loss": 0.0134, |
| "step": 7150 |
| }, |
| { |
| "epoch": 4.637305699481866, |
| "grad_norm": 0.152085542678833, |
| "learning_rate": 2.0477129809103147e-05, |
| "loss": 0.0175, |
| "step": 7160 |
| }, |
| { |
| "epoch": 4.643782383419689, |
| "grad_norm": 0.1946026086807251, |
| "learning_rate": 2.0343845090208368e-05, |
| "loss": 0.0223, |
| "step": 7170 |
| }, |
| { |
| "epoch": 4.650259067357513, |
| "grad_norm": 0.17056028544902802, |
| "learning_rate": 2.0210884686272368e-05, |
| "loss": 0.017, |
| "step": 7180 |
| }, |
| { |
| "epoch": 4.6567357512953365, |
| "grad_norm": 0.20490196347236633, |
| "learning_rate": 2.0078250051328784e-05, |
| "loss": 0.0146, |
| "step": 7190 |
| }, |
| { |
| "epoch": 4.66321243523316, |
| "grad_norm": 0.2622607350349426, |
| "learning_rate": 1.9945942635848748e-05, |
| "loss": 0.0184, |
| "step": 7200 |
| }, |
| { |
| "epoch": 4.669689119170984, |
| "grad_norm": 0.27773723006248474, |
| "learning_rate": 1.981396388672496e-05, |
| "loss": 0.0193, |
| "step": 7210 |
| }, |
| { |
| "epoch": 4.676165803108808, |
| "grad_norm": 0.14281229674816132, |
| "learning_rate": 1.9682315247255894e-05, |
| "loss": 0.0115, |
| "step": 7220 |
| }, |
| { |
| "epoch": 4.682642487046632, |
| "grad_norm": 0.40963441133499146, |
| "learning_rate": 1.9550998157129946e-05, |
| "loss": 0.0148, |
| "step": 7230 |
| }, |
| { |
| "epoch": 4.689119170984456, |
| "grad_norm": 0.164134681224823, |
| "learning_rate": 1.942001405240979e-05, |
| "loss": 0.0128, |
| "step": 7240 |
| }, |
| { |
| "epoch": 4.69559585492228, |
| "grad_norm": 0.32639560103416443, |
| "learning_rate": 1.928936436551661e-05, |
| "loss": 0.0153, |
| "step": 7250 |
| }, |
| { |
| "epoch": 4.7020725388601035, |
| "grad_norm": 0.3040475845336914, |
| "learning_rate": 1.9159050525214452e-05, |
| "loss": 0.0217, |
| "step": 7260 |
| }, |
| { |
| "epoch": 4.708549222797927, |
| "grad_norm": 0.15811248123645782, |
| "learning_rate": 1.9029073956594606e-05, |
| "loss": 0.0205, |
| "step": 7270 |
| }, |
| { |
| "epoch": 4.715025906735751, |
| "grad_norm": 0.21602000296115875, |
| "learning_rate": 1.8899436081059975e-05, |
| "loss": 0.0129, |
| "step": 7280 |
| }, |
| { |
| "epoch": 4.721502590673575, |
| "grad_norm": 0.25439000129699707, |
| "learning_rate": 1.877013831630961e-05, |
| "loss": 0.0165, |
| "step": 7290 |
| }, |
| { |
| "epoch": 4.727979274611399, |
| "grad_norm": 0.2095918357372284, |
| "learning_rate": 1.8641182076323148e-05, |
| "loss": 0.024, |
| "step": 7300 |
| }, |
| { |
| "epoch": 4.734455958549223, |
| "grad_norm": 0.20923274755477905, |
| "learning_rate": 1.851256877134538e-05, |
| "loss": 0.0162, |
| "step": 7310 |
| }, |
| { |
| "epoch": 4.740932642487047, |
| "grad_norm": 0.2099110335111618, |
| "learning_rate": 1.838429980787081e-05, |
| "loss": 0.0158, |
| "step": 7320 |
| }, |
| { |
| "epoch": 4.7474093264248705, |
| "grad_norm": 0.30646830797195435, |
| "learning_rate": 1.8256376588628238e-05, |
| "loss": 0.0134, |
| "step": 7330 |
| }, |
| { |
| "epoch": 4.753886010362694, |
| "grad_norm": 0.10917850583791733, |
| "learning_rate": 1.8128800512565513e-05, |
| "loss": 0.0203, |
| "step": 7340 |
| }, |
| { |
| "epoch": 4.760362694300518, |
| "grad_norm": 0.30095070600509644, |
| "learning_rate": 1.800157297483417e-05, |
| "loss": 0.0173, |
| "step": 7350 |
| }, |
| { |
| "epoch": 4.766839378238342, |
| "grad_norm": 0.2577114701271057, |
| "learning_rate": 1.787469536677419e-05, |
| "loss": 0.0197, |
| "step": 7360 |
| }, |
| { |
| "epoch": 4.773316062176166, |
| "grad_norm": 0.3354087471961975, |
| "learning_rate": 1.774816907589873e-05, |
| "loss": 0.0186, |
| "step": 7370 |
| }, |
| { |
| "epoch": 4.77979274611399, |
| "grad_norm": 0.2139454483985901, |
| "learning_rate": 1.7621995485879062e-05, |
| "loss": 0.0207, |
| "step": 7380 |
| }, |
| { |
| "epoch": 4.786269430051814, |
| "grad_norm": 0.15414099395275116, |
| "learning_rate": 1.749617597652934e-05, |
| "loss": 0.0174, |
| "step": 7390 |
| }, |
| { |
| "epoch": 4.7927461139896375, |
| "grad_norm": 0.23587022721767426, |
| "learning_rate": 1.7370711923791567e-05, |
| "loss": 0.0161, |
| "step": 7400 |
| }, |
| { |
| "epoch": 4.799222797927461, |
| "grad_norm": 0.32957470417022705, |
| "learning_rate": 1.7245604699720535e-05, |
| "loss": 0.016, |
| "step": 7410 |
| }, |
| { |
| "epoch": 4.805699481865285, |
| "grad_norm": 0.381789892911911, |
| "learning_rate": 1.712085567246878e-05, |
| "loss": 0.0278, |
| "step": 7420 |
| }, |
| { |
| "epoch": 4.812176165803109, |
| "grad_norm": 0.10974530875682831, |
| "learning_rate": 1.699646620627168e-05, |
| "loss": 0.0204, |
| "step": 7430 |
| }, |
| { |
| "epoch": 4.818652849740933, |
| "grad_norm": 0.13477617502212524, |
| "learning_rate": 1.6872437661432517e-05, |
| "loss": 0.013, |
| "step": 7440 |
| }, |
| { |
| "epoch": 4.825129533678757, |
| "grad_norm": 0.24634115397930145, |
| "learning_rate": 1.6748771394307585e-05, |
| "loss": 0.0168, |
| "step": 7450 |
| }, |
| { |
| "epoch": 4.831606217616581, |
| "grad_norm": 0.20444193482398987, |
| "learning_rate": 1.662546875729138e-05, |
| "loss": 0.0128, |
| "step": 7460 |
| }, |
| { |
| "epoch": 4.8380829015544045, |
| "grad_norm": 0.21118474006652832, |
| "learning_rate": 1.6502531098801753e-05, |
| "loss": 0.0149, |
| "step": 7470 |
| }, |
| { |
| "epoch": 4.844559585492228, |
| "grad_norm": 0.21043027937412262, |
| "learning_rate": 1.637995976326527e-05, |
| "loss": 0.0235, |
| "step": 7480 |
| }, |
| { |
| "epoch": 4.851036269430052, |
| "grad_norm": 0.23634518682956696, |
| "learning_rate": 1.62577560911024e-05, |
| "loss": 0.0152, |
| "step": 7490 |
| }, |
| { |
| "epoch": 4.857512953367876, |
| "grad_norm": 0.11259549856185913, |
| "learning_rate": 1.6135921418712956e-05, |
| "loss": 0.0175, |
| "step": 7500 |
| }, |
| { |
| "epoch": 4.8639896373057, |
| "grad_norm": 0.210161030292511, |
| "learning_rate": 1.6014457078461353e-05, |
| "loss": 0.016, |
| "step": 7510 |
| }, |
| { |
| "epoch": 4.870466321243523, |
| "grad_norm": 0.26830533146858215, |
| "learning_rate": 1.5893364398662176e-05, |
| "loss": 0.02, |
| "step": 7520 |
| }, |
| { |
| "epoch": 4.876943005181348, |
| "grad_norm": 0.2090412825345993, |
| "learning_rate": 1.5772644703565565e-05, |
| "loss": 0.019, |
| "step": 7530 |
| }, |
| { |
| "epoch": 4.883419689119171, |
| "grad_norm": 0.22939516603946686, |
| "learning_rate": 1.5652299313342773e-05, |
| "loss": 0.0136, |
| "step": 7540 |
| }, |
| { |
| "epoch": 4.889896373056995, |
| "grad_norm": 0.1718941479921341, |
| "learning_rate": 1.553232954407171e-05, |
| "loss": 0.0176, |
| "step": 7550 |
| }, |
| { |
| "epoch": 4.896373056994818, |
| "grad_norm": 0.17308102548122406, |
| "learning_rate": 1.5412736707722537e-05, |
| "loss": 0.0176, |
| "step": 7560 |
| }, |
| { |
| "epoch": 4.902849740932642, |
| "grad_norm": 0.11239796131849289, |
| "learning_rate": 1.5293522112143373e-05, |
| "loss": 0.0121, |
| "step": 7570 |
| }, |
| { |
| "epoch": 4.909326424870466, |
| "grad_norm": 0.35807281732559204, |
| "learning_rate": 1.517468706104589e-05, |
| "loss": 0.0146, |
| "step": 7580 |
| }, |
| { |
| "epoch": 4.91580310880829, |
| "grad_norm": 0.34626898169517517, |
| "learning_rate": 1.5056232853991209e-05, |
| "loss": 0.0176, |
| "step": 7590 |
| }, |
| { |
| "epoch": 4.922279792746114, |
| "grad_norm": 0.19787181913852692, |
| "learning_rate": 1.4938160786375572e-05, |
| "loss": 0.0148, |
| "step": 7600 |
| }, |
| { |
| "epoch": 4.928756476683938, |
| "grad_norm": 0.1723092943429947, |
| "learning_rate": 1.4820472149416154e-05, |
| "loss": 0.0154, |
| "step": 7610 |
| }, |
| { |
| "epoch": 4.935233160621761, |
| "grad_norm": 0.12467465549707413, |
| "learning_rate": 1.470316823013707e-05, |
| "loss": 0.0125, |
| "step": 7620 |
| }, |
| { |
| "epoch": 4.941709844559585, |
| "grad_norm": 0.36818578839302063, |
| "learning_rate": 1.4586250311355132e-05, |
| "loss": 0.0238, |
| "step": 7630 |
| }, |
| { |
| "epoch": 4.948186528497409, |
| "grad_norm": 0.17660541832447052, |
| "learning_rate": 1.4469719671666043e-05, |
| "loss": 0.0141, |
| "step": 7640 |
| }, |
| { |
| "epoch": 4.954663212435233, |
| "grad_norm": 0.24274510145187378, |
| "learning_rate": 1.435357758543015e-05, |
| "loss": 0.0171, |
| "step": 7650 |
| }, |
| { |
| "epoch": 4.961139896373057, |
| "grad_norm": 0.3167835772037506, |
| "learning_rate": 1.4237825322758736e-05, |
| "loss": 0.0215, |
| "step": 7660 |
| }, |
| { |
| "epoch": 4.967616580310881, |
| "grad_norm": 0.20456522703170776, |
| "learning_rate": 1.412246414949997e-05, |
| "loss": 0.012, |
| "step": 7670 |
| }, |
| { |
| "epoch": 4.974093264248705, |
| "grad_norm": 0.08046074211597443, |
| "learning_rate": 1.4007495327225162e-05, |
| "loss": 0.0123, |
| "step": 7680 |
| }, |
| { |
| "epoch": 4.980569948186528, |
| "grad_norm": 0.27661871910095215, |
| "learning_rate": 1.389292011321498e-05, |
| "loss": 0.0143, |
| "step": 7690 |
| }, |
| { |
| "epoch": 4.987046632124352, |
| "grad_norm": 0.33071455359458923, |
| "learning_rate": 1.3778739760445552e-05, |
| "loss": 0.0148, |
| "step": 7700 |
| }, |
| { |
| "epoch": 4.993523316062176, |
| "grad_norm": 0.33523082733154297, |
| "learning_rate": 1.3664955517574968e-05, |
| "loss": 0.0136, |
| "step": 7710 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.15871787071228027, |
| "learning_rate": 1.3551568628929434e-05, |
| "loss": 0.0193, |
| "step": 7720 |
| }, |
| { |
| "epoch": 5.006476683937824, |
| "grad_norm": 0.20482423901557922, |
| "learning_rate": 1.343858033448982e-05, |
| "loss": 0.0152, |
| "step": 7730 |
| }, |
| { |
| "epoch": 5.012953367875648, |
| "grad_norm": 0.2882786691188812, |
| "learning_rate": 1.3325991869878013e-05, |
| "loss": 0.0127, |
| "step": 7740 |
| }, |
| { |
| "epoch": 5.019430051813472, |
| "grad_norm": 0.22831624746322632, |
| "learning_rate": 1.3213804466343421e-05, |
| "loss": 0.0184, |
| "step": 7750 |
| }, |
| { |
| "epoch": 5.025906735751295, |
| "grad_norm": 0.12518496811389923, |
| "learning_rate": 1.3102019350749528e-05, |
| "loss": 0.0141, |
| "step": 7760 |
| }, |
| { |
| "epoch": 5.032383419689119, |
| "grad_norm": 0.13528479635715485, |
| "learning_rate": 1.299063774556042e-05, |
| "loss": 0.0085, |
| "step": 7770 |
| }, |
| { |
| "epoch": 5.038860103626943, |
| "grad_norm": 0.1275235116481781, |
| "learning_rate": 1.2879660868827508e-05, |
| "loss": 0.0117, |
| "step": 7780 |
| }, |
| { |
| "epoch": 5.045336787564767, |
| "grad_norm": 0.20098711550235748, |
| "learning_rate": 1.2769089934176126e-05, |
| "loss": 0.0167, |
| "step": 7790 |
| }, |
| { |
| "epoch": 5.051813471502591, |
| "grad_norm": 0.09308230131864548, |
| "learning_rate": 1.2658926150792322e-05, |
| "loss": 0.0135, |
| "step": 7800 |
| }, |
| { |
| "epoch": 5.058290155440415, |
| "grad_norm": 0.1514425277709961, |
| "learning_rate": 1.2549170723409549e-05, |
| "loss": 0.0182, |
| "step": 7810 |
| }, |
| { |
| "epoch": 5.064766839378239, |
| "grad_norm": 0.3412984013557434, |
| "learning_rate": 1.243982485229559e-05, |
| "loss": 0.0125, |
| "step": 7820 |
| }, |
| { |
| "epoch": 5.071243523316062, |
| "grad_norm": 0.08874571323394775, |
| "learning_rate": 1.233088973323937e-05, |
| "loss": 0.0105, |
| "step": 7830 |
| }, |
| { |
| "epoch": 5.077720207253886, |
| "grad_norm": 0.11828942596912384, |
| "learning_rate": 1.2222366557537911e-05, |
| "loss": 0.0158, |
| "step": 7840 |
| }, |
| { |
| "epoch": 5.08419689119171, |
| "grad_norm": 0.24079908430576324, |
| "learning_rate": 1.2114256511983274e-05, |
| "loss": 0.0128, |
| "step": 7850 |
| }, |
| { |
| "epoch": 5.090673575129534, |
| "grad_norm": 0.12796323001384735, |
| "learning_rate": 1.2006560778849578e-05, |
| "loss": 0.0158, |
| "step": 7860 |
| }, |
| { |
| "epoch": 5.097150259067358, |
| "grad_norm": 0.4417702555656433, |
| "learning_rate": 1.1899280535880119e-05, |
| "loss": 0.0192, |
| "step": 7870 |
| }, |
| { |
| "epoch": 5.103626943005182, |
| "grad_norm": 0.29181721806526184, |
| "learning_rate": 1.1792416956274444e-05, |
| "loss": 0.0148, |
| "step": 7880 |
| }, |
| { |
| "epoch": 5.110103626943006, |
| "grad_norm": 0.15089385211467743, |
| "learning_rate": 1.1685971208675539e-05, |
| "loss": 0.0133, |
| "step": 7890 |
| }, |
| { |
| "epoch": 5.116580310880829, |
| "grad_norm": 0.22751882672309875, |
| "learning_rate": 1.157994445715706e-05, |
| "loss": 0.0179, |
| "step": 7900 |
| }, |
| { |
| "epoch": 5.123056994818652, |
| "grad_norm": 0.18326793611049652, |
| "learning_rate": 1.1474337861210543e-05, |
| "loss": 0.0135, |
| "step": 7910 |
| }, |
| { |
| "epoch": 5.129533678756476, |
| "grad_norm": 0.22920016944408417, |
| "learning_rate": 1.1369152575732822e-05, |
| "loss": 0.0128, |
| "step": 7920 |
| }, |
| { |
| "epoch": 5.1360103626943, |
| "grad_norm": 0.3829505145549774, |
| "learning_rate": 1.1264389751013326e-05, |
| "loss": 0.0118, |
| "step": 7930 |
| }, |
| { |
| "epoch": 5.142487046632124, |
| "grad_norm": 0.26591065526008606, |
| "learning_rate": 1.1160050532721528e-05, |
| "loss": 0.0176, |
| "step": 7940 |
| }, |
| { |
| "epoch": 5.148963730569948, |
| "grad_norm": 0.1697339564561844, |
| "learning_rate": 1.1056136061894384e-05, |
| "loss": 0.0161, |
| "step": 7950 |
| }, |
| { |
| "epoch": 5.155440414507772, |
| "grad_norm": 0.23472066223621368, |
| "learning_rate": 1.095264747492391e-05, |
| "loss": 0.0098, |
| "step": 7960 |
| }, |
| { |
| "epoch": 5.1619170984455955, |
| "grad_norm": 0.2254534214735031, |
| "learning_rate": 1.0849585903544706e-05, |
| "loss": 0.0155, |
| "step": 7970 |
| }, |
| { |
| "epoch": 5.168393782383419, |
| "grad_norm": 0.1349540650844574, |
| "learning_rate": 1.0746952474821614e-05, |
| "loss": 0.0127, |
| "step": 7980 |
| }, |
| { |
| "epoch": 5.174870466321243, |
| "grad_norm": 0.27007216215133667, |
| "learning_rate": 1.0644748311137376e-05, |
| "loss": 0.0146, |
| "step": 7990 |
| }, |
| { |
| "epoch": 5.181347150259067, |
| "grad_norm": 0.19731518626213074, |
| "learning_rate": 1.0542974530180327e-05, |
| "loss": 0.0158, |
| "step": 8000 |
| }, |
| { |
| "epoch": 5.187823834196891, |
| "grad_norm": 0.2689395844936371, |
| "learning_rate": 1.0441632244932237e-05, |
| "loss": 0.0154, |
| "step": 8010 |
| }, |
| { |
| "epoch": 5.194300518134715, |
| "grad_norm": 0.2564564645290375, |
| "learning_rate": 1.0340722563656107e-05, |
| "loss": 0.014, |
| "step": 8020 |
| }, |
| { |
| "epoch": 5.200777202072539, |
| "grad_norm": 0.1243646964430809, |
| "learning_rate": 1.0240246589884044e-05, |
| "loss": 0.0123, |
| "step": 8030 |
| }, |
| { |
| "epoch": 5.2072538860103625, |
| "grad_norm": 0.2494179755449295, |
| "learning_rate": 1.0140205422405214e-05, |
| "loss": 0.0139, |
| "step": 8040 |
| }, |
| { |
| "epoch": 5.213730569948186, |
| "grad_norm": 0.18832044303417206, |
| "learning_rate": 1.0040600155253765e-05, |
| "loss": 0.0143, |
| "step": 8050 |
| }, |
| { |
| "epoch": 5.22020725388601, |
| "grad_norm": 0.17419497668743134, |
| "learning_rate": 9.941431877696955e-06, |
| "loss": 0.0131, |
| "step": 8060 |
| }, |
| { |
| "epoch": 5.226683937823834, |
| "grad_norm": 0.42716658115386963, |
| "learning_rate": 9.842701674223187e-06, |
| "loss": 0.0209, |
| "step": 8070 |
| }, |
| { |
| "epoch": 5.233160621761658, |
| "grad_norm": 0.21540680527687073, |
| "learning_rate": 9.744410624530148e-06, |
| "loss": 0.0169, |
| "step": 8080 |
| }, |
| { |
| "epoch": 5.239637305699482, |
| "grad_norm": 0.1386623978614807, |
| "learning_rate": 9.646559803512994e-06, |
| "loss": 0.0137, |
| "step": 8090 |
| }, |
| { |
| "epoch": 5.246113989637306, |
| "grad_norm": 0.21496275067329407, |
| "learning_rate": 9.549150281252633e-06, |
| "loss": 0.0121, |
| "step": 8100 |
| }, |
| { |
| "epoch": 5.2525906735751295, |
| "grad_norm": 0.204043909907341, |
| "learning_rate": 9.452183123004e-06, |
| "loss": 0.0188, |
| "step": 8110 |
| }, |
| { |
| "epoch": 5.259067357512953, |
| "grad_norm": 0.22977150976657867, |
| "learning_rate": 9.355659389184396e-06, |
| "loss": 0.018, |
| "step": 8120 |
| }, |
| { |
| "epoch": 5.265544041450777, |
| "grad_norm": 0.18128716945648193, |
| "learning_rate": 9.259580135361929e-06, |
| "loss": 0.0166, |
| "step": 8130 |
| }, |
| { |
| "epoch": 5.272020725388601, |
| "grad_norm": 0.10355131328105927, |
| "learning_rate": 9.163946412243896e-06, |
| "loss": 0.0138, |
| "step": 8140 |
| }, |
| { |
| "epoch": 5.278497409326425, |
| "grad_norm": 0.1374073177576065, |
| "learning_rate": 9.068759265665384e-06, |
| "loss": 0.0139, |
| "step": 8150 |
| }, |
| { |
| "epoch": 5.284974093264249, |
| "grad_norm": 0.27741244435310364, |
| "learning_rate": 8.974019736577777e-06, |
| "loss": 0.0103, |
| "step": 8160 |
| }, |
| { |
| "epoch": 5.291450777202073, |
| "grad_norm": 0.14168044924736023, |
| "learning_rate": 8.879728861037384e-06, |
| "loss": 0.0146, |
| "step": 8170 |
| }, |
| { |
| "epoch": 5.2979274611398965, |
| "grad_norm": 0.15541480481624603, |
| "learning_rate": 8.785887670194138e-06, |
| "loss": 0.0094, |
| "step": 8180 |
| }, |
| { |
| "epoch": 5.30440414507772, |
| "grad_norm": 0.16454242169857025, |
| "learning_rate": 8.692497190280224e-06, |
| "loss": 0.0173, |
| "step": 8190 |
| }, |
| { |
| "epoch": 5.310880829015544, |
| "grad_norm": 0.16756625473499298, |
| "learning_rate": 8.599558442598998e-06, |
| "loss": 0.0165, |
| "step": 8200 |
| }, |
| { |
| "epoch": 5.317357512953368, |
| "grad_norm": 0.3439781069755554, |
| "learning_rate": 8.507072443513702e-06, |
| "loss": 0.0156, |
| "step": 8210 |
| }, |
| { |
| "epoch": 5.323834196891192, |
| "grad_norm": 0.18753990530967712, |
| "learning_rate": 8.415040204436426e-06, |
| "loss": 0.0141, |
| "step": 8220 |
| }, |
| { |
| "epoch": 5.330310880829016, |
| "grad_norm": 0.18207696080207825, |
| "learning_rate": 8.323462731816961e-06, |
| "loss": 0.015, |
| "step": 8230 |
| }, |
| { |
| "epoch": 5.33678756476684, |
| "grad_norm": 0.15657085180282593, |
| "learning_rate": 8.232341027131885e-06, |
| "loss": 0.0137, |
| "step": 8240 |
| }, |
| { |
| "epoch": 5.3432642487046635, |
| "grad_norm": 0.10936085879802704, |
| "learning_rate": 8.141676086873572e-06, |
| "loss": 0.0172, |
| "step": 8250 |
| }, |
| { |
| "epoch": 5.349740932642487, |
| "grad_norm": 0.26479002833366394, |
| "learning_rate": 8.051468902539272e-06, |
| "loss": 0.0099, |
| "step": 8260 |
| }, |
| { |
| "epoch": 5.356217616580311, |
| "grad_norm": 0.23503267765045166, |
| "learning_rate": 7.96172046062032e-06, |
| "loss": 0.0136, |
| "step": 8270 |
| }, |
| { |
| "epoch": 5.362694300518135, |
| "grad_norm": 0.19794131815433502, |
| "learning_rate": 7.872431742591268e-06, |
| "loss": 0.0165, |
| "step": 8280 |
| }, |
| { |
| "epoch": 5.369170984455959, |
| "grad_norm": 0.24394656717777252, |
| "learning_rate": 7.783603724899257e-06, |
| "loss": 0.0191, |
| "step": 8290 |
| }, |
| { |
| "epoch": 5.375647668393782, |
| "grad_norm": 0.17414677143096924, |
| "learning_rate": 7.695237378953223e-06, |
| "loss": 0.0108, |
| "step": 8300 |
| }, |
| { |
| "epoch": 5.382124352331607, |
| "grad_norm": 0.13544592261314392, |
| "learning_rate": 7.607333671113409e-06, |
| "loss": 0.0082, |
| "step": 8310 |
| }, |
| { |
| "epoch": 5.38860103626943, |
| "grad_norm": 0.1350071132183075, |
| "learning_rate": 7.519893562680663e-06, |
| "loss": 0.0077, |
| "step": 8320 |
| }, |
| { |
| "epoch": 5.3950777202072535, |
| "grad_norm": 0.20169180631637573, |
| "learning_rate": 7.432918009885997e-06, |
| "loss": 0.0178, |
| "step": 8330 |
| }, |
| { |
| "epoch": 5.401554404145077, |
| "grad_norm": 0.19569142162799835, |
| "learning_rate": 7.3464079638801365e-06, |
| "loss": 0.0186, |
| "step": 8340 |
| }, |
| { |
| "epoch": 5.408031088082901, |
| "grad_norm": 0.31749585270881653, |
| "learning_rate": 7.260364370723044e-06, |
| "loss": 0.0155, |
| "step": 8350 |
| }, |
| { |
| "epoch": 5.414507772020725, |
| "grad_norm": 0.18974344432353973, |
| "learning_rate": 7.174788171373731e-06, |
| "loss": 0.0136, |
| "step": 8360 |
| }, |
| { |
| "epoch": 5.420984455958549, |
| "grad_norm": 0.2608870267868042, |
| "learning_rate": 7.089680301679752e-06, |
| "loss": 0.0234, |
| "step": 8370 |
| }, |
| { |
| "epoch": 5.427461139896373, |
| "grad_norm": 0.08300212025642395, |
| "learning_rate": 7.005041692367154e-06, |
| "loss": 0.0143, |
| "step": 8380 |
| }, |
| { |
| "epoch": 5.433937823834197, |
| "grad_norm": 0.17654754221439362, |
| "learning_rate": 6.92087326903022e-06, |
| "loss": 0.0104, |
| "step": 8390 |
| }, |
| { |
| "epoch": 5.4404145077720205, |
| "grad_norm": 0.109446220099926, |
| "learning_rate": 6.837175952121306e-06, |
| "loss": 0.0106, |
| "step": 8400 |
| }, |
| { |
| "epoch": 5.446891191709844, |
| "grad_norm": 0.3720182776451111, |
| "learning_rate": 6.753950656940905e-06, |
| "loss": 0.012, |
| "step": 8410 |
| }, |
| { |
| "epoch": 5.453367875647668, |
| "grad_norm": 0.17289039492607117, |
| "learning_rate": 6.671198293627479e-06, |
| "loss": 0.0119, |
| "step": 8420 |
| }, |
| { |
| "epoch": 5.459844559585492, |
| "grad_norm": 0.21983198821544647, |
| "learning_rate": 6.588919767147639e-06, |
| "loss": 0.0111, |
| "step": 8430 |
| }, |
| { |
| "epoch": 5.466321243523316, |
| "grad_norm": 0.14437580108642578, |
| "learning_rate": 6.5071159772861436e-06, |
| "loss": 0.0152, |
| "step": 8440 |
| }, |
| { |
| "epoch": 5.47279792746114, |
| "grad_norm": 0.30240964889526367, |
| "learning_rate": 6.425787818636131e-06, |
| "loss": 0.014, |
| "step": 8450 |
| }, |
| { |
| "epoch": 5.479274611398964, |
| "grad_norm": 0.3552602529525757, |
| "learning_rate": 6.344936180589351e-06, |
| "loss": 0.0085, |
| "step": 8460 |
| }, |
| { |
| "epoch": 5.4857512953367875, |
| "grad_norm": 0.3316027820110321, |
| "learning_rate": 6.264561947326331e-06, |
| "loss": 0.0128, |
| "step": 8470 |
| }, |
| { |
| "epoch": 5.492227979274611, |
| "grad_norm": 0.17865358293056488, |
| "learning_rate": 6.184665997806832e-06, |
| "loss": 0.0135, |
| "step": 8480 |
| }, |
| { |
| "epoch": 5.498704663212435, |
| "grad_norm": 0.336990624666214, |
| "learning_rate": 6.1052492057601275e-06, |
| "loss": 0.0138, |
| "step": 8490 |
| }, |
| { |
| "epoch": 5.505181347150259, |
| "grad_norm": 0.23426495492458344, |
| "learning_rate": 6.026312439675552e-06, |
| "loss": 0.0149, |
| "step": 8500 |
| }, |
| { |
| "epoch": 5.511658031088083, |
| "grad_norm": 0.2391207218170166, |
| "learning_rate": 5.947856562792925e-06, |
| "loss": 0.0171, |
| "step": 8510 |
| }, |
| { |
| "epoch": 5.518134715025907, |
| "grad_norm": 0.1208881288766861, |
| "learning_rate": 5.869882433093155e-06, |
| "loss": 0.0166, |
| "step": 8520 |
| }, |
| { |
| "epoch": 5.524611398963731, |
| "grad_norm": 0.15781645476818085, |
| "learning_rate": 5.79239090328883e-06, |
| "loss": 0.0109, |
| "step": 8530 |
| }, |
| { |
| "epoch": 5.5310880829015545, |
| "grad_norm": 0.31300756335258484, |
| "learning_rate": 5.715382820814885e-06, |
| "loss": 0.0173, |
| "step": 8540 |
| }, |
| { |
| "epoch": 5.537564766839378, |
| "grad_norm": 0.13253220915794373, |
| "learning_rate": 5.6388590278194096e-06, |
| "loss": 0.0112, |
| "step": 8550 |
| }, |
| { |
| "epoch": 5.544041450777202, |
| "grad_norm": 0.1631617695093155, |
| "learning_rate": 5.562820361154314e-06, |
| "loss": 0.0117, |
| "step": 8560 |
| }, |
| { |
| "epoch": 5.550518134715026, |
| "grad_norm": 0.2857914865016937, |
| "learning_rate": 5.48726765236629e-06, |
| "loss": 0.011, |
| "step": 8570 |
| }, |
| { |
| "epoch": 5.55699481865285, |
| "grad_norm": 0.286710649728775, |
| "learning_rate": 5.412201727687644e-06, |
| "loss": 0.0109, |
| "step": 8580 |
| }, |
| { |
| "epoch": 5.563471502590674, |
| "grad_norm": 0.1950463354587555, |
| "learning_rate": 5.337623408027293e-06, |
| "loss": 0.0143, |
| "step": 8590 |
| }, |
| { |
| "epoch": 5.569948186528498, |
| "grad_norm": 0.1861039400100708, |
| "learning_rate": 5.263533508961827e-06, |
| "loss": 0.0098, |
| "step": 8600 |
| }, |
| { |
| "epoch": 5.5764248704663215, |
| "grad_norm": 0.266740083694458, |
| "learning_rate": 5.1899328407264855e-06, |
| "loss": 0.014, |
| "step": 8610 |
| }, |
| { |
| "epoch": 5.582901554404145, |
| "grad_norm": 0.2564321756362915, |
| "learning_rate": 5.116822208206396e-06, |
| "loss": 0.0104, |
| "step": 8620 |
| }, |
| { |
| "epoch": 5.589378238341969, |
| "grad_norm": 0.20271873474121094, |
| "learning_rate": 5.044202410927706e-06, |
| "loss": 0.017, |
| "step": 8630 |
| }, |
| { |
| "epoch": 5.595854922279793, |
| "grad_norm": 0.12181955575942993, |
| "learning_rate": 4.972074243048897e-06, |
| "loss": 0.0093, |
| "step": 8640 |
| }, |
| { |
| "epoch": 5.602331606217617, |
| "grad_norm": 0.28861793875694275, |
| "learning_rate": 4.900438493352055e-06, |
| "loss": 0.0105, |
| "step": 8650 |
| }, |
| { |
| "epoch": 5.608808290155441, |
| "grad_norm": 0.17971809208393097, |
| "learning_rate": 4.829295945234258e-06, |
| "loss": 0.011, |
| "step": 8660 |
| }, |
| { |
| "epoch": 5.615284974093264, |
| "grad_norm": 0.10693217068910599, |
| "learning_rate": 4.758647376699032e-06, |
| "loss": 0.0072, |
| "step": 8670 |
| }, |
| { |
| "epoch": 5.6217616580310885, |
| "grad_norm": 0.1231376975774765, |
| "learning_rate": 4.688493560347773e-06, |
| "loss": 0.0125, |
| "step": 8680 |
| }, |
| { |
| "epoch": 5.6282383419689115, |
| "grad_norm": 0.31137993931770325, |
| "learning_rate": 4.618835263371396e-06, |
| "loss": 0.0141, |
| "step": 8690 |
| }, |
| { |
| "epoch": 5.634715025906736, |
| "grad_norm": 0.10346370935440063, |
| "learning_rate": 4.549673247541875e-06, |
| "loss": 0.0096, |
| "step": 8700 |
| }, |
| { |
| "epoch": 5.641191709844559, |
| "grad_norm": 0.16310732066631317, |
| "learning_rate": 4.48100826920394e-06, |
| "loss": 0.0119, |
| "step": 8710 |
| }, |
| { |
| "epoch": 5.647668393782383, |
| "grad_norm": 0.12703819572925568, |
| "learning_rate": 4.412841079266777e-06, |
| "loss": 0.0097, |
| "step": 8720 |
| }, |
| { |
| "epoch": 5.654145077720207, |
| "grad_norm": 0.35846859216690063, |
| "learning_rate": 4.3451724231958644e-06, |
| "loss": 0.0152, |
| "step": 8730 |
| }, |
| { |
| "epoch": 5.660621761658031, |
| "grad_norm": 0.12785233557224274, |
| "learning_rate": 4.27800304100478e-06, |
| "loss": 0.0099, |
| "step": 8740 |
| }, |
| { |
| "epoch": 5.667098445595855, |
| "grad_norm": 0.17708571255207062, |
| "learning_rate": 4.2113336672471245e-06, |
| "loss": 0.013, |
| "step": 8750 |
| }, |
| { |
| "epoch": 5.6735751295336785, |
| "grad_norm": 0.4389305114746094, |
| "learning_rate": 4.145165031008508e-06, |
| "loss": 0.0092, |
| "step": 8760 |
| }, |
| { |
| "epoch": 5.680051813471502, |
| "grad_norm": 0.1950322389602661, |
| "learning_rate": 4.079497855898501e-06, |
| "loss": 0.0133, |
| "step": 8770 |
| }, |
| { |
| "epoch": 5.686528497409326, |
| "grad_norm": 0.2934739589691162, |
| "learning_rate": 4.01433286004283e-06, |
| "loss": 0.018, |
| "step": 8780 |
| }, |
| { |
| "epoch": 5.69300518134715, |
| "grad_norm": 0.21868778765201569, |
| "learning_rate": 3.949670756075447e-06, |
| "loss": 0.0178, |
| "step": 8790 |
| }, |
| { |
| "epoch": 5.699481865284974, |
| "grad_norm": 0.12526535987854004, |
| "learning_rate": 3.885512251130763e-06, |
| "loss": 0.0144, |
| "step": 8800 |
| }, |
| { |
| "epoch": 5.705958549222798, |
| "grad_norm": 0.3391956090927124, |
| "learning_rate": 3.821858046835913e-06, |
| "loss": 0.0216, |
| "step": 8810 |
| }, |
| { |
| "epoch": 5.712435233160622, |
| "grad_norm": 0.22200612723827362, |
| "learning_rate": 3.75870883930306e-06, |
| "loss": 0.0102, |
| "step": 8820 |
| }, |
| { |
| "epoch": 5.7189119170984455, |
| "grad_norm": 0.08342672139406204, |
| "learning_rate": 3.696065319121833e-06, |
| "loss": 0.0141, |
| "step": 8830 |
| }, |
| { |
| "epoch": 5.725388601036269, |
| "grad_norm": 0.3359827399253845, |
| "learning_rate": 3.6339281713517303e-06, |
| "loss": 0.0173, |
| "step": 8840 |
| }, |
| { |
| "epoch": 5.731865284974093, |
| "grad_norm": 0.2735726237297058, |
| "learning_rate": 3.5722980755146517e-06, |
| "loss": 0.0186, |
| "step": 8850 |
| }, |
| { |
| "epoch": 5.738341968911917, |
| "grad_norm": 0.2652912735939026, |
| "learning_rate": 3.511175705587433e-06, |
| "loss": 0.0101, |
| "step": 8860 |
| }, |
| { |
| "epoch": 5.744818652849741, |
| "grad_norm": 0.16694048047065735, |
| "learning_rate": 3.4505617299945336e-06, |
| "loss": 0.0114, |
| "step": 8870 |
| }, |
| { |
| "epoch": 5.751295336787565, |
| "grad_norm": 0.268002450466156, |
| "learning_rate": 3.390456811600673e-06, |
| "loss": 0.0199, |
| "step": 8880 |
| }, |
| { |
| "epoch": 5.757772020725389, |
| "grad_norm": 0.1921738237142563, |
| "learning_rate": 3.3308616077036115e-06, |
| "loss": 0.0119, |
| "step": 8890 |
| }, |
| { |
| "epoch": 5.7642487046632125, |
| "grad_norm": 0.17599613964557648, |
| "learning_rate": 3.271776770026963e-06, |
| "loss": 0.0137, |
| "step": 8900 |
| }, |
| { |
| "epoch": 5.770725388601036, |
| "grad_norm": 0.2630617618560791, |
| "learning_rate": 3.213202944713023e-06, |
| "loss": 0.0124, |
| "step": 8910 |
| }, |
| { |
| "epoch": 5.77720207253886, |
| "grad_norm": 0.13770775496959686, |
| "learning_rate": 3.155140772315773e-06, |
| "loss": 0.0125, |
| "step": 8920 |
| }, |
| { |
| "epoch": 5.783678756476684, |
| "grad_norm": 0.1589035540819168, |
| "learning_rate": 3.0975908877938277e-06, |
| "loss": 0.0093, |
| "step": 8930 |
| }, |
| { |
| "epoch": 5.790155440414508, |
| "grad_norm": 0.22588101029396057, |
| "learning_rate": 3.040553920503503e-06, |
| "loss": 0.0124, |
| "step": 8940 |
| }, |
| { |
| "epoch": 5.796632124352332, |
| "grad_norm": 0.15115275979042053, |
| "learning_rate": 2.9840304941919415e-06, |
| "loss": 0.015, |
| "step": 8950 |
| }, |
| { |
| "epoch": 5.803108808290156, |
| "grad_norm": 0.2765119969844818, |
| "learning_rate": 2.928021226990263e-06, |
| "loss": 0.0138, |
| "step": 8960 |
| }, |
| { |
| "epoch": 5.8095854922279795, |
| "grad_norm": 0.10985146462917328, |
| "learning_rate": 2.8725267314068495e-06, |
| "loss": 0.0123, |
| "step": 8970 |
| }, |
| { |
| "epoch": 5.816062176165803, |
| "grad_norm": 0.135583758354187, |
| "learning_rate": 2.817547614320615e-06, |
| "loss": 0.0134, |
| "step": 8980 |
| }, |
| { |
| "epoch": 5.822538860103627, |
| "grad_norm": 0.10807247459888458, |
| "learning_rate": 2.7630844769743757e-06, |
| "loss": 0.0109, |
| "step": 8990 |
| }, |
| { |
| "epoch": 5.829015544041451, |
| "grad_norm": 0.30291974544525146, |
| "learning_rate": 2.7091379149682685e-06, |
| "loss": 0.0145, |
| "step": 9000 |
| }, |
| { |
| "epoch": 5.835492227979275, |
| "grad_norm": 0.21395935118198395, |
| "learning_rate": 2.6557085182532582e-06, |
| "loss": 0.0225, |
| "step": 9010 |
| }, |
| { |
| "epoch": 5.841968911917099, |
| "grad_norm": 0.15883110463619232, |
| "learning_rate": 2.602796871124663e-06, |
| "loss": 0.0156, |
| "step": 9020 |
| }, |
| { |
| "epoch": 5.848445595854923, |
| "grad_norm": 0.20018735527992249, |
| "learning_rate": 2.5504035522157854e-06, |
| "loss": 0.0166, |
| "step": 9030 |
| }, |
| { |
| "epoch": 5.8549222797927465, |
| "grad_norm": 0.17231498658657074, |
| "learning_rate": 2.4985291344915674e-06, |
| "loss": 0.0115, |
| "step": 9040 |
| }, |
| { |
| "epoch": 5.86139896373057, |
| "grad_norm": 0.12376800179481506, |
| "learning_rate": 2.4471741852423237e-06, |
| "loss": 0.0117, |
| "step": 9050 |
| }, |
| { |
| "epoch": 5.867875647668393, |
| "grad_norm": 0.1932302713394165, |
| "learning_rate": 2.3963392660775575e-06, |
| "loss": 0.0169, |
| "step": 9060 |
| }, |
| { |
| "epoch": 5.874352331606218, |
| "grad_norm": 0.2442460060119629, |
| "learning_rate": 2.3460249329197824e-06, |
| "loss": 0.0119, |
| "step": 9070 |
| }, |
| { |
| "epoch": 5.880829015544041, |
| "grad_norm": 0.40588802099227905, |
| "learning_rate": 2.296231735998511e-06, |
| "loss": 0.0123, |
| "step": 9080 |
| }, |
| { |
| "epoch": 5.887305699481866, |
| "grad_norm": 0.13834908604621887, |
| "learning_rate": 2.2469602198441573e-06, |
| "loss": 0.0124, |
| "step": 9090 |
| }, |
| { |
| "epoch": 5.893782383419689, |
| "grad_norm": 0.12029829621315002, |
| "learning_rate": 2.1982109232821178e-06, |
| "loss": 0.0125, |
| "step": 9100 |
| }, |
| { |
| "epoch": 5.900259067357513, |
| "grad_norm": 0.23821647465229034, |
| "learning_rate": 2.149984379426906e-06, |
| "loss": 0.0104, |
| "step": 9110 |
| }, |
| { |
| "epoch": 5.9067357512953365, |
| "grad_norm": 0.14522571861743927, |
| "learning_rate": 2.102281115676258e-06, |
| "loss": 0.0095, |
| "step": 9120 |
| }, |
| { |
| "epoch": 5.91321243523316, |
| "grad_norm": 0.32294297218322754, |
| "learning_rate": 2.0551016537054493e-06, |
| "loss": 0.0195, |
| "step": 9130 |
| }, |
| { |
| "epoch": 5.919689119170984, |
| "grad_norm": 0.2826516628265381, |
| "learning_rate": 2.008446509461498e-06, |
| "loss": 0.0114, |
| "step": 9140 |
| }, |
| { |
| "epoch": 5.926165803108808, |
| "grad_norm": 0.19465412199497223, |
| "learning_rate": 1.962316193157593e-06, |
| "loss": 0.0108, |
| "step": 9150 |
| }, |
| { |
| "epoch": 5.932642487046632, |
| "grad_norm": 0.14356905221939087, |
| "learning_rate": 1.91671120926748e-06, |
| "loss": 0.0102, |
| "step": 9160 |
| }, |
| { |
| "epoch": 5.939119170984456, |
| "grad_norm": 0.1268174797296524, |
| "learning_rate": 1.8716320565199618e-06, |
| "loss": 0.0116, |
| "step": 9170 |
| }, |
| { |
| "epoch": 5.94559585492228, |
| "grad_norm": 0.4275663197040558, |
| "learning_rate": 1.8270792278934302e-06, |
| "loss": 0.0117, |
| "step": 9180 |
| }, |
| { |
| "epoch": 5.9520725388601035, |
| "grad_norm": 0.2674071192741394, |
| "learning_rate": 1.7830532106104747e-06, |
| "loss": 0.0129, |
| "step": 9190 |
| }, |
| { |
| "epoch": 5.958549222797927, |
| "grad_norm": 0.1318097561597824, |
| "learning_rate": 1.7395544861325718e-06, |
| "loss": 0.0089, |
| "step": 9200 |
| }, |
| { |
| "epoch": 5.965025906735751, |
| "grad_norm": 0.1907251924276352, |
| "learning_rate": 1.696583530154794e-06, |
| "loss": 0.0184, |
| "step": 9210 |
| }, |
| { |
| "epoch": 5.971502590673575, |
| "grad_norm": 0.12996065616607666, |
| "learning_rate": 1.6541408126006463e-06, |
| "loss": 0.0133, |
| "step": 9220 |
| }, |
| { |
| "epoch": 5.977979274611399, |
| "grad_norm": 0.1298728883266449, |
| "learning_rate": 1.6122267976168781e-06, |
| "loss": 0.0139, |
| "step": 9230 |
| }, |
| { |
| "epoch": 5.984455958549223, |
| "grad_norm": 0.23814009130001068, |
| "learning_rate": 1.5708419435684462e-06, |
| "loss": 0.0173, |
| "step": 9240 |
| }, |
| { |
| "epoch": 5.990932642487047, |
| "grad_norm": 0.2996975779533386, |
| "learning_rate": 1.5299867030334814e-06, |
| "loss": 0.0137, |
| "step": 9250 |
| }, |
| { |
| "epoch": 5.9974093264248705, |
| "grad_norm": 0.13092438876628876, |
| "learning_rate": 1.4896615227983468e-06, |
| "loss": 0.0119, |
| "step": 9260 |
| }, |
| { |
| "epoch": 6.003886010362694, |
| "grad_norm": 0.1732238382101059, |
| "learning_rate": 1.4498668438527597e-06, |
| "loss": 0.0134, |
| "step": 9270 |
| }, |
| { |
| "epoch": 6.010362694300518, |
| "grad_norm": 0.17689648270606995, |
| "learning_rate": 1.4106031013849496e-06, |
| "loss": 0.0073, |
| "step": 9280 |
| }, |
| { |
| "epoch": 6.016839378238342, |
| "grad_norm": 0.13013668358325958, |
| "learning_rate": 1.3718707247769135e-06, |
| "loss": 0.0157, |
| "step": 9290 |
| }, |
| { |
| "epoch": 6.023316062176166, |
| "grad_norm": 0.149629145860672, |
| "learning_rate": 1.333670137599713e-06, |
| "loss": 0.012, |
| "step": 9300 |
| }, |
| { |
| "epoch": 6.02979274611399, |
| "grad_norm": 0.12040393799543381, |
| "learning_rate": 1.2960017576088446e-06, |
| "loss": 0.0175, |
| "step": 9310 |
| }, |
| { |
| "epoch": 6.036269430051814, |
| "grad_norm": 0.28257250785827637, |
| "learning_rate": 1.2588659967397e-06, |
| "loss": 0.0281, |
| "step": 9320 |
| }, |
| { |
| "epoch": 6.0427461139896375, |
| "grad_norm": 0.1398344486951828, |
| "learning_rate": 1.222263261102985e-06, |
| "loss": 0.0151, |
| "step": 9330 |
| }, |
| { |
| "epoch": 6.049222797927461, |
| "grad_norm": 0.294515460729599, |
| "learning_rate": 1.1861939509803687e-06, |
| "loss": 0.0141, |
| "step": 9340 |
| }, |
| { |
| "epoch": 6.055699481865285, |
| "grad_norm": 0.06825648248195648, |
| "learning_rate": 1.1506584608200367e-06, |
| "loss": 0.0092, |
| "step": 9350 |
| }, |
| { |
| "epoch": 6.062176165803109, |
| "grad_norm": 0.06912140548229218, |
| "learning_rate": 1.1156571792324211e-06, |
| "loss": 0.008, |
| "step": 9360 |
| }, |
| { |
| "epoch": 6.068652849740933, |
| "grad_norm": 0.1832340657711029, |
| "learning_rate": 1.0811904889859336e-06, |
| "loss": 0.0195, |
| "step": 9370 |
| }, |
| { |
| "epoch": 6.075129533678757, |
| "grad_norm": 0.15293700993061066, |
| "learning_rate": 1.0472587670027678e-06, |
| "loss": 0.0075, |
| "step": 9380 |
| }, |
| { |
| "epoch": 6.081606217616581, |
| "grad_norm": 0.18111123144626617, |
| "learning_rate": 1.0138623843548078e-06, |
| "loss": 0.0173, |
| "step": 9390 |
| }, |
| { |
| "epoch": 6.0880829015544045, |
| "grad_norm": 0.1301887333393097, |
| "learning_rate": 9.810017062595322e-07, |
| "loss": 0.0125, |
| "step": 9400 |
| }, |
| { |
| "epoch": 6.094559585492228, |
| "grad_norm": 0.12335970997810364, |
| "learning_rate": 9.486770920760668e-07, |
| "loss": 0.0102, |
| "step": 9410 |
| }, |
| { |
| "epoch": 6.101036269430052, |
| "grad_norm": 0.1748531013727188, |
| "learning_rate": 9.168888953011989e-07, |
| "loss": 0.0129, |
| "step": 9420 |
| }, |
| { |
| "epoch": 6.107512953367876, |
| "grad_norm": 0.21782076358795166, |
| "learning_rate": 8.856374635655695e-07, |
| "loss": 0.0125, |
| "step": 9430 |
| }, |
| { |
| "epoch": 6.1139896373057, |
| "grad_norm": 0.269727498292923, |
| "learning_rate": 8.549231386298151e-07, |
| "loss": 0.0142, |
| "step": 9440 |
| }, |
| { |
| "epoch": 6.120466321243524, |
| "grad_norm": 0.10577181726694107, |
| "learning_rate": 8.247462563808817e-07, |
| "loss": 0.0093, |
| "step": 9450 |
| }, |
| { |
| "epoch": 6.126943005181348, |
| "grad_norm": 0.10951922833919525, |
| "learning_rate": 7.951071468283167e-07, |
| "loss": 0.0142, |
| "step": 9460 |
| }, |
| { |
| "epoch": 6.133419689119171, |
| "grad_norm": 0.181460440158844, |
| "learning_rate": 7.66006134100672e-07, |
| "loss": 0.0112, |
| "step": 9470 |
| }, |
| { |
| "epoch": 6.139896373056994, |
| "grad_norm": 0.10866732150316238, |
| "learning_rate": 7.374435364419674e-07, |
| "loss": 0.0076, |
| "step": 9480 |
| }, |
| { |
| "epoch": 6.146373056994818, |
| "grad_norm": 0.21688801050186157, |
| "learning_rate": 7.094196662081831e-07, |
| "loss": 0.0131, |
| "step": 9490 |
| }, |
| { |
| "epoch": 6.152849740932642, |
| "grad_norm": 0.17768456041812897, |
| "learning_rate": 6.819348298638839e-07, |
| "loss": 0.0104, |
| "step": 9500 |
| }, |
| { |
| "epoch": 6.159326424870466, |
| "grad_norm": 0.1749340444803238, |
| "learning_rate": 6.549893279788277e-07, |
| "loss": 0.0126, |
| "step": 9510 |
| }, |
| { |
| "epoch": 6.16580310880829, |
| "grad_norm": 0.08541977405548096, |
| "learning_rate": 6.285834552247128e-07, |
| "loss": 0.0097, |
| "step": 9520 |
| }, |
| { |
| "epoch": 6.172279792746114, |
| "grad_norm": 0.17559358477592468, |
| "learning_rate": 6.027175003719354e-07, |
| "loss": 0.0128, |
| "step": 9530 |
| }, |
| { |
| "epoch": 6.178756476683938, |
| "grad_norm": 0.16296590864658356, |
| "learning_rate": 5.773917462864264e-07, |
| "loss": 0.0137, |
| "step": 9540 |
| }, |
| { |
| "epoch": 6.185233160621761, |
| "grad_norm": 0.27558839321136475, |
| "learning_rate": 5.526064699265753e-07, |
| "loss": 0.0128, |
| "step": 9550 |
| }, |
| { |
| "epoch": 6.191709844559585, |
| "grad_norm": 0.26449841260910034, |
| "learning_rate": 5.283619423401998e-07, |
| "loss": 0.0139, |
| "step": 9560 |
| }, |
| { |
| "epoch": 6.198186528497409, |
| "grad_norm": 0.21926769614219666, |
| "learning_rate": 5.046584286615697e-07, |
| "loss": 0.0132, |
| "step": 9570 |
| }, |
| { |
| "epoch": 6.204663212435233, |
| "grad_norm": 0.08541421592235565, |
| "learning_rate": 4.814961881085045e-07, |
| "loss": 0.0058, |
| "step": 9580 |
| }, |
| { |
| "epoch": 6.211139896373057, |
| "grad_norm": 0.08796455711126328, |
| "learning_rate": 4.5887547397955864e-07, |
| "loss": 0.0103, |
| "step": 9590 |
| }, |
| { |
| "epoch": 6.217616580310881, |
| "grad_norm": 0.22742117941379547, |
| "learning_rate": 4.367965336512403e-07, |
| "loss": 0.0179, |
| "step": 9600 |
| }, |
| { |
| "epoch": 6.224093264248705, |
| "grad_norm": 0.09479006379842758, |
| "learning_rate": 4.1525960857530243e-07, |
| "loss": 0.0117, |
| "step": 9610 |
| }, |
| { |
| "epoch": 6.230569948186528, |
| "grad_norm": 0.24150408804416656, |
| "learning_rate": 3.9426493427611177e-07, |
| "loss": 0.0125, |
| "step": 9620 |
| }, |
| { |
| "epoch": 6.237046632124352, |
| "grad_norm": 0.07416193932294846, |
| "learning_rate": 3.738127403480507e-07, |
| "loss": 0.0125, |
| "step": 9630 |
| }, |
| { |
| "epoch": 6.243523316062176, |
| "grad_norm": 0.1948796957731247, |
| "learning_rate": 3.5390325045304706e-07, |
| "loss": 0.0089, |
| "step": 9640 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 0.15250228345394135, |
| "learning_rate": 3.3453668231809286e-07, |
| "loss": 0.0127, |
| "step": 9650 |
| }, |
| { |
| "epoch": 6.256476683937824, |
| "grad_norm": 0.1568685621023178, |
| "learning_rate": 3.157132477328628e-07, |
| "loss": 0.009, |
| "step": 9660 |
| }, |
| { |
| "epoch": 6.262953367875648, |
| "grad_norm": 0.23393931984901428, |
| "learning_rate": 2.9743315254743833e-07, |
| "loss": 0.023, |
| "step": 9670 |
| }, |
| { |
| "epoch": 6.269430051813472, |
| "grad_norm": 0.24607960879802704, |
| "learning_rate": 2.796965966699927e-07, |
| "loss": 0.0222, |
| "step": 9680 |
| }, |
| { |
| "epoch": 6.275906735751295, |
| "grad_norm": 0.2668175995349884, |
| "learning_rate": 2.625037740646763e-07, |
| "loss": 0.007, |
| "step": 9690 |
| }, |
| { |
| "epoch": 6.282383419689119, |
| "grad_norm": 0.1123218759894371, |
| "learning_rate": 2.458548727494292e-07, |
| "loss": 0.0178, |
| "step": 9700 |
| }, |
| { |
| "epoch": 6.288860103626943, |
| "grad_norm": 0.08573432266712189, |
| "learning_rate": 2.2975007479397738e-07, |
| "loss": 0.0097, |
| "step": 9710 |
| }, |
| { |
| "epoch": 6.295336787564767, |
| "grad_norm": 0.24590806663036346, |
| "learning_rate": 2.1418955631781202e-07, |
| "loss": 0.0102, |
| "step": 9720 |
| }, |
| { |
| "epoch": 6.301813471502591, |
| "grad_norm": 0.12596063315868378, |
| "learning_rate": 1.9917348748826335e-07, |
| "loss": 0.0142, |
| "step": 9730 |
| }, |
| { |
| "epoch": 6.308290155440415, |
| "grad_norm": 0.2864455282688141, |
| "learning_rate": 1.847020325186577e-07, |
| "loss": 0.0143, |
| "step": 9740 |
| }, |
| { |
| "epoch": 6.314766839378239, |
| "grad_norm": 0.31299856305122375, |
| "learning_rate": 1.7077534966650766e-07, |
| "loss": 0.0123, |
| "step": 9750 |
| }, |
| { |
| "epoch": 6.321243523316062, |
| "grad_norm": 0.2245355099439621, |
| "learning_rate": 1.5739359123178587e-07, |
| "loss": 0.0141, |
| "step": 9760 |
| }, |
| { |
| "epoch": 6.327720207253886, |
| "grad_norm": 0.23238977789878845, |
| "learning_rate": 1.4455690355525964e-07, |
| "loss": 0.0085, |
| "step": 9770 |
| }, |
| { |
| "epoch": 6.33419689119171, |
| "grad_norm": 0.1620815098285675, |
| "learning_rate": 1.3226542701689215e-07, |
| "loss": 0.0121, |
| "step": 9780 |
| }, |
| { |
| "epoch": 6.340673575129534, |
| "grad_norm": 0.14035151898860931, |
| "learning_rate": 1.2051929603428825e-07, |
| "loss": 0.0153, |
| "step": 9790 |
| }, |
| { |
| "epoch": 6.347150259067358, |
| "grad_norm": 0.12476951628923416, |
| "learning_rate": 1.0931863906127327e-07, |
| "loss": 0.0094, |
| "step": 9800 |
| }, |
| { |
| "epoch": 6.353626943005182, |
| "grad_norm": 0.1264141947031021, |
| "learning_rate": 9.866357858642205e-08, |
| "loss": 0.0108, |
| "step": 9810 |
| }, |
| { |
| "epoch": 6.360103626943006, |
| "grad_norm": 0.19493091106414795, |
| "learning_rate": 8.855423113177664e-08, |
| "loss": 0.0137, |
| "step": 9820 |
| }, |
| { |
| "epoch": 6.366580310880829, |
| "grad_norm": 0.2744383215904236, |
| "learning_rate": 7.899070725153613e-08, |
| "loss": 0.0088, |
| "step": 9830 |
| }, |
| { |
| "epoch": 6.373056994818652, |
| "grad_norm": 0.1168895810842514, |
| "learning_rate": 6.997311153086883e-08, |
| "loss": 0.0094, |
| "step": 9840 |
| }, |
| { |
| "epoch": 6.379533678756477, |
| "grad_norm": 0.15993544459342957, |
| "learning_rate": 6.150154258476315e-08, |
| "loss": 0.0146, |
| "step": 9850 |
| }, |
| { |
| "epoch": 6.3860103626943, |
| "grad_norm": 0.13558878004550934, |
| "learning_rate": 5.3576093056922906e-08, |
| "loss": 0.0111, |
| "step": 9860 |
| }, |
| { |
| "epoch": 6.392487046632124, |
| "grad_norm": 0.21038438379764557, |
| "learning_rate": 4.619684961881254e-08, |
| "loss": 0.0112, |
| "step": 9870 |
| }, |
| { |
| "epoch": 6.398963730569948, |
| "grad_norm": 0.24471516907215118, |
| "learning_rate": 3.936389296864129e-08, |
| "loss": 0.0093, |
| "step": 9880 |
| }, |
| { |
| "epoch": 6.405440414507772, |
| "grad_norm": 0.05535868927836418, |
| "learning_rate": 3.3077297830541584e-08, |
| "loss": 0.0146, |
| "step": 9890 |
| }, |
| { |
| "epoch": 6.4119170984455955, |
| "grad_norm": 0.34832239151000977, |
| "learning_rate": 2.7337132953697554e-08, |
| "loss": 0.0164, |
| "step": 9900 |
| }, |
| { |
| "epoch": 6.418393782383419, |
| "grad_norm": 0.11602523177862167, |
| "learning_rate": 2.214346111164556e-08, |
| "loss": 0.013, |
| "step": 9910 |
| }, |
| { |
| "epoch": 6.424870466321243, |
| "grad_norm": 0.12291716039180756, |
| "learning_rate": 1.749633910153592e-08, |
| "loss": 0.0098, |
| "step": 9920 |
| }, |
| { |
| "epoch": 6.431347150259067, |
| "grad_norm": 0.1366141438484192, |
| "learning_rate": 1.3395817743561134e-08, |
| "loss": 0.0132, |
| "step": 9930 |
| }, |
| { |
| "epoch": 6.437823834196891, |
| "grad_norm": 0.1532369703054428, |
| "learning_rate": 9.841941880361916e-09, |
| "loss": 0.0151, |
| "step": 9940 |
| }, |
| { |
| "epoch": 6.444300518134715, |
| "grad_norm": 0.12662509083747864, |
| "learning_rate": 6.834750376549792e-09, |
| "loss": 0.014, |
| "step": 9950 |
| }, |
| { |
| "epoch": 6.450777202072539, |
| "grad_norm": 0.07399441301822662, |
| "learning_rate": 4.3742761183018784e-09, |
| "loss": 0.0217, |
| "step": 9960 |
| }, |
| { |
| "epoch": 6.4572538860103625, |
| "grad_norm": 0.29164353013038635, |
| "learning_rate": 2.4605460129556445e-09, |
| "loss": 0.0128, |
| "step": 9970 |
| }, |
| { |
| "epoch": 6.463730569948186, |
| "grad_norm": 0.27305173873901367, |
| "learning_rate": 1.0935809887702154e-09, |
| "loss": 0.0113, |
| "step": 9980 |
| }, |
| { |
| "epoch": 6.47020725388601, |
| "grad_norm": 0.10412518680095673, |
| "learning_rate": 2.7339599464326627e-10, |
| "loss": 0.0121, |
| "step": 9990 |
| }, |
| { |
| "epoch": 6.476683937823834, |
| "grad_norm": 0.16107480227947235, |
| "learning_rate": 0.0, |
| "loss": 0.0127, |
| "step": 10000 |
| }, |
| { |
| "epoch": 6.476683937823834, |
| "step": 10000, |
| "total_flos": 3.3046784608923354e+17, |
| "train_loss": 0.039629857166856526, |
| "train_runtime": 2421.3782, |
| "train_samples_per_second": 66.078, |
| "train_steps_per_second": 4.13 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 7, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.3046784608923354e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|