{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 1000, "global_step": 43943, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022756753066472475, "grad_norm": 812.0, "learning_rate": 2.0477815699658704e-07, "loss": 11.2718, "step": 10 }, { "epoch": 0.0004551350613294495, "grad_norm": 884.0, "learning_rate": 4.3230944254835047e-07, "loss": 10.3364, "step": 20 }, { "epoch": 0.0006827025919941743, "grad_norm": 752.0, "learning_rate": 6.598407281001138e-07, "loss": 7.8486, "step": 30 }, { "epoch": 0.000910270122658899, "grad_norm": 540.0, "learning_rate": 8.873720136518772e-07, "loss": 7.4714, "step": 40 }, { "epoch": 0.0011378376533236238, "grad_norm": 544.0, "learning_rate": 1.1149032992036406e-06, "loss": 8.5469, "step": 50 }, { "epoch": 0.0013654051839883485, "grad_norm": 1192.0, "learning_rate": 1.342434584755404e-06, "loss": 10.141, "step": 60 }, { "epoch": 0.0015929727146530733, "grad_norm": 916.0, "learning_rate": 1.5699658703071675e-06, "loss": 6.4641, "step": 70 }, { "epoch": 0.001820540245317798, "grad_norm": 482.0, "learning_rate": 1.7974971558589307e-06, "loss": 5.5834, "step": 80 }, { "epoch": 0.002048107775982523, "grad_norm": 1480.0, "learning_rate": 2.025028441410694e-06, "loss": 5.2911, "step": 90 }, { "epoch": 0.0022756753066472475, "grad_norm": 660.0, "learning_rate": 2.2525597269624573e-06, "loss": 3.8162, "step": 100 }, { "epoch": 0.0025032428373119725, "grad_norm": 312.0, "learning_rate": 2.4800910125142208e-06, "loss": 3.4865, "step": 110 }, { "epoch": 0.002730810367976697, "grad_norm": 332.0, "learning_rate": 2.7076222980659846e-06, "loss": 1.789, "step": 120 }, { "epoch": 0.002958377898641422, "grad_norm": 772.0, "learning_rate": 2.9351535836177476e-06, "loss": 2.034, "step": 130 }, { "epoch": 0.0031859454293061465, "grad_norm": 251.0, "learning_rate": 3.162684869169511e-06, "loss": 2.5315, "step": 140 }, { "epoch": 0.0034135129599708715, "grad_norm": 256.0, "learning_rate": 3.390216154721274e-06, "loss": 2.2956, "step": 150 }, { "epoch": 0.003641080490635596, "grad_norm": 334.0, "learning_rate": 3.617747440273038e-06, "loss": 1.6865, "step": 160 }, { "epoch": 0.003868648021300321, "grad_norm": 828.0, "learning_rate": 3.845278725824802e-06, "loss": 1.9043, "step": 170 }, { "epoch": 0.004096215551965046, "grad_norm": 76.0, "learning_rate": 4.072810011376565e-06, "loss": 1.1723, "step": 180 }, { "epoch": 0.0043237830826297705, "grad_norm": 756.0, "learning_rate": 4.300341296928328e-06, "loss": 1.1716, "step": 190 }, { "epoch": 0.004551350613294495, "grad_norm": 53.75, "learning_rate": 4.527872582480091e-06, "loss": 1.0108, "step": 200 }, { "epoch": 0.00477891814395922, "grad_norm": 976.0, "learning_rate": 4.755403868031855e-06, "loss": 1.2443, "step": 210 }, { "epoch": 0.005006485674623945, "grad_norm": 201.0, "learning_rate": 4.982935153583618e-06, "loss": 1.0298, "step": 220 }, { "epoch": 0.0052340532052886695, "grad_norm": 556.0, "learning_rate": 5.210466439135382e-06, "loss": 1.5186, "step": 230 }, { "epoch": 0.005461620735953394, "grad_norm": 20.0, "learning_rate": 5.437997724687145e-06, "loss": 0.6934, "step": 240 }, { "epoch": 0.005689188266618119, "grad_norm": 234.0, "learning_rate": 5.665529010238908e-06, "loss": 1.6481, "step": 250 }, { "epoch": 0.005916755797282844, "grad_norm": 98.0, "learning_rate": 5.893060295790671e-06, "loss": 1.8, "step": 260 }, { "epoch": 0.0061443233279475686, "grad_norm": 1.453125, "learning_rate": 6.120591581342435e-06, "loss": 1.9409, "step": 270 }, { "epoch": 0.006371890858612293, "grad_norm": 452.0, "learning_rate": 6.348122866894198e-06, "loss": 1.3056, "step": 280 }, { "epoch": 0.006599458389277018, "grad_norm": 212.0, "learning_rate": 6.575654152445961e-06, "loss": 1.5522, "step": 290 }, { "epoch": 0.006827025919941743, "grad_norm": 944.0, "learning_rate": 6.803185437997726e-06, "loss": 0.947, "step": 300 }, { "epoch": 0.0070545934506064676, "grad_norm": 728.0, "learning_rate": 7.030716723549489e-06, "loss": 1.137, "step": 310 }, { "epoch": 0.007282160981271192, "grad_norm": 126.0, "learning_rate": 7.258248009101252e-06, "loss": 0.692, "step": 320 }, { "epoch": 0.007509728511935917, "grad_norm": 215.0, "learning_rate": 7.485779294653015e-06, "loss": 0.93, "step": 330 }, { "epoch": 0.007737296042600642, "grad_norm": 732.0, "learning_rate": 7.713310580204779e-06, "loss": 1.4648, "step": 340 }, { "epoch": 0.007964863573265367, "grad_norm": 1.0625, "learning_rate": 7.940841865756542e-06, "loss": 1.1252, "step": 350 }, { "epoch": 0.008192431103930092, "grad_norm": 13.9375, "learning_rate": 8.168373151308305e-06, "loss": 1.1186, "step": 360 }, { "epoch": 0.008419998634594816, "grad_norm": 108.5, "learning_rate": 8.395904436860068e-06, "loss": 1.6202, "step": 370 }, { "epoch": 0.008647566165259541, "grad_norm": 0.033447265625, "learning_rate": 8.623435722411833e-06, "loss": 1.4176, "step": 380 }, { "epoch": 0.008875133695924265, "grad_norm": 450.0, "learning_rate": 8.850967007963596e-06, "loss": 1.8093, "step": 390 }, { "epoch": 0.00910270122658899, "grad_norm": 95.0, "learning_rate": 9.078498293515359e-06, "loss": 1.8751, "step": 400 }, { "epoch": 0.009330268757253716, "grad_norm": 185.0, "learning_rate": 9.306029579067123e-06, "loss": 1.7214, "step": 410 }, { "epoch": 0.00955783628791844, "grad_norm": 7.59375, "learning_rate": 9.533560864618886e-06, "loss": 0.9538, "step": 420 }, { "epoch": 0.009785403818583165, "grad_norm": 370.0, "learning_rate": 9.76109215017065e-06, "loss": 1.9393, "step": 430 }, { "epoch": 0.01001297134924789, "grad_norm": 178.0, "learning_rate": 9.988623435722412e-06, "loss": 1.9089, "step": 440 }, { "epoch": 0.010240538879912614, "grad_norm": 342.0, "learning_rate": 1.0216154721274177e-05, "loss": 0.6716, "step": 450 }, { "epoch": 0.010468106410577339, "grad_norm": 241.0, "learning_rate": 1.044368600682594e-05, "loss": 2.0107, "step": 460 }, { "epoch": 0.010695673941242063, "grad_norm": 202.0, "learning_rate": 1.0671217292377703e-05, "loss": 1.106, "step": 470 }, { "epoch": 0.010923241471906788, "grad_norm": 272.0, "learning_rate": 1.0898748577929466e-05, "loss": 1.1183, "step": 480 }, { "epoch": 0.011150809002571514, "grad_norm": 187.0, "learning_rate": 1.1126279863481229e-05, "loss": 0.7857, "step": 490 }, { "epoch": 0.011378376533236237, "grad_norm": 80.5, "learning_rate": 1.1353811149032994e-05, "loss": 1.2253, "step": 500 }, { "epoch": 0.011605944063900963, "grad_norm": 280.0, "learning_rate": 1.1581342434584757e-05, "loss": 0.9281, "step": 510 }, { "epoch": 0.011833511594565688, "grad_norm": 178.0, "learning_rate": 1.180887372013652e-05, "loss": 1.0197, "step": 520 }, { "epoch": 0.012061079125230412, "grad_norm": 45.25, "learning_rate": 1.2036405005688283e-05, "loss": 0.924, "step": 530 }, { "epoch": 0.012288646655895137, "grad_norm": 0.7109375, "learning_rate": 1.2263936291240046e-05, "loss": 1.0238, "step": 540 }, { "epoch": 0.01251621418655986, "grad_norm": 0.01190185546875, "learning_rate": 1.2491467576791809e-05, "loss": 0.7098, "step": 550 }, { "epoch": 0.012743781717224586, "grad_norm": 24.625, "learning_rate": 1.2718998862343572e-05, "loss": 0.7466, "step": 560 }, { "epoch": 0.012971349247889312, "grad_norm": 96.0, "learning_rate": 1.2946530147895338e-05, "loss": 0.7371, "step": 570 }, { "epoch": 0.013198916778554035, "grad_norm": 233.0, "learning_rate": 1.3174061433447101e-05, "loss": 2.01, "step": 580 }, { "epoch": 0.01342648430921876, "grad_norm": 5.4375, "learning_rate": 1.3401592718998864e-05, "loss": 0.6927, "step": 590 }, { "epoch": 0.013654051839883486, "grad_norm": 164.0, "learning_rate": 1.3629124004550627e-05, "loss": 1.0115, "step": 600 }, { "epoch": 0.01388161937054821, "grad_norm": 278.0, "learning_rate": 1.385665529010239e-05, "loss": 1.4222, "step": 610 }, { "epoch": 0.014109186901212935, "grad_norm": 20.5, "learning_rate": 1.4084186575654153e-05, "loss": 1.1939, "step": 620 }, { "epoch": 0.01433675443187766, "grad_norm": 184.0, "learning_rate": 1.4311717861205916e-05, "loss": 1.2414, "step": 630 }, { "epoch": 0.014564321962542384, "grad_norm": 78.5, "learning_rate": 1.4539249146757681e-05, "loss": 1.2714, "step": 640 }, { "epoch": 0.01479188949320711, "grad_norm": 300.0, "learning_rate": 1.4766780432309444e-05, "loss": 1.0122, "step": 650 }, { "epoch": 0.015019457023871833, "grad_norm": 318.0, "learning_rate": 1.4994311717861207e-05, "loss": 1.2019, "step": 660 }, { "epoch": 0.015247024554536559, "grad_norm": 692.0, "learning_rate": 1.522184300341297e-05, "loss": 1.1806, "step": 670 }, { "epoch": 0.015474592085201284, "grad_norm": 5.90625, "learning_rate": 1.5449374288964735e-05, "loss": 0.9984, "step": 680 }, { "epoch": 0.015702159615866008, "grad_norm": 524.0, "learning_rate": 1.5676905574516498e-05, "loss": 1.6459, "step": 690 }, { "epoch": 0.015929727146530733, "grad_norm": 53.0, "learning_rate": 1.590443686006826e-05, "loss": 0.3876, "step": 700 }, { "epoch": 0.01615729467719546, "grad_norm": 150.0, "learning_rate": 1.6131968145620024e-05, "loss": 1.0511, "step": 710 }, { "epoch": 0.016384862207860184, "grad_norm": 532.0, "learning_rate": 1.6359499431171787e-05, "loss": 1.1115, "step": 720 }, { "epoch": 0.016612429738524906, "grad_norm": 212.0, "learning_rate": 1.658703071672355e-05, "loss": 1.2553, "step": 730 }, { "epoch": 0.01683999726918963, "grad_norm": 161.0, "learning_rate": 1.6814562002275313e-05, "loss": 1.1006, "step": 740 }, { "epoch": 0.017067564799854357, "grad_norm": 17.625, "learning_rate": 1.7042093287827076e-05, "loss": 1.7845, "step": 750 }, { "epoch": 0.017295132330519082, "grad_norm": 158.0, "learning_rate": 1.726962457337884e-05, "loss": 1.8899, "step": 760 }, { "epoch": 0.017522699861183808, "grad_norm": 61.75, "learning_rate": 1.7497155858930602e-05, "loss": 0.6635, "step": 770 }, { "epoch": 0.01775026739184853, "grad_norm": 380.0, "learning_rate": 1.7724687144482368e-05, "loss": 0.9673, "step": 780 }, { "epoch": 0.017977834922513255, "grad_norm": 270.0, "learning_rate": 1.795221843003413e-05, "loss": 2.6468, "step": 790 }, { "epoch": 0.01820540245317798, "grad_norm": 211.0, "learning_rate": 1.8179749715585894e-05, "loss": 0.5992, "step": 800 }, { "epoch": 0.018432969983842706, "grad_norm": 176.0, "learning_rate": 1.8407281001137657e-05, "loss": 0.5747, "step": 810 }, { "epoch": 0.01866053751450743, "grad_norm": 692.0, "learning_rate": 1.863481228668942e-05, "loss": 0.9565, "step": 820 }, { "epoch": 0.018888105045172156, "grad_norm": 366.0, "learning_rate": 1.8862343572241183e-05, "loss": 2.7505, "step": 830 }, { "epoch": 0.01911567257583688, "grad_norm": 80.0, "learning_rate": 1.9089874857792946e-05, "loss": 1.6672, "step": 840 }, { "epoch": 0.019343240106501604, "grad_norm": 0.052734375, "learning_rate": 1.9317406143344713e-05, "loss": 1.2575, "step": 850 }, { "epoch": 0.01957080763716633, "grad_norm": 48.75, "learning_rate": 1.9544937428896476e-05, "loss": 1.5053, "step": 860 }, { "epoch": 0.019798375167831055, "grad_norm": 4.71875, "learning_rate": 1.977246871444824e-05, "loss": 0.8211, "step": 870 }, { "epoch": 0.02002594269849578, "grad_norm": 35.5, "learning_rate": 2e-05, "loss": 1.5695, "step": 880 }, { "epoch": 0.020253510229160502, "grad_norm": 62.25, "learning_rate": 1.999535574958202e-05, "loss": 1.2563, "step": 890 }, { "epoch": 0.020481077759825227, "grad_norm": 184.0, "learning_rate": 1.9990711499164036e-05, "loss": 1.0591, "step": 900 }, { "epoch": 0.020708645290489953, "grad_norm": 494.0, "learning_rate": 1.9986067248746056e-05, "loss": 1.5684, "step": 910 }, { "epoch": 0.020936212821154678, "grad_norm": 50.0, "learning_rate": 1.998142299832807e-05, "loss": 1.0449, "step": 920 }, { "epoch": 0.021163780351819404, "grad_norm": 99.0, "learning_rate": 1.9976778747910087e-05, "loss": 1.009, "step": 930 }, { "epoch": 0.021391347882484125, "grad_norm": 132.0, "learning_rate": 1.9972134497492107e-05, "loss": 1.0228, "step": 940 }, { "epoch": 0.02161891541314885, "grad_norm": 532.0, "learning_rate": 1.9967490247074124e-05, "loss": 2.0096, "step": 950 }, { "epoch": 0.021846482943813576, "grad_norm": 61.0, "learning_rate": 1.996284599665614e-05, "loss": 1.0553, "step": 960 }, { "epoch": 0.0220740504744783, "grad_norm": 104.5, "learning_rate": 1.995820174623816e-05, "loss": 1.25, "step": 970 }, { "epoch": 0.022301618005143027, "grad_norm": 42.75, "learning_rate": 1.9953557495820176e-05, "loss": 0.8325, "step": 980 }, { "epoch": 0.022529185535807753, "grad_norm": 0.1728515625, "learning_rate": 1.9948913245402193e-05, "loss": 0.5951, "step": 990 }, { "epoch": 0.022756753066472474, "grad_norm": 143.0, "learning_rate": 1.994426899498421e-05, "loss": 0.3057, "step": 1000 }, { "epoch": 0.0229843205971372, "grad_norm": 106.0, "learning_rate": 1.9939624744566227e-05, "loss": 1.384, "step": 1010 }, { "epoch": 0.023211888127801925, "grad_norm": 211.0, "learning_rate": 1.9934980494148247e-05, "loss": 2.4723, "step": 1020 }, { "epoch": 0.02343945565846665, "grad_norm": 564.0, "learning_rate": 1.9930336243730264e-05, "loss": 1.3514, "step": 1030 }, { "epoch": 0.023667023189131376, "grad_norm": 7.5, "learning_rate": 1.992569199331228e-05, "loss": 2.2944, "step": 1040 }, { "epoch": 0.023894590719796098, "grad_norm": 0.09423828125, "learning_rate": 1.99210477428943e-05, "loss": 0.3816, "step": 1050 }, { "epoch": 0.024122158250460823, "grad_norm": 0.001495361328125, "learning_rate": 1.9916403492476315e-05, "loss": 0.9169, "step": 1060 }, { "epoch": 0.02434972578112555, "grad_norm": 118.0, "learning_rate": 1.9911759242058332e-05, "loss": 1.8365, "step": 1070 }, { "epoch": 0.024577293311790274, "grad_norm": 113.5, "learning_rate": 1.990711499164035e-05, "loss": 1.2963, "step": 1080 }, { "epoch": 0.024804860842455, "grad_norm": 0.083984375, "learning_rate": 1.990247074122237e-05, "loss": 0.5726, "step": 1090 }, { "epoch": 0.02503242837311972, "grad_norm": 148.0, "learning_rate": 1.9897826490804387e-05, "loss": 0.8862, "step": 1100 }, { "epoch": 0.025259995903784447, "grad_norm": 133.0, "learning_rate": 1.9893182240386404e-05, "loss": 0.4664, "step": 1110 }, { "epoch": 0.025487563434449172, "grad_norm": 127.0, "learning_rate": 1.988853798996842e-05, "loss": 1.0277, "step": 1120 }, { "epoch": 0.025715130965113898, "grad_norm": 141.0, "learning_rate": 1.9883893739550438e-05, "loss": 2.3615, "step": 1130 }, { "epoch": 0.025942698495778623, "grad_norm": 0.26171875, "learning_rate": 1.9879249489132455e-05, "loss": 0.6918, "step": 1140 }, { "epoch": 0.02617026602644335, "grad_norm": 212.0, "learning_rate": 1.9874605238714472e-05, "loss": 1.9012, "step": 1150 }, { "epoch": 0.02639783355710807, "grad_norm": 0.0908203125, "learning_rate": 1.986996098829649e-05, "loss": 1.6964, "step": 1160 }, { "epoch": 0.026625401087772796, "grad_norm": 494.0, "learning_rate": 1.986531673787851e-05, "loss": 1.7084, "step": 1170 }, { "epoch": 0.02685296861843752, "grad_norm": 412.0, "learning_rate": 1.9860672487460527e-05, "loss": 1.8972, "step": 1180 }, { "epoch": 0.027080536149102247, "grad_norm": 157.0, "learning_rate": 1.9856028237042544e-05, "loss": 1.0771, "step": 1190 }, { "epoch": 0.027308103679766972, "grad_norm": 89.5, "learning_rate": 1.985138398662456e-05, "loss": 1.5232, "step": 1200 }, { "epoch": 0.027535671210431694, "grad_norm": 2.046875, "learning_rate": 1.9846739736206578e-05, "loss": 0.568, "step": 1210 }, { "epoch": 0.02776323874109642, "grad_norm": 0.00019168853759765625, "learning_rate": 1.9842095485788595e-05, "loss": 1.0279, "step": 1220 }, { "epoch": 0.027990806271761145, "grad_norm": 314.0, "learning_rate": 1.9837451235370612e-05, "loss": 1.6179, "step": 1230 }, { "epoch": 0.02821837380242587, "grad_norm": 920.0, "learning_rate": 1.983280698495263e-05, "loss": 1.7443, "step": 1240 }, { "epoch": 0.028445941333090596, "grad_norm": 93.0, "learning_rate": 1.982816273453465e-05, "loss": 0.8336, "step": 1250 }, { "epoch": 0.02867350886375532, "grad_norm": 0.0016937255859375, "learning_rate": 1.9823518484116667e-05, "loss": 1.2344, "step": 1260 }, { "epoch": 0.028901076394420043, "grad_norm": 146.0, "learning_rate": 1.981887423369868e-05, "loss": 1.2899, "step": 1270 }, { "epoch": 0.02912864392508477, "grad_norm": 145.0, "learning_rate": 1.98142299832807e-05, "loss": 1.1792, "step": 1280 }, { "epoch": 0.029356211455749494, "grad_norm": 131.0, "learning_rate": 1.9809585732862718e-05, "loss": 1.18, "step": 1290 }, { "epoch": 0.02958377898641422, "grad_norm": 324.0, "learning_rate": 1.9804941482444735e-05, "loss": 1.3393, "step": 1300 }, { "epoch": 0.029811346517078945, "grad_norm": 134.0, "learning_rate": 1.9800297232026752e-05, "loss": 1.2237, "step": 1310 }, { "epoch": 0.030038914047743667, "grad_norm": 13.3125, "learning_rate": 1.979565298160877e-05, "loss": 1.9049, "step": 1320 }, { "epoch": 0.030266481578408392, "grad_norm": 0.0030517578125, "learning_rate": 1.979100873119079e-05, "loss": 0.7462, "step": 1330 }, { "epoch": 0.030494049109073117, "grad_norm": 252.0, "learning_rate": 1.9786364480772803e-05, "loss": 2.214, "step": 1340 }, { "epoch": 0.030721616639737843, "grad_norm": 440.0, "learning_rate": 1.978172023035482e-05, "loss": 1.1145, "step": 1350 }, { "epoch": 0.030949184170402568, "grad_norm": 169.0, "learning_rate": 1.977707597993684e-05, "loss": 1.1504, "step": 1360 }, { "epoch": 0.03117675170106729, "grad_norm": 51.5, "learning_rate": 1.9772431729518858e-05, "loss": 1.1276, "step": 1370 }, { "epoch": 0.031404319231732016, "grad_norm": 202.0, "learning_rate": 1.9767787479100875e-05, "loss": 1.4093, "step": 1380 }, { "epoch": 0.03163188676239674, "grad_norm": 8.5, "learning_rate": 1.9763143228682892e-05, "loss": 0.7489, "step": 1390 }, { "epoch": 0.031859454293061466, "grad_norm": 49.0, "learning_rate": 1.9758498978264912e-05, "loss": 1.858, "step": 1400 }, { "epoch": 0.03208702182372619, "grad_norm": 80.5, "learning_rate": 1.9753854727846926e-05, "loss": 0.9938, "step": 1410 }, { "epoch": 0.03231458935439092, "grad_norm": 16.375, "learning_rate": 1.9749210477428943e-05, "loss": 1.2759, "step": 1420 }, { "epoch": 0.03254215688505564, "grad_norm": 46.75, "learning_rate": 1.9744566227010963e-05, "loss": 1.0914, "step": 1430 }, { "epoch": 0.03276972441572037, "grad_norm": 0.484375, "learning_rate": 1.973992197659298e-05, "loss": 0.8914, "step": 1440 }, { "epoch": 0.032997291946385086, "grad_norm": 196.0, "learning_rate": 1.9735277726174997e-05, "loss": 1.3525, "step": 1450 }, { "epoch": 0.03322485947704981, "grad_norm": 216.0, "learning_rate": 1.9730633475757015e-05, "loss": 1.2875, "step": 1460 }, { "epoch": 0.03345242700771454, "grad_norm": 130.0, "learning_rate": 1.972598922533903e-05, "loss": 0.8692, "step": 1470 }, { "epoch": 0.03367999453837926, "grad_norm": 34.5, "learning_rate": 1.972134497492105e-05, "loss": 1.2848, "step": 1480 }, { "epoch": 0.03390756206904399, "grad_norm": 1.3046875, "learning_rate": 1.9716700724503066e-05, "loss": 1.1641, "step": 1490 }, { "epoch": 0.03413512959970871, "grad_norm": 95.0, "learning_rate": 1.9712056474085083e-05, "loss": 0.8262, "step": 1500 }, { "epoch": 0.03436269713037344, "grad_norm": 0.1142578125, "learning_rate": 1.9707412223667103e-05, "loss": 1.7248, "step": 1510 }, { "epoch": 0.034590264661038164, "grad_norm": 330.0, "learning_rate": 1.970276797324912e-05, "loss": 0.7362, "step": 1520 }, { "epoch": 0.03481783219170289, "grad_norm": 135.0, "learning_rate": 1.9698123722831137e-05, "loss": 0.5457, "step": 1530 }, { "epoch": 0.035045399722367615, "grad_norm": 23.75, "learning_rate": 1.9693479472413154e-05, "loss": 0.5933, "step": 1540 }, { "epoch": 0.03527296725303234, "grad_norm": 188.0, "learning_rate": 1.968883522199517e-05, "loss": 1.4926, "step": 1550 }, { "epoch": 0.03550053478369706, "grad_norm": 0.5625, "learning_rate": 1.968419097157719e-05, "loss": 0.7648, "step": 1560 }, { "epoch": 0.035728102314361784, "grad_norm": 0.06005859375, "learning_rate": 1.9679546721159206e-05, "loss": 1.7118, "step": 1570 }, { "epoch": 0.03595566984502651, "grad_norm": 616.0, "learning_rate": 1.9674902470741223e-05, "loss": 1.1693, "step": 1580 }, { "epoch": 0.036183237375691235, "grad_norm": 0.80859375, "learning_rate": 1.9670258220323243e-05, "loss": 0.4716, "step": 1590 }, { "epoch": 0.03641080490635596, "grad_norm": 253.0, "learning_rate": 1.966561396990526e-05, "loss": 0.9428, "step": 1600 }, { "epoch": 0.036638372437020686, "grad_norm": 450.0, "learning_rate": 1.9660969719487277e-05, "loss": 1.91, "step": 1610 }, { "epoch": 0.03686593996768541, "grad_norm": 218.0, "learning_rate": 1.9656325469069294e-05, "loss": 0.4805, "step": 1620 }, { "epoch": 0.03709350749835014, "grad_norm": 2.09375, "learning_rate": 1.965168121865131e-05, "loss": 1.2511, "step": 1630 }, { "epoch": 0.03732107502901486, "grad_norm": 7.65625, "learning_rate": 1.9647036968233328e-05, "loss": 0.8677, "step": 1640 }, { "epoch": 0.03754864255967959, "grad_norm": 7.84375, "learning_rate": 1.9642392717815345e-05, "loss": 0.6325, "step": 1650 }, { "epoch": 0.03777621009034431, "grad_norm": 1.203125, "learning_rate": 1.9637748467397362e-05, "loss": 0.6305, "step": 1660 }, { "epoch": 0.03800377762100903, "grad_norm": 0.0186767578125, "learning_rate": 1.9633104216979383e-05, "loss": 0.8823, "step": 1670 }, { "epoch": 0.03823134515167376, "grad_norm": 218.0, "learning_rate": 1.96284599665614e-05, "loss": 1.872, "step": 1680 }, { "epoch": 0.03845891268233848, "grad_norm": 80.0, "learning_rate": 1.9623815716143414e-05, "loss": 2.0605, "step": 1690 }, { "epoch": 0.03868648021300321, "grad_norm": 300.0, "learning_rate": 1.9619171465725434e-05, "loss": 1.421, "step": 1700 }, { "epoch": 0.03891404774366793, "grad_norm": 1.25, "learning_rate": 1.961452721530745e-05, "loss": 0.7913, "step": 1710 }, { "epoch": 0.03914161527433266, "grad_norm": 60.0, "learning_rate": 1.9609882964889468e-05, "loss": 0.6761, "step": 1720 }, { "epoch": 0.039369182804997384, "grad_norm": 162.0, "learning_rate": 1.9605238714471485e-05, "loss": 1.4394, "step": 1730 }, { "epoch": 0.03959675033566211, "grad_norm": 74.5, "learning_rate": 1.9600594464053506e-05, "loss": 1.4238, "step": 1740 }, { "epoch": 0.039824317866326835, "grad_norm": 8.5, "learning_rate": 1.9595950213635523e-05, "loss": 0.8131, "step": 1750 }, { "epoch": 0.04005188539699156, "grad_norm": 38.5, "learning_rate": 1.9591305963217536e-05, "loss": 0.4427, "step": 1760 }, { "epoch": 0.040279452927656285, "grad_norm": 155.0, "learning_rate": 1.9586661712799553e-05, "loss": 0.8234, "step": 1770 }, { "epoch": 0.040507020458321004, "grad_norm": 14.8125, "learning_rate": 1.9582017462381574e-05, "loss": 1.557, "step": 1780 }, { "epoch": 0.04073458798898573, "grad_norm": 239.0, "learning_rate": 1.957737321196359e-05, "loss": 1.9888, "step": 1790 }, { "epoch": 0.040962155519650455, "grad_norm": 23.5, "learning_rate": 1.9572728961545608e-05, "loss": 0.8928, "step": 1800 }, { "epoch": 0.04118972305031518, "grad_norm": 27.25, "learning_rate": 1.9568084711127625e-05, "loss": 1.3141, "step": 1810 }, { "epoch": 0.041417290580979906, "grad_norm": 1.125, "learning_rate": 1.9563440460709645e-05, "loss": 1.3772, "step": 1820 }, { "epoch": 0.04164485811164463, "grad_norm": 201.0, "learning_rate": 1.955879621029166e-05, "loss": 2.2851, "step": 1830 }, { "epoch": 0.041872425642309356, "grad_norm": 134.0, "learning_rate": 1.9554151959873676e-05, "loss": 1.3275, "step": 1840 }, { "epoch": 0.04209999317297408, "grad_norm": 143.0, "learning_rate": 1.9549507709455697e-05, "loss": 1.6835, "step": 1850 }, { "epoch": 0.04232756070363881, "grad_norm": 105.5, "learning_rate": 1.9544863459037714e-05, "loss": 1.5849, "step": 1860 }, { "epoch": 0.04255512823430353, "grad_norm": 55.25, "learning_rate": 1.954021920861973e-05, "loss": 0.785, "step": 1870 }, { "epoch": 0.04278269576496825, "grad_norm": 239.0, "learning_rate": 1.9535574958201748e-05, "loss": 1.3229, "step": 1880 }, { "epoch": 0.043010263295632976, "grad_norm": 11.0625, "learning_rate": 1.9530930707783765e-05, "loss": 0.8176, "step": 1890 }, { "epoch": 0.0432378308262977, "grad_norm": 0.0126953125, "learning_rate": 1.9526286457365782e-05, "loss": 0.9469, "step": 1900 }, { "epoch": 0.04346539835696243, "grad_norm": 251.0, "learning_rate": 1.95216422069478e-05, "loss": 1.5199, "step": 1910 }, { "epoch": 0.04369296588762715, "grad_norm": 227.0, "learning_rate": 1.9516997956529816e-05, "loss": 0.6017, "step": 1920 }, { "epoch": 0.04392053341829188, "grad_norm": 223.0, "learning_rate": 1.9512353706111836e-05, "loss": 0.2719, "step": 1930 }, { "epoch": 0.0441481009489566, "grad_norm": 188.0, "learning_rate": 1.9507709455693854e-05, "loss": 3.0157, "step": 1940 }, { "epoch": 0.04437566847962133, "grad_norm": 50.5, "learning_rate": 1.950306520527587e-05, "loss": 0.5819, "step": 1950 }, { "epoch": 0.044603236010286054, "grad_norm": 0.185546875, "learning_rate": 1.9498420954857888e-05, "loss": 0.272, "step": 1960 }, { "epoch": 0.04483080354095078, "grad_norm": 572.0, "learning_rate": 1.9493776704439905e-05, "loss": 1.2198, "step": 1970 }, { "epoch": 0.045058371071615505, "grad_norm": 13.3125, "learning_rate": 1.9489132454021922e-05, "loss": 1.1429, "step": 1980 }, { "epoch": 0.045285938602280223, "grad_norm": 0.09130859375, "learning_rate": 1.948448820360394e-05, "loss": 0.8416, "step": 1990 }, { "epoch": 0.04551350613294495, "grad_norm": 64.0, "learning_rate": 1.9479843953185956e-05, "loss": 0.7706, "step": 2000 }, { "epoch": 0.045741073663609674, "grad_norm": 147.0, "learning_rate": 1.9475199702767976e-05, "loss": 0.6702, "step": 2010 }, { "epoch": 0.0459686411942744, "grad_norm": 142.0, "learning_rate": 1.9470555452349993e-05, "loss": 2.0384, "step": 2020 }, { "epoch": 0.046196208724939125, "grad_norm": 87.0, "learning_rate": 1.946591120193201e-05, "loss": 0.8487, "step": 2030 }, { "epoch": 0.04642377625560385, "grad_norm": 0.2314453125, "learning_rate": 1.9461266951514027e-05, "loss": 0.8028, "step": 2040 }, { "epoch": 0.046651343786268576, "grad_norm": 100.0, "learning_rate": 1.9456622701096044e-05, "loss": 1.4738, "step": 2050 }, { "epoch": 0.0468789113169333, "grad_norm": 96.5, "learning_rate": 1.945197845067806e-05, "loss": 0.7392, "step": 2060 }, { "epoch": 0.04710647884759803, "grad_norm": 476.0, "learning_rate": 1.944733420026008e-05, "loss": 1.2594, "step": 2070 }, { "epoch": 0.04733404637826275, "grad_norm": 152.0, "learning_rate": 1.9442689949842096e-05, "loss": 1.0102, "step": 2080 }, { "epoch": 0.04756161390892748, "grad_norm": 0.19140625, "learning_rate": 1.9438045699424116e-05, "loss": 1.063, "step": 2090 }, { "epoch": 0.047789181439592196, "grad_norm": 180.0, "learning_rate": 1.9433401449006133e-05, "loss": 1.1975, "step": 2100 }, { "epoch": 0.04801674897025692, "grad_norm": 1.0859375, "learning_rate": 1.9428757198588147e-05, "loss": 1.7231, "step": 2110 }, { "epoch": 0.04824431650092165, "grad_norm": 0.251953125, "learning_rate": 1.9424112948170167e-05, "loss": 1.8199, "step": 2120 }, { "epoch": 0.04847188403158637, "grad_norm": 0.58203125, "learning_rate": 1.9419468697752184e-05, "loss": 0.9709, "step": 2130 }, { "epoch": 0.0486994515622511, "grad_norm": 201.0, "learning_rate": 1.94148244473342e-05, "loss": 0.8072, "step": 2140 }, { "epoch": 0.04892701909291582, "grad_norm": 52.25, "learning_rate": 1.941018019691622e-05, "loss": 0.488, "step": 2150 }, { "epoch": 0.04915458662358055, "grad_norm": 196.0, "learning_rate": 1.940553594649824e-05, "loss": 0.6251, "step": 2160 }, { "epoch": 0.049382154154245274, "grad_norm": 6.375, "learning_rate": 1.9400891696080256e-05, "loss": 0.5646, "step": 2170 }, { "epoch": 0.04960972168491, "grad_norm": 294.0, "learning_rate": 1.939624744566227e-05, "loss": 1.6046, "step": 2180 }, { "epoch": 0.049837289215574725, "grad_norm": 336.0, "learning_rate": 1.939160319524429e-05, "loss": 2.6019, "step": 2190 }, { "epoch": 0.05006485674623944, "grad_norm": 356.0, "learning_rate": 1.9386958944826307e-05, "loss": 0.6041, "step": 2200 }, { "epoch": 0.05029242427690417, "grad_norm": 48.25, "learning_rate": 1.9382314694408324e-05, "loss": 0.6656, "step": 2210 }, { "epoch": 0.050519991807568894, "grad_norm": 94.5, "learning_rate": 1.937767044399034e-05, "loss": 1.0116, "step": 2220 }, { "epoch": 0.05074755933823362, "grad_norm": 396.0, "learning_rate": 1.9373026193572358e-05, "loss": 1.7477, "step": 2230 }, { "epoch": 0.050975126868898345, "grad_norm": 122.5, "learning_rate": 1.936838194315438e-05, "loss": 1.3148, "step": 2240 }, { "epoch": 0.05120269439956307, "grad_norm": 322.0, "learning_rate": 1.9363737692736392e-05, "loss": 0.99, "step": 2250 }, { "epoch": 0.051430261930227796, "grad_norm": 4.25, "learning_rate": 1.935909344231841e-05, "loss": 0.9586, "step": 2260 }, { "epoch": 0.05165782946089252, "grad_norm": 386.0, "learning_rate": 1.935444919190043e-05, "loss": 1.4372, "step": 2270 }, { "epoch": 0.051885396991557246, "grad_norm": 226.0, "learning_rate": 1.9349804941482447e-05, "loss": 1.2, "step": 2280 }, { "epoch": 0.05211296452222197, "grad_norm": 0.09765625, "learning_rate": 1.9345160691064464e-05, "loss": 0.7493, "step": 2290 }, { "epoch": 0.0523405320528867, "grad_norm": 450.0, "learning_rate": 1.934051644064648e-05, "loss": 1.3263, "step": 2300 }, { "epoch": 0.052568099583551416, "grad_norm": 249.0, "learning_rate": 1.9335872190228498e-05, "loss": 1.5568, "step": 2310 }, { "epoch": 0.05279566711421614, "grad_norm": 111.5, "learning_rate": 1.9331227939810515e-05, "loss": 0.9231, "step": 2320 }, { "epoch": 0.053023234644880866, "grad_norm": 0.095703125, "learning_rate": 1.9326583689392532e-05, "loss": 0.8617, "step": 2330 }, { "epoch": 0.05325080217554559, "grad_norm": 66.0, "learning_rate": 1.932193943897455e-05, "loss": 1.2371, "step": 2340 }, { "epoch": 0.05347836970621032, "grad_norm": 123.5, "learning_rate": 1.931729518855657e-05, "loss": 1.4119, "step": 2350 }, { "epoch": 0.05370593723687504, "grad_norm": 0.06103515625, "learning_rate": 1.9312650938138587e-05, "loss": 1.1572, "step": 2360 }, { "epoch": 0.05393350476753977, "grad_norm": 91.5, "learning_rate": 1.9308006687720604e-05, "loss": 1.5039, "step": 2370 }, { "epoch": 0.05416107229820449, "grad_norm": 2.296875, "learning_rate": 1.930336243730262e-05, "loss": 0.7795, "step": 2380 }, { "epoch": 0.05438863982886922, "grad_norm": 0.00537109375, "learning_rate": 1.9298718186884638e-05, "loss": 0.9257, "step": 2390 }, { "epoch": 0.054616207359533944, "grad_norm": 304.0, "learning_rate": 1.9294073936466655e-05, "loss": 1.897, "step": 2400 }, { "epoch": 0.05484377489019867, "grad_norm": 326.0, "learning_rate": 1.9289429686048672e-05, "loss": 0.6469, "step": 2410 }, { "epoch": 0.05507134242086339, "grad_norm": 119.5, "learning_rate": 1.928478543563069e-05, "loss": 1.1891, "step": 2420 }, { "epoch": 0.055298909951528114, "grad_norm": 65.0, "learning_rate": 1.928014118521271e-05, "loss": 0.588, "step": 2430 }, { "epoch": 0.05552647748219284, "grad_norm": 232.0, "learning_rate": 1.9275496934794727e-05, "loss": 1.1786, "step": 2440 }, { "epoch": 0.055754045012857564, "grad_norm": 132.0, "learning_rate": 1.9270852684376744e-05, "loss": 2.1528, "step": 2450 }, { "epoch": 0.05598161254352229, "grad_norm": 118.5, "learning_rate": 1.926620843395876e-05, "loss": 0.8023, "step": 2460 }, { "epoch": 0.056209180074187015, "grad_norm": 44.5, "learning_rate": 1.9261564183540778e-05, "loss": 0.9334, "step": 2470 }, { "epoch": 0.05643674760485174, "grad_norm": 16.25, "learning_rate": 1.9256919933122795e-05, "loss": 1.6425, "step": 2480 }, { "epoch": 0.056664315135516466, "grad_norm": 89.0, "learning_rate": 1.9252275682704812e-05, "loss": 0.944, "step": 2490 }, { "epoch": 0.05689188266618119, "grad_norm": 73.5, "learning_rate": 1.9247631432286832e-05, "loss": 1.9675, "step": 2500 }, { "epoch": 0.05711945019684592, "grad_norm": 136.0, "learning_rate": 1.924298718186885e-05, "loss": 1.5524, "step": 2510 }, { "epoch": 0.05734701772751064, "grad_norm": 2.984375, "learning_rate": 1.9238342931450866e-05, "loss": 1.8072, "step": 2520 }, { "epoch": 0.05757458525817536, "grad_norm": 1.390625, "learning_rate": 1.9233698681032883e-05, "loss": 1.3017, "step": 2530 }, { "epoch": 0.057802152788840086, "grad_norm": 0.46484375, "learning_rate": 1.92290544306149e-05, "loss": 1.1158, "step": 2540 }, { "epoch": 0.05802972031950481, "grad_norm": 127.0, "learning_rate": 1.9224410180196918e-05, "loss": 1.4952, "step": 2550 }, { "epoch": 0.05825728785016954, "grad_norm": 3.078125, "learning_rate": 1.9219765929778935e-05, "loss": 1.5725, "step": 2560 }, { "epoch": 0.05848485538083426, "grad_norm": 0.9140625, "learning_rate": 1.921512167936095e-05, "loss": 0.8994, "step": 2570 }, { "epoch": 0.05871242291149899, "grad_norm": 98.0, "learning_rate": 1.9210477428942972e-05, "loss": 1.2764, "step": 2580 }, { "epoch": 0.05893999044216371, "grad_norm": 66.0, "learning_rate": 1.920583317852499e-05, "loss": 0.7414, "step": 2590 }, { "epoch": 0.05916755797282844, "grad_norm": 1.5703125, "learning_rate": 1.9201188928107003e-05, "loss": 1.7539, "step": 2600 }, { "epoch": 0.059395125503493164, "grad_norm": 135.0, "learning_rate": 1.9196544677689023e-05, "loss": 1.6336, "step": 2610 }, { "epoch": 0.05962269303415789, "grad_norm": 246.0, "learning_rate": 1.919190042727104e-05, "loss": 1.028, "step": 2620 }, { "epoch": 0.05985026056482261, "grad_norm": 0.0069580078125, "learning_rate": 1.9187256176853057e-05, "loss": 0.5558, "step": 2630 }, { "epoch": 0.06007782809548733, "grad_norm": 100.0, "learning_rate": 1.9182611926435074e-05, "loss": 1.0111, "step": 2640 }, { "epoch": 0.06030539562615206, "grad_norm": 26.0, "learning_rate": 1.917796767601709e-05, "loss": 0.4442, "step": 2650 }, { "epoch": 0.060532963156816784, "grad_norm": 147.0, "learning_rate": 1.9173323425599112e-05, "loss": 0.4586, "step": 2660 }, { "epoch": 0.06076053068748151, "grad_norm": 100.5, "learning_rate": 1.9168679175181126e-05, "loss": 0.4942, "step": 2670 }, { "epoch": 0.060988098218146235, "grad_norm": 247.0, "learning_rate": 1.9164034924763143e-05, "loss": 2.1476, "step": 2680 }, { "epoch": 0.06121566574881096, "grad_norm": 103.5, "learning_rate": 1.9159390674345163e-05, "loss": 1.1642, "step": 2690 }, { "epoch": 0.061443233279475686, "grad_norm": 740.0, "learning_rate": 1.915474642392718e-05, "loss": 1.7681, "step": 2700 }, { "epoch": 0.06167080081014041, "grad_norm": 0.000766754150390625, "learning_rate": 1.9150102173509197e-05, "loss": 1.0278, "step": 2710 }, { "epoch": 0.061898368340805136, "grad_norm": 0.034912109375, "learning_rate": 1.9145457923091214e-05, "loss": 0.5248, "step": 2720 }, { "epoch": 0.06212593587146986, "grad_norm": 210.0, "learning_rate": 1.914081367267323e-05, "loss": 0.9303, "step": 2730 }, { "epoch": 0.06235350340213458, "grad_norm": 164.0, "learning_rate": 1.913616942225525e-05, "loss": 1.0463, "step": 2740 }, { "epoch": 0.06258107093279931, "grad_norm": 21.0, "learning_rate": 1.9131525171837265e-05, "loss": 0.6941, "step": 2750 }, { "epoch": 0.06280863846346403, "grad_norm": 2.96875, "learning_rate": 1.9126880921419283e-05, "loss": 1.2098, "step": 2760 }, { "epoch": 0.06303620599412876, "grad_norm": 3.203125, "learning_rate": 1.9122236671001303e-05, "loss": 1.9144, "step": 2770 }, { "epoch": 0.06326377352479348, "grad_norm": 104.5, "learning_rate": 1.911759242058332e-05, "loss": 0.7588, "step": 2780 }, { "epoch": 0.06349134105545821, "grad_norm": 92.0, "learning_rate": 1.9112948170165337e-05, "loss": 1.9258, "step": 2790 }, { "epoch": 0.06371890858612293, "grad_norm": 0.06494140625, "learning_rate": 1.9108303919747354e-05, "loss": 0.3629, "step": 2800 }, { "epoch": 0.06394647611678765, "grad_norm": 0.01263427734375, "learning_rate": 1.910365966932937e-05, "loss": 0.5499, "step": 2810 }, { "epoch": 0.06417404364745238, "grad_norm": 0.06494140625, "learning_rate": 1.9099015418911388e-05, "loss": 0.6557, "step": 2820 }, { "epoch": 0.0644016111781171, "grad_norm": 0.07177734375, "learning_rate": 1.9094371168493405e-05, "loss": 1.2387, "step": 2830 }, { "epoch": 0.06462917870878183, "grad_norm": 840.0, "learning_rate": 1.9089726918075426e-05, "loss": 1.9772, "step": 2840 }, { "epoch": 0.06485674623944655, "grad_norm": 392.0, "learning_rate": 1.9085082667657443e-05, "loss": 2.0302, "step": 2850 }, { "epoch": 0.06508431377011129, "grad_norm": 122.0, "learning_rate": 1.908043841723946e-05, "loss": 0.9849, "step": 2860 }, { "epoch": 0.065311881300776, "grad_norm": 6.46875, "learning_rate": 1.9075794166821477e-05, "loss": 0.6327, "step": 2870 }, { "epoch": 0.06553944883144074, "grad_norm": 0.29296875, "learning_rate": 1.9071149916403494e-05, "loss": 0.2366, "step": 2880 }, { "epoch": 0.06576701636210545, "grad_norm": 0.000507354736328125, "learning_rate": 1.906650566598551e-05, "loss": 1.0514, "step": 2890 }, { "epoch": 0.06599458389277017, "grad_norm": 200.0, "learning_rate": 1.9061861415567528e-05, "loss": 0.6058, "step": 2900 }, { "epoch": 0.0662221514234349, "grad_norm": 207.0, "learning_rate": 1.9057217165149545e-05, "loss": 0.8723, "step": 2910 }, { "epoch": 0.06644971895409962, "grad_norm": 286.0, "learning_rate": 1.9052572914731566e-05, "loss": 0.734, "step": 2920 }, { "epoch": 0.06667728648476436, "grad_norm": 163.0, "learning_rate": 1.9047928664313583e-05, "loss": 2.5342, "step": 2930 }, { "epoch": 0.06690485401542907, "grad_norm": 340.0, "learning_rate": 1.90432844138956e-05, "loss": 1.2687, "step": 2940 }, { "epoch": 0.0671324215460938, "grad_norm": 1.0703125, "learning_rate": 1.9038640163477617e-05, "loss": 0.5862, "step": 2950 }, { "epoch": 0.06735998907675853, "grad_norm": 101.0, "learning_rate": 1.9033995913059634e-05, "loss": 0.9355, "step": 2960 }, { "epoch": 0.06758755660742326, "grad_norm": 116.0, "learning_rate": 1.902935166264165e-05, "loss": 1.6136, "step": 2970 }, { "epoch": 0.06781512413808798, "grad_norm": 132.0, "learning_rate": 1.9024707412223668e-05, "loss": 0.8792, "step": 2980 }, { "epoch": 0.06804269166875271, "grad_norm": 0.0028533935546875, "learning_rate": 1.9020063161805685e-05, "loss": 1.4523, "step": 2990 }, { "epoch": 0.06827025919941743, "grad_norm": 145.0, "learning_rate": 1.9015418911387705e-05, "loss": 0.8868, "step": 3000 }, { "epoch": 0.06849782673008215, "grad_norm": 51.5, "learning_rate": 1.9010774660969722e-05, "loss": 0.7803, "step": 3010 }, { "epoch": 0.06872539426074688, "grad_norm": 63.25, "learning_rate": 1.9006130410551736e-05, "loss": 0.9399, "step": 3020 }, { "epoch": 0.0689529617914116, "grad_norm": 0.0634765625, "learning_rate": 1.9001486160133757e-05, "loss": 0.6319, "step": 3030 }, { "epoch": 0.06918052932207633, "grad_norm": 282.0, "learning_rate": 1.8996841909715774e-05, "loss": 1.5013, "step": 3040 }, { "epoch": 0.06940809685274105, "grad_norm": 66.5, "learning_rate": 1.899219765929779e-05, "loss": 0.6958, "step": 3050 }, { "epoch": 0.06963566438340578, "grad_norm": 1.4453125, "learning_rate": 1.8987553408879808e-05, "loss": 1.2576, "step": 3060 }, { "epoch": 0.0698632319140705, "grad_norm": 31.75, "learning_rate": 1.8982909158461825e-05, "loss": 0.9072, "step": 3070 }, { "epoch": 0.07009079944473523, "grad_norm": 0.498046875, "learning_rate": 1.8978264908043845e-05, "loss": 0.4992, "step": 3080 }, { "epoch": 0.07031836697539995, "grad_norm": 0.0003662109375, "learning_rate": 1.897362065762586e-05, "loss": 0.975, "step": 3090 }, { "epoch": 0.07054593450606468, "grad_norm": 19.375, "learning_rate": 1.8968976407207876e-05, "loss": 0.3622, "step": 3100 }, { "epoch": 0.0707735020367294, "grad_norm": 1.0703125, "learning_rate": 1.8964332156789896e-05, "loss": 1.5332, "step": 3110 }, { "epoch": 0.07100106956739412, "grad_norm": 158.0, "learning_rate": 1.8959687906371913e-05, "loss": 0.8343, "step": 3120 }, { "epoch": 0.07122863709805885, "grad_norm": 89.0, "learning_rate": 1.895504365595393e-05, "loss": 1.2094, "step": 3130 }, { "epoch": 0.07145620462872357, "grad_norm": 0.00084686279296875, "learning_rate": 1.8950399405535948e-05, "loss": 0.3673, "step": 3140 }, { "epoch": 0.0716837721593883, "grad_norm": 157.0, "learning_rate": 1.8945755155117968e-05, "loss": 0.5675, "step": 3150 }, { "epoch": 0.07191133969005302, "grad_norm": 0.08349609375, "learning_rate": 1.894111090469998e-05, "loss": 1.7348, "step": 3160 }, { "epoch": 0.07213890722071775, "grad_norm": 224.0, "learning_rate": 1.8936466654282e-05, "loss": 1.0345, "step": 3170 }, { "epoch": 0.07236647475138247, "grad_norm": 324.0, "learning_rate": 1.893182240386402e-05, "loss": 1.0265, "step": 3180 }, { "epoch": 0.0725940422820472, "grad_norm": 0.498046875, "learning_rate": 1.8927178153446036e-05, "loss": 1.011, "step": 3190 }, { "epoch": 0.07282160981271192, "grad_norm": 5.625, "learning_rate": 1.8922533903028053e-05, "loss": 0.8601, "step": 3200 }, { "epoch": 0.07304917734337665, "grad_norm": 0.0003604888916015625, "learning_rate": 1.891788965261007e-05, "loss": 0.9518, "step": 3210 }, { "epoch": 0.07327674487404137, "grad_norm": 0.859375, "learning_rate": 1.8913245402192087e-05, "loss": 0.5403, "step": 3220 }, { "epoch": 0.07350431240470609, "grad_norm": 97.5, "learning_rate": 1.8908601151774104e-05, "loss": 1.6403, "step": 3230 }, { "epoch": 0.07373187993537082, "grad_norm": 171.0, "learning_rate": 1.890395690135612e-05, "loss": 0.6887, "step": 3240 }, { "epoch": 0.07395944746603554, "grad_norm": 115.5, "learning_rate": 1.889931265093814e-05, "loss": 1.3209, "step": 3250 }, { "epoch": 0.07418701499670027, "grad_norm": 236.0, "learning_rate": 1.889466840052016e-05, "loss": 0.9023, "step": 3260 }, { "epoch": 0.07441458252736499, "grad_norm": 127.5, "learning_rate": 1.8890024150102176e-05, "loss": 0.8366, "step": 3270 }, { "epoch": 0.07464215005802972, "grad_norm": 251.0, "learning_rate": 1.8885379899684193e-05, "loss": 1.1234, "step": 3280 }, { "epoch": 0.07486971758869444, "grad_norm": 37.0, "learning_rate": 1.888073564926621e-05, "loss": 0.4003, "step": 3290 }, { "epoch": 0.07509728511935918, "grad_norm": 15.9375, "learning_rate": 1.8876091398848227e-05, "loss": 0.5239, "step": 3300 }, { "epoch": 0.0753248526500239, "grad_norm": 1.2265625, "learning_rate": 1.8871447148430244e-05, "loss": 0.9845, "step": 3310 }, { "epoch": 0.07555242018068863, "grad_norm": 74.5, "learning_rate": 1.886680289801226e-05, "loss": 0.9918, "step": 3320 }, { "epoch": 0.07577998771135334, "grad_norm": 15.375, "learning_rate": 1.886215864759428e-05, "loss": 1.9841, "step": 3330 }, { "epoch": 0.07600755524201806, "grad_norm": 11.4375, "learning_rate": 1.88575143971763e-05, "loss": 0.987, "step": 3340 }, { "epoch": 0.0762351227726828, "grad_norm": 181.0, "learning_rate": 1.8852870146758316e-05, "loss": 1.1012, "step": 3350 }, { "epoch": 0.07646269030334751, "grad_norm": 540.0, "learning_rate": 1.8848225896340333e-05, "loss": 1.1926, "step": 3360 }, { "epoch": 0.07669025783401225, "grad_norm": 79.0, "learning_rate": 1.884358164592235e-05, "loss": 0.4642, "step": 3370 }, { "epoch": 0.07691782536467696, "grad_norm": 266.0, "learning_rate": 1.8838937395504367e-05, "loss": 1.8232, "step": 3380 }, { "epoch": 0.0771453928953417, "grad_norm": 240.0, "learning_rate": 1.8834293145086384e-05, "loss": 2.7924, "step": 3390 }, { "epoch": 0.07737296042600642, "grad_norm": 38.75, "learning_rate": 1.88296488946684e-05, "loss": 0.8273, "step": 3400 }, { "epoch": 0.07760052795667115, "grad_norm": 80.0, "learning_rate": 1.8825004644250418e-05, "loss": 0.7285, "step": 3410 }, { "epoch": 0.07782809548733587, "grad_norm": 236.0, "learning_rate": 1.882036039383244e-05, "loss": 1.482, "step": 3420 }, { "epoch": 0.0780556630180006, "grad_norm": 274.0, "learning_rate": 1.8815716143414456e-05, "loss": 0.988, "step": 3430 }, { "epoch": 0.07828323054866532, "grad_norm": 8.8125, "learning_rate": 1.881107189299647e-05, "loss": 0.9761, "step": 3440 }, { "epoch": 0.07851079807933004, "grad_norm": 0.55859375, "learning_rate": 1.880642764257849e-05, "loss": 0.5996, "step": 3450 }, { "epoch": 0.07873836560999477, "grad_norm": 4.28125, "learning_rate": 1.8801783392160507e-05, "loss": 0.3362, "step": 3460 }, { "epoch": 0.07896593314065949, "grad_norm": 97.0, "learning_rate": 1.8797139141742524e-05, "loss": 0.9861, "step": 3470 }, { "epoch": 0.07919350067132422, "grad_norm": 57.75, "learning_rate": 1.879249489132454e-05, "loss": 0.6532, "step": 3480 }, { "epoch": 0.07942106820198894, "grad_norm": 127.5, "learning_rate": 1.878785064090656e-05, "loss": 1.1781, "step": 3490 }, { "epoch": 0.07964863573265367, "grad_norm": 432.0, "learning_rate": 1.878320639048858e-05, "loss": 0.8597, "step": 3500 }, { "epoch": 0.07987620326331839, "grad_norm": 121.5, "learning_rate": 1.8778562140070592e-05, "loss": 1.7989, "step": 3510 }, { "epoch": 0.08010377079398312, "grad_norm": 102.0, "learning_rate": 1.877391788965261e-05, "loss": 0.4543, "step": 3520 }, { "epoch": 0.08033133832464784, "grad_norm": 6.0625, "learning_rate": 1.876927363923463e-05, "loss": 1.2402, "step": 3530 }, { "epoch": 0.08055890585531257, "grad_norm": 149.0, "learning_rate": 1.8764629388816647e-05, "loss": 1.0314, "step": 3540 }, { "epoch": 0.08078647338597729, "grad_norm": 0.125, "learning_rate": 1.8759985138398664e-05, "loss": 0.7963, "step": 3550 }, { "epoch": 0.08101404091664201, "grad_norm": 6.8125, "learning_rate": 1.875534088798068e-05, "loss": 1.9317, "step": 3560 }, { "epoch": 0.08124160844730674, "grad_norm": 0.302734375, "learning_rate": 1.87506966375627e-05, "loss": 0.397, "step": 3570 }, { "epoch": 0.08146917597797146, "grad_norm": 173.0, "learning_rate": 1.8746052387144715e-05, "loss": 1.0142, "step": 3580 }, { "epoch": 0.08169674350863619, "grad_norm": 59.25, "learning_rate": 1.8741408136726732e-05, "loss": 0.4263, "step": 3590 }, { "epoch": 0.08192431103930091, "grad_norm": 0.00128173828125, "learning_rate": 1.8736763886308752e-05, "loss": 1.2497, "step": 3600 }, { "epoch": 0.08215187856996564, "grad_norm": 43.75, "learning_rate": 1.873211963589077e-05, "loss": 0.3371, "step": 3610 }, { "epoch": 0.08237944610063036, "grad_norm": 278.0, "learning_rate": 1.8727475385472786e-05, "loss": 1.0487, "step": 3620 }, { "epoch": 0.08260701363129509, "grad_norm": 72.5, "learning_rate": 1.8722831135054804e-05, "loss": 1.8185, "step": 3630 }, { "epoch": 0.08283458116195981, "grad_norm": 207.0, "learning_rate": 1.871818688463682e-05, "loss": 1.0284, "step": 3640 }, { "epoch": 0.08306214869262453, "grad_norm": 0.01336669921875, "learning_rate": 1.8713542634218838e-05, "loss": 1.8483, "step": 3650 }, { "epoch": 0.08328971622328926, "grad_norm": 154.0, "learning_rate": 1.8708898383800855e-05, "loss": 1.2936, "step": 3660 }, { "epoch": 0.08351728375395398, "grad_norm": 31.5, "learning_rate": 1.8704254133382872e-05, "loss": 0.6827, "step": 3670 }, { "epoch": 0.08374485128461871, "grad_norm": 0.0028839111328125, "learning_rate": 1.8699609882964892e-05, "loss": 1.1802, "step": 3680 }, { "epoch": 0.08397241881528343, "grad_norm": 216.0, "learning_rate": 1.869496563254691e-05, "loss": 0.9687, "step": 3690 }, { "epoch": 0.08419998634594816, "grad_norm": 0.275390625, "learning_rate": 1.8690321382128926e-05, "loss": 0.2817, "step": 3700 }, { "epoch": 0.08442755387661288, "grad_norm": 0.025146484375, "learning_rate": 1.8685677131710943e-05, "loss": 0.8772, "step": 3710 }, { "epoch": 0.08465512140727761, "grad_norm": 0.0859375, "learning_rate": 1.868103288129296e-05, "loss": 1.1622, "step": 3720 }, { "epoch": 0.08488268893794233, "grad_norm": 0.034423828125, "learning_rate": 1.8676388630874977e-05, "loss": 0.6207, "step": 3730 }, { "epoch": 0.08511025646860707, "grad_norm": 174.0, "learning_rate": 1.8671744380456995e-05, "loss": 1.2877, "step": 3740 }, { "epoch": 0.08533782399927178, "grad_norm": 234.0, "learning_rate": 1.866710013003901e-05, "loss": 0.8022, "step": 3750 }, { "epoch": 0.0855653915299365, "grad_norm": 77.5, "learning_rate": 1.8662455879621032e-05, "loss": 1.1684, "step": 3760 }, { "epoch": 0.08579295906060123, "grad_norm": 2.03125, "learning_rate": 1.865781162920305e-05, "loss": 0.6528, "step": 3770 }, { "epoch": 0.08602052659126595, "grad_norm": 506.0, "learning_rate": 1.8653167378785066e-05, "loss": 0.8789, "step": 3780 }, { "epoch": 0.08624809412193069, "grad_norm": 0.50390625, "learning_rate": 1.8648523128367083e-05, "loss": 0.1293, "step": 3790 }, { "epoch": 0.0864756616525954, "grad_norm": 0.0002651214599609375, "learning_rate": 1.86438788779491e-05, "loss": 0.8254, "step": 3800 }, { "epoch": 0.08670322918326014, "grad_norm": 217.0, "learning_rate": 1.8639234627531117e-05, "loss": 1.1668, "step": 3810 }, { "epoch": 0.08693079671392485, "grad_norm": 0.00701904296875, "learning_rate": 1.8634590377113134e-05, "loss": 0.6171, "step": 3820 }, { "epoch": 0.08715836424458959, "grad_norm": 109.0, "learning_rate": 1.8629946126695155e-05, "loss": 0.347, "step": 3830 }, { "epoch": 0.0873859317752543, "grad_norm": 0.1259765625, "learning_rate": 1.8625301876277172e-05, "loss": 0.9697, "step": 3840 }, { "epoch": 0.08761349930591904, "grad_norm": 108.0, "learning_rate": 1.862065762585919e-05, "loss": 1.4774, "step": 3850 }, { "epoch": 0.08784106683658376, "grad_norm": 70.0, "learning_rate": 1.8616013375441203e-05, "loss": 0.6104, "step": 3860 }, { "epoch": 0.08806863436724847, "grad_norm": 0.00185394287109375, "learning_rate": 1.8611369125023223e-05, "loss": 0.5244, "step": 3870 }, { "epoch": 0.0882962018979132, "grad_norm": 100.5, "learning_rate": 1.860672487460524e-05, "loss": 0.5807, "step": 3880 }, { "epoch": 0.08852376942857793, "grad_norm": 115.5, "learning_rate": 1.8602080624187257e-05, "loss": 1.3363, "step": 3890 }, { "epoch": 0.08875133695924266, "grad_norm": 976.0, "learning_rate": 1.8597436373769274e-05, "loss": 2.4015, "step": 3900 }, { "epoch": 0.08897890448990738, "grad_norm": 0.5078125, "learning_rate": 1.8592792123351295e-05, "loss": 0.8609, "step": 3910 }, { "epoch": 0.08920647202057211, "grad_norm": 0.02294921875, "learning_rate": 1.8588147872933312e-05, "loss": 1.0725, "step": 3920 }, { "epoch": 0.08943403955123683, "grad_norm": 0.00072479248046875, "learning_rate": 1.8583503622515325e-05, "loss": 1.5132, "step": 3930 }, { "epoch": 0.08966160708190156, "grad_norm": 111.0, "learning_rate": 1.8578859372097346e-05, "loss": 0.6439, "step": 3940 }, { "epoch": 0.08988917461256628, "grad_norm": 0.0302734375, "learning_rate": 1.8574215121679363e-05, "loss": 0.8196, "step": 3950 }, { "epoch": 0.09011674214323101, "grad_norm": 37.0, "learning_rate": 1.856957087126138e-05, "loss": 0.4857, "step": 3960 }, { "epoch": 0.09034430967389573, "grad_norm": 95.5, "learning_rate": 1.8564926620843397e-05, "loss": 0.6249, "step": 3970 }, { "epoch": 0.09057187720456045, "grad_norm": 48.75, "learning_rate": 1.8560282370425414e-05, "loss": 1.6335, "step": 3980 }, { "epoch": 0.09079944473522518, "grad_norm": 0.035400390625, "learning_rate": 1.8555638120007434e-05, "loss": 1.3521, "step": 3990 }, { "epoch": 0.0910270122658899, "grad_norm": 0.00341796875, "learning_rate": 1.8550993869589448e-05, "loss": 0.581, "step": 4000 }, { "epoch": 0.09125457979655463, "grad_norm": 46.5, "learning_rate": 1.8546349619171465e-05, "loss": 1.0666, "step": 4010 }, { "epoch": 0.09148214732721935, "grad_norm": 452.0, "learning_rate": 1.8541705368753486e-05, "loss": 0.8431, "step": 4020 }, { "epoch": 0.09170971485788408, "grad_norm": 97.0, "learning_rate": 1.8537061118335503e-05, "loss": 1.2089, "step": 4030 }, { "epoch": 0.0919372823885488, "grad_norm": 0.0067138671875, "learning_rate": 1.853241686791752e-05, "loss": 0.8273, "step": 4040 }, { "epoch": 0.09216484991921353, "grad_norm": 228.0, "learning_rate": 1.8527772617499537e-05, "loss": 1.6389, "step": 4050 }, { "epoch": 0.09239241744987825, "grad_norm": 115.0, "learning_rate": 1.8523128367081554e-05, "loss": 1.0782, "step": 4060 }, { "epoch": 0.09261998498054298, "grad_norm": 310.0, "learning_rate": 1.851848411666357e-05, "loss": 1.6441, "step": 4070 }, { "epoch": 0.0928475525112077, "grad_norm": 296.0, "learning_rate": 1.8513839866245588e-05, "loss": 0.9944, "step": 4080 }, { "epoch": 0.09307512004187242, "grad_norm": 7.0625, "learning_rate": 1.8509195615827605e-05, "loss": 0.9066, "step": 4090 }, { "epoch": 0.09330268757253715, "grad_norm": 158.0, "learning_rate": 1.8504551365409625e-05, "loss": 1.0299, "step": 4100 }, { "epoch": 0.09353025510320187, "grad_norm": 4.78125, "learning_rate": 1.8499907114991643e-05, "loss": 1.0905, "step": 4110 }, { "epoch": 0.0937578226338666, "grad_norm": 136.0, "learning_rate": 1.849526286457366e-05, "loss": 1.3911, "step": 4120 }, { "epoch": 0.09398539016453132, "grad_norm": 0.00775146484375, "learning_rate": 1.8490618614155677e-05, "loss": 0.7027, "step": 4130 }, { "epoch": 0.09421295769519605, "grad_norm": 0.00384521484375, "learning_rate": 1.8485974363737694e-05, "loss": 1.0323, "step": 4140 }, { "epoch": 0.09444052522586077, "grad_norm": 95.0, "learning_rate": 1.848133011331971e-05, "loss": 0.403, "step": 4150 }, { "epoch": 0.0946680927565255, "grad_norm": 0.00714111328125, "learning_rate": 1.8476685862901728e-05, "loss": 0.6832, "step": 4160 }, { "epoch": 0.09489566028719022, "grad_norm": 0.00115966796875, "learning_rate": 1.8472041612483745e-05, "loss": 0.7143, "step": 4170 }, { "epoch": 0.09512322781785496, "grad_norm": 175.0, "learning_rate": 1.8467397362065765e-05, "loss": 0.7458, "step": 4180 }, { "epoch": 0.09535079534851967, "grad_norm": 47.75, "learning_rate": 1.8462753111647782e-05, "loss": 0.8852, "step": 4190 }, { "epoch": 0.09557836287918439, "grad_norm": 688.0, "learning_rate": 1.84581088612298e-05, "loss": 1.6918, "step": 4200 }, { "epoch": 0.09580593040984912, "grad_norm": 230.0, "learning_rate": 1.8453464610811816e-05, "loss": 1.1986, "step": 4210 }, { "epoch": 0.09603349794051384, "grad_norm": 0.01336669921875, "learning_rate": 1.8448820360393834e-05, "loss": 1.2436, "step": 4220 }, { "epoch": 0.09626106547117858, "grad_norm": 0.011962890625, "learning_rate": 1.844417610997585e-05, "loss": 0.4942, "step": 4230 }, { "epoch": 0.0964886330018433, "grad_norm": 2.4375, "learning_rate": 1.8439531859557868e-05, "loss": 0.8558, "step": 4240 }, { "epoch": 0.09671620053250803, "grad_norm": 256.0, "learning_rate": 1.8434887609139888e-05, "loss": 2.4483, "step": 4250 }, { "epoch": 0.09694376806317274, "grad_norm": 161.0, "learning_rate": 1.8430243358721905e-05, "loss": 1.2884, "step": 4260 }, { "epoch": 0.09717133559383748, "grad_norm": 0.56640625, "learning_rate": 1.8425599108303922e-05, "loss": 1.0118, "step": 4270 }, { "epoch": 0.0973989031245022, "grad_norm": 0.0252685546875, "learning_rate": 1.842095485788594e-05, "loss": 1.5501, "step": 4280 }, { "epoch": 0.09762647065516693, "grad_norm": 168.0, "learning_rate": 1.8416310607467956e-05, "loss": 0.9357, "step": 4290 }, { "epoch": 0.09785403818583165, "grad_norm": 3.71875, "learning_rate": 1.8411666357049973e-05, "loss": 0.7578, "step": 4300 }, { "epoch": 0.09808160571649636, "grad_norm": 113.0, "learning_rate": 1.840702210663199e-05, "loss": 1.304, "step": 4310 }, { "epoch": 0.0983091732471611, "grad_norm": 34.5, "learning_rate": 1.8402377856214007e-05, "loss": 0.5295, "step": 4320 }, { "epoch": 0.09853674077782582, "grad_norm": 528.0, "learning_rate": 1.8397733605796028e-05, "loss": 1.0567, "step": 4330 }, { "epoch": 0.09876430830849055, "grad_norm": 60.25, "learning_rate": 1.8393089355378045e-05, "loss": 2.4355, "step": 4340 }, { "epoch": 0.09899187583915527, "grad_norm": 184.0, "learning_rate": 1.838844510496006e-05, "loss": 0.9955, "step": 4350 }, { "epoch": 0.09921944336982, "grad_norm": 187.0, "learning_rate": 1.838380085454208e-05, "loss": 0.6422, "step": 4360 }, { "epoch": 0.09944701090048472, "grad_norm": 126.0, "learning_rate": 1.8379156604124096e-05, "loss": 0.964, "step": 4370 }, { "epoch": 0.09967457843114945, "grad_norm": 0.00162506103515625, "learning_rate": 1.8374512353706113e-05, "loss": 1.6056, "step": 4380 }, { "epoch": 0.09990214596181417, "grad_norm": 106.5, "learning_rate": 1.836986810328813e-05, "loss": 1.0443, "step": 4390 }, { "epoch": 0.10012971349247889, "grad_norm": 256.0, "learning_rate": 1.8365223852870147e-05, "loss": 0.9527, "step": 4400 }, { "epoch": 0.10035728102314362, "grad_norm": 378.0, "learning_rate": 1.8360579602452168e-05, "loss": 1.4318, "step": 4410 }, { "epoch": 0.10058484855380834, "grad_norm": 490.0, "learning_rate": 1.835593535203418e-05, "loss": 0.737, "step": 4420 }, { "epoch": 0.10081241608447307, "grad_norm": 12.8125, "learning_rate": 1.83512911016162e-05, "loss": 1.3707, "step": 4430 }, { "epoch": 0.10103998361513779, "grad_norm": 0.08984375, "learning_rate": 1.834664685119822e-05, "loss": 0.5255, "step": 4440 }, { "epoch": 0.10126755114580252, "grad_norm": 6.09375, "learning_rate": 1.8342002600780236e-05, "loss": 0.9958, "step": 4450 }, { "epoch": 0.10149511867646724, "grad_norm": 458.0, "learning_rate": 1.8337358350362253e-05, "loss": 1.8693, "step": 4460 }, { "epoch": 0.10172268620713197, "grad_norm": 229.0, "learning_rate": 1.833271409994427e-05, "loss": 1.1017, "step": 4470 }, { "epoch": 0.10195025373779669, "grad_norm": 160.0, "learning_rate": 1.8328069849526287e-05, "loss": 0.5105, "step": 4480 }, { "epoch": 0.10217782126846142, "grad_norm": 236.0, "learning_rate": 1.8323425599108304e-05, "loss": 0.8901, "step": 4490 }, { "epoch": 0.10240538879912614, "grad_norm": 136.0, "learning_rate": 1.831878134869032e-05, "loss": 1.1035, "step": 4500 }, { "epoch": 0.10263295632979086, "grad_norm": 0.00201416015625, "learning_rate": 1.8314137098272338e-05, "loss": 0.237, "step": 4510 }, { "epoch": 0.10286052386045559, "grad_norm": 0.80078125, "learning_rate": 1.830949284785436e-05, "loss": 0.6616, "step": 4520 }, { "epoch": 0.10308809139112031, "grad_norm": 204.0, "learning_rate": 1.8304848597436376e-05, "loss": 0.4937, "step": 4530 }, { "epoch": 0.10331565892178504, "grad_norm": 189.0, "learning_rate": 1.8300204347018393e-05, "loss": 0.7375, "step": 4540 }, { "epoch": 0.10354322645244976, "grad_norm": 161.0, "learning_rate": 1.829556009660041e-05, "loss": 1.5167, "step": 4550 }, { "epoch": 0.10377079398311449, "grad_norm": 102.0, "learning_rate": 1.8290915846182427e-05, "loss": 1.0837, "step": 4560 }, { "epoch": 0.10399836151377921, "grad_norm": 49.25, "learning_rate": 1.8286271595764444e-05, "loss": 1.2148, "step": 4570 }, { "epoch": 0.10422592904444394, "grad_norm": 84.5, "learning_rate": 1.828162734534646e-05, "loss": 0.9579, "step": 4580 }, { "epoch": 0.10445349657510866, "grad_norm": 0.0018768310546875, "learning_rate": 1.827698309492848e-05, "loss": 1.3927, "step": 4590 }, { "epoch": 0.1046810641057734, "grad_norm": 155.0, "learning_rate": 1.82723388445105e-05, "loss": 0.6643, "step": 4600 }, { "epoch": 0.10490863163643811, "grad_norm": 0.009033203125, "learning_rate": 1.8267694594092516e-05, "loss": 1.0951, "step": 4610 }, { "epoch": 0.10513619916710283, "grad_norm": 85.0, "learning_rate": 1.8263050343674533e-05, "loss": 0.7842, "step": 4620 }, { "epoch": 0.10536376669776756, "grad_norm": 8.6875, "learning_rate": 1.825840609325655e-05, "loss": 0.5471, "step": 4630 }, { "epoch": 0.10559133422843228, "grad_norm": 0.000812530517578125, "learning_rate": 1.8253761842838567e-05, "loss": 0.5135, "step": 4640 }, { "epoch": 0.10581890175909701, "grad_norm": 201.0, "learning_rate": 1.8249117592420584e-05, "loss": 0.7411, "step": 4650 }, { "epoch": 0.10604646928976173, "grad_norm": 418.0, "learning_rate": 1.82444733420026e-05, "loss": 1.7365, "step": 4660 }, { "epoch": 0.10627403682042647, "grad_norm": 0.000965118408203125, "learning_rate": 1.823982909158462e-05, "loss": 0.5488, "step": 4670 }, { "epoch": 0.10650160435109118, "grad_norm": 112.5, "learning_rate": 1.823518484116664e-05, "loss": 0.9816, "step": 4680 }, { "epoch": 0.10672917188175592, "grad_norm": 144.0, "learning_rate": 1.8230540590748655e-05, "loss": 1.6494, "step": 4690 }, { "epoch": 0.10695673941242063, "grad_norm": 121.0, "learning_rate": 1.8225896340330672e-05, "loss": 1.0814, "step": 4700 }, { "epoch": 0.10718430694308537, "grad_norm": 88.5, "learning_rate": 1.822125208991269e-05, "loss": 1.2509, "step": 4710 }, { "epoch": 0.10741187447375009, "grad_norm": 199.0, "learning_rate": 1.8216607839494707e-05, "loss": 1.3065, "step": 4720 }, { "epoch": 0.1076394420044148, "grad_norm": 110.5, "learning_rate": 1.8211963589076724e-05, "loss": 1.008, "step": 4730 }, { "epoch": 0.10786700953507954, "grad_norm": 98.0, "learning_rate": 1.820731933865874e-05, "loss": 0.9274, "step": 4740 }, { "epoch": 0.10809457706574425, "grad_norm": 256.0, "learning_rate": 1.820267508824076e-05, "loss": 1.8326, "step": 4750 }, { "epoch": 0.10832214459640899, "grad_norm": 394.0, "learning_rate": 1.8198030837822778e-05, "loss": 1.1789, "step": 4760 }, { "epoch": 0.1085497121270737, "grad_norm": 242.0, "learning_rate": 1.8193386587404795e-05, "loss": 0.8762, "step": 4770 }, { "epoch": 0.10877727965773844, "grad_norm": 201.0, "learning_rate": 1.8188742336986812e-05, "loss": 1.3885, "step": 4780 }, { "epoch": 0.10900484718840316, "grad_norm": 184.0, "learning_rate": 1.818409808656883e-05, "loss": 1.2246, "step": 4790 }, { "epoch": 0.10923241471906789, "grad_norm": 133.0, "learning_rate": 1.8179453836150846e-05, "loss": 1.7681, "step": 4800 }, { "epoch": 0.1094599822497326, "grad_norm": 104.0, "learning_rate": 1.8174809585732863e-05, "loss": 0.7568, "step": 4810 }, { "epoch": 0.10968754978039734, "grad_norm": 56.25, "learning_rate": 1.817016533531488e-05, "loss": 0.3507, "step": 4820 }, { "epoch": 0.10991511731106206, "grad_norm": 0.119140625, "learning_rate": 1.81655210848969e-05, "loss": 1.3329, "step": 4830 }, { "epoch": 0.11014268484172678, "grad_norm": 53.5, "learning_rate": 1.8160876834478918e-05, "loss": 0.6663, "step": 4840 }, { "epoch": 0.11037025237239151, "grad_norm": 19.75, "learning_rate": 1.815623258406093e-05, "loss": 2.3035, "step": 4850 }, { "epoch": 0.11059781990305623, "grad_norm": 0.0159912109375, "learning_rate": 1.8151588333642952e-05, "loss": 0.5893, "step": 4860 }, { "epoch": 0.11082538743372096, "grad_norm": 159.0, "learning_rate": 1.814694408322497e-05, "loss": 1.0684, "step": 4870 }, { "epoch": 0.11105295496438568, "grad_norm": 139.0, "learning_rate": 1.8142299832806986e-05, "loss": 1.0775, "step": 4880 }, { "epoch": 0.11128052249505041, "grad_norm": 108.0, "learning_rate": 1.8137655582389003e-05, "loss": 0.6041, "step": 4890 }, { "epoch": 0.11150809002571513, "grad_norm": 59.5, "learning_rate": 1.8133011331971024e-05, "loss": 0.4602, "step": 4900 }, { "epoch": 0.11173565755637986, "grad_norm": 27.25, "learning_rate": 1.812836708155304e-05, "loss": 0.623, "step": 4910 }, { "epoch": 0.11196322508704458, "grad_norm": 0.2333984375, "learning_rate": 1.8123722831135054e-05, "loss": 0.5325, "step": 4920 }, { "epoch": 0.11219079261770931, "grad_norm": 122.0, "learning_rate": 1.8119078580717075e-05, "loss": 1.1797, "step": 4930 }, { "epoch": 0.11241836014837403, "grad_norm": 106.0, "learning_rate": 1.8114434330299092e-05, "loss": 1.0768, "step": 4940 }, { "epoch": 0.11264592767903875, "grad_norm": 0.0010528564453125, "learning_rate": 1.810979007988111e-05, "loss": 0.6651, "step": 4950 }, { "epoch": 0.11287349520970348, "grad_norm": 0.0005340576171875, "learning_rate": 1.8105145829463126e-05, "loss": 1.1469, "step": 4960 }, { "epoch": 0.1131010627403682, "grad_norm": 17.625, "learning_rate": 1.8100501579045143e-05, "loss": 1.2118, "step": 4970 }, { "epoch": 0.11332863027103293, "grad_norm": 203.0, "learning_rate": 1.8095857328627164e-05, "loss": 0.7032, "step": 4980 }, { "epoch": 0.11355619780169765, "grad_norm": 0.07763671875, "learning_rate": 1.8091213078209177e-05, "loss": 0.9442, "step": 4990 }, { "epoch": 0.11378376533236238, "grad_norm": 120.0, "learning_rate": 1.8086568827791194e-05, "loss": 1.0119, "step": 5000 }, { "epoch": 0.1140113328630271, "grad_norm": 14.8125, "learning_rate": 1.8081924577373215e-05, "loss": 0.4565, "step": 5010 }, { "epoch": 0.11423890039369183, "grad_norm": 113.5, "learning_rate": 1.8077280326955232e-05, "loss": 1.6709, "step": 5020 }, { "epoch": 0.11446646792435655, "grad_norm": 133.0, "learning_rate": 1.807263607653725e-05, "loss": 1.5785, "step": 5030 }, { "epoch": 0.11469403545502128, "grad_norm": 91.5, "learning_rate": 1.8067991826119266e-05, "loss": 0.6575, "step": 5040 }, { "epoch": 0.114921602985686, "grad_norm": 195.0, "learning_rate": 1.8063347575701283e-05, "loss": 0.9251, "step": 5050 }, { "epoch": 0.11514917051635072, "grad_norm": 406.0, "learning_rate": 1.80587033252833e-05, "loss": 1.4432, "step": 5060 }, { "epoch": 0.11537673804701545, "grad_norm": 126.5, "learning_rate": 1.8054059074865317e-05, "loss": 0.572, "step": 5070 }, { "epoch": 0.11560430557768017, "grad_norm": 134.0, "learning_rate": 1.8049414824447334e-05, "loss": 0.4565, "step": 5080 }, { "epoch": 0.1158318731083449, "grad_norm": 131.0, "learning_rate": 1.8044770574029355e-05, "loss": 0.4836, "step": 5090 }, { "epoch": 0.11605944063900962, "grad_norm": 53.75, "learning_rate": 1.804012632361137e-05, "loss": 1.062, "step": 5100 }, { "epoch": 0.11628700816967436, "grad_norm": 76.5, "learning_rate": 1.803548207319339e-05, "loss": 0.9775, "step": 5110 }, { "epoch": 0.11651457570033907, "grad_norm": 79.0, "learning_rate": 1.8030837822775406e-05, "loss": 1.2615, "step": 5120 }, { "epoch": 0.1167421432310038, "grad_norm": 382.0, "learning_rate": 1.8026193572357423e-05, "loss": 1.5911, "step": 5130 }, { "epoch": 0.11696971076166852, "grad_norm": 79.5, "learning_rate": 1.802154932193944e-05, "loss": 0.7531, "step": 5140 }, { "epoch": 0.11719727829233326, "grad_norm": 378.0, "learning_rate": 1.8016905071521457e-05, "loss": 1.6273, "step": 5150 }, { "epoch": 0.11742484582299798, "grad_norm": 16.625, "learning_rate": 1.8012260821103474e-05, "loss": 0.6442, "step": 5160 }, { "epoch": 0.1176524133536627, "grad_norm": 38.25, "learning_rate": 1.8007616570685494e-05, "loss": 1.073, "step": 5170 }, { "epoch": 0.11787998088432743, "grad_norm": 0.00408935546875, "learning_rate": 1.800297232026751e-05, "loss": 0.5841, "step": 5180 }, { "epoch": 0.11810754841499214, "grad_norm": 0.004913330078125, "learning_rate": 1.799832806984953e-05, "loss": 0.6945, "step": 5190 }, { "epoch": 0.11833511594565688, "grad_norm": 0.004791259765625, "learning_rate": 1.7993683819431546e-05, "loss": 0.314, "step": 5200 }, { "epoch": 0.1185626834763216, "grad_norm": 0.00152587890625, "learning_rate": 1.7989039569013563e-05, "loss": 0.8284, "step": 5210 }, { "epoch": 0.11879025100698633, "grad_norm": 22.125, "learning_rate": 1.798439531859558e-05, "loss": 0.9987, "step": 5220 }, { "epoch": 0.11901781853765105, "grad_norm": 272.0, "learning_rate": 1.7979751068177597e-05, "loss": 0.5607, "step": 5230 }, { "epoch": 0.11924538606831578, "grad_norm": 0.07421875, "learning_rate": 1.7975106817759617e-05, "loss": 1.4606, "step": 5240 }, { "epoch": 0.1194729535989805, "grad_norm": 83.0, "learning_rate": 1.7970462567341634e-05, "loss": 1.2152, "step": 5250 }, { "epoch": 0.11970052112964522, "grad_norm": 282.0, "learning_rate": 1.796581831692365e-05, "loss": 1.4405, "step": 5260 }, { "epoch": 0.11992808866030995, "grad_norm": 43.0, "learning_rate": 1.796117406650567e-05, "loss": 0.6858, "step": 5270 }, { "epoch": 0.12015565619097467, "grad_norm": 0.12451171875, "learning_rate": 1.7956529816087685e-05, "loss": 1.086, "step": 5280 }, { "epoch": 0.1203832237216394, "grad_norm": 2720.0, "learning_rate": 1.7951885565669702e-05, "loss": 1.2338, "step": 5290 }, { "epoch": 0.12061079125230412, "grad_norm": 41.5, "learning_rate": 1.794724131525172e-05, "loss": 0.6785, "step": 5300 }, { "epoch": 0.12083835878296885, "grad_norm": 0.9140625, "learning_rate": 1.7942597064833737e-05, "loss": 0.9054, "step": 5310 }, { "epoch": 0.12106592631363357, "grad_norm": 196.0, "learning_rate": 1.7937952814415757e-05, "loss": 1.6345, "step": 5320 }, { "epoch": 0.1212934938442983, "grad_norm": 418.0, "learning_rate": 1.7933308563997774e-05, "loss": 2.2791, "step": 5330 }, { "epoch": 0.12152106137496302, "grad_norm": 0.0220947265625, "learning_rate": 1.7928664313579788e-05, "loss": 0.8189, "step": 5340 }, { "epoch": 0.12174862890562775, "grad_norm": 121.5, "learning_rate": 1.7924020063161808e-05, "loss": 1.2166, "step": 5350 }, { "epoch": 0.12197619643629247, "grad_norm": 48.5, "learning_rate": 1.7919375812743825e-05, "loss": 0.8443, "step": 5360 }, { "epoch": 0.12220376396695719, "grad_norm": 408.0, "learning_rate": 1.7914731562325842e-05, "loss": 0.8392, "step": 5370 }, { "epoch": 0.12243133149762192, "grad_norm": 48.0, "learning_rate": 1.791008731190786e-05, "loss": 0.2791, "step": 5380 }, { "epoch": 0.12265889902828664, "grad_norm": 77.5, "learning_rate": 1.7905443061489876e-05, "loss": 0.7567, "step": 5390 }, { "epoch": 0.12288646655895137, "grad_norm": 170.0, "learning_rate": 1.7900798811071897e-05, "loss": 0.882, "step": 5400 }, { "epoch": 0.12311403408961609, "grad_norm": 408.0, "learning_rate": 1.789615456065391e-05, "loss": 0.9804, "step": 5410 }, { "epoch": 0.12334160162028082, "grad_norm": 94.5, "learning_rate": 1.7891510310235928e-05, "loss": 0.6625, "step": 5420 }, { "epoch": 0.12356916915094554, "grad_norm": 208.0, "learning_rate": 1.7886866059817948e-05, "loss": 0.7562, "step": 5430 }, { "epoch": 0.12379673668161027, "grad_norm": 386.0, "learning_rate": 1.7882221809399965e-05, "loss": 1.4922, "step": 5440 }, { "epoch": 0.12402430421227499, "grad_norm": 169.0, "learning_rate": 1.7877577558981982e-05, "loss": 0.7264, "step": 5450 }, { "epoch": 0.12425187174293972, "grad_norm": 96.5, "learning_rate": 1.7872933308564e-05, "loss": 0.6276, "step": 5460 }, { "epoch": 0.12447943927360444, "grad_norm": 0.08837890625, "learning_rate": 1.7868289058146016e-05, "loss": 0.4965, "step": 5470 }, { "epoch": 0.12470700680426916, "grad_norm": 222.0, "learning_rate": 1.7863644807728033e-05, "loss": 1.4582, "step": 5480 }, { "epoch": 0.12493457433493389, "grad_norm": 211.0, "learning_rate": 1.785900055731005e-05, "loss": 1.0934, "step": 5490 }, { "epoch": 0.12516214186559863, "grad_norm": 282.0, "learning_rate": 1.7854356306892067e-05, "loss": 1.0216, "step": 5500 }, { "epoch": 0.12538970939626334, "grad_norm": 0.11767578125, "learning_rate": 1.7849712056474088e-05, "loss": 0.8858, "step": 5510 }, { "epoch": 0.12561727692692806, "grad_norm": 0.001251220703125, "learning_rate": 1.7845067806056105e-05, "loss": 0.6206, "step": 5520 }, { "epoch": 0.12584484445759278, "grad_norm": 122.0, "learning_rate": 1.7840423555638122e-05, "loss": 0.6628, "step": 5530 }, { "epoch": 0.12607241198825753, "grad_norm": 154.0, "learning_rate": 1.783577930522014e-05, "loss": 1.1301, "step": 5540 }, { "epoch": 0.12629997951892225, "grad_norm": 0.78125, "learning_rate": 1.7831135054802156e-05, "loss": 0.8139, "step": 5550 }, { "epoch": 0.12652754704958696, "grad_norm": 218.0, "learning_rate": 1.7826490804384173e-05, "loss": 1.2704, "step": 5560 }, { "epoch": 0.12675511458025168, "grad_norm": 0.005645751953125, "learning_rate": 1.782184655396619e-05, "loss": 1.1157, "step": 5570 }, { "epoch": 0.12698268211091643, "grad_norm": 3.6875, "learning_rate": 1.781720230354821e-05, "loss": 1.1606, "step": 5580 }, { "epoch": 0.12721024964158115, "grad_norm": 0.0172119140625, "learning_rate": 1.7812558053130228e-05, "loss": 1.1314, "step": 5590 }, { "epoch": 0.12743781717224587, "grad_norm": 91.0, "learning_rate": 1.7807913802712245e-05, "loss": 0.6956, "step": 5600 }, { "epoch": 0.12766538470291058, "grad_norm": 1048.0, "learning_rate": 1.7803269552294262e-05, "loss": 1.0079, "step": 5610 }, { "epoch": 0.1278929522335753, "grad_norm": 39.25, "learning_rate": 1.779862530187628e-05, "loss": 0.4496, "step": 5620 }, { "epoch": 0.12812051976424005, "grad_norm": 71.5, "learning_rate": 1.7793981051458296e-05, "loss": 0.3356, "step": 5630 }, { "epoch": 0.12834808729490477, "grad_norm": 98.5, "learning_rate": 1.7789336801040313e-05, "loss": 1.2306, "step": 5640 }, { "epoch": 0.12857565482556949, "grad_norm": 140.0, "learning_rate": 1.778469255062233e-05, "loss": 1.479, "step": 5650 }, { "epoch": 0.1288032223562342, "grad_norm": 0.0038909912109375, "learning_rate": 1.778004830020435e-05, "loss": 1.1418, "step": 5660 }, { "epoch": 0.12903078988689895, "grad_norm": 112.5, "learning_rate": 1.7775404049786367e-05, "loss": 1.1269, "step": 5670 }, { "epoch": 0.12925835741756367, "grad_norm": 288.0, "learning_rate": 1.7770759799368385e-05, "loss": 1.2111, "step": 5680 }, { "epoch": 0.1294859249482284, "grad_norm": 22.625, "learning_rate": 1.77661155489504e-05, "loss": 0.6762, "step": 5690 }, { "epoch": 0.1297134924788931, "grad_norm": 0.376953125, "learning_rate": 1.776147129853242e-05, "loss": 0.5571, "step": 5700 }, { "epoch": 0.12994106000955782, "grad_norm": 400.0, "learning_rate": 1.7756827048114436e-05, "loss": 0.4836, "step": 5710 }, { "epoch": 0.13016862754022257, "grad_norm": 1.8359375, "learning_rate": 1.7752182797696453e-05, "loss": 0.4536, "step": 5720 }, { "epoch": 0.1303961950708873, "grad_norm": 69.5, "learning_rate": 1.774753854727847e-05, "loss": 1.1546, "step": 5730 }, { "epoch": 0.130623762601552, "grad_norm": 5.65625, "learning_rate": 1.774289429686049e-05, "loss": 1.4627, "step": 5740 }, { "epoch": 0.13085133013221673, "grad_norm": 0.00138092041015625, "learning_rate": 1.7738250046442507e-05, "loss": 1.1748, "step": 5750 }, { "epoch": 0.13107889766288147, "grad_norm": 0.0035400390625, "learning_rate": 1.773360579602452e-05, "loss": 1.0538, "step": 5760 }, { "epoch": 0.1313064651935462, "grad_norm": 540.0, "learning_rate": 1.772896154560654e-05, "loss": 0.5145, "step": 5770 }, { "epoch": 0.1315340327242109, "grad_norm": 21.0, "learning_rate": 1.772431729518856e-05, "loss": 1.1869, "step": 5780 }, { "epoch": 0.13176160025487563, "grad_norm": 241.0, "learning_rate": 1.7719673044770576e-05, "loss": 1.4321, "step": 5790 }, { "epoch": 0.13198916778554035, "grad_norm": 132.0, "learning_rate": 1.7715028794352593e-05, "loss": 0.9871, "step": 5800 }, { "epoch": 0.1322167353162051, "grad_norm": 176.0, "learning_rate": 1.771038454393461e-05, "loss": 1.4919, "step": 5810 }, { "epoch": 0.1324443028468698, "grad_norm": 41.25, "learning_rate": 1.770574029351663e-05, "loss": 0.2899, "step": 5820 }, { "epoch": 0.13267187037753453, "grad_norm": 0.0021514892578125, "learning_rate": 1.7701096043098644e-05, "loss": 0.7219, "step": 5830 }, { "epoch": 0.13289943790819925, "grad_norm": 175.0, "learning_rate": 1.769645179268066e-05, "loss": 0.7068, "step": 5840 }, { "epoch": 0.133127005438864, "grad_norm": 314.0, "learning_rate": 1.769180754226268e-05, "loss": 1.0009, "step": 5850 }, { "epoch": 0.1333545729695287, "grad_norm": 400.0, "learning_rate": 1.7687163291844698e-05, "loss": 1.0511, "step": 5860 }, { "epoch": 0.13358214050019343, "grad_norm": 0.10205078125, "learning_rate": 1.7682519041426715e-05, "loss": 0.7609, "step": 5870 }, { "epoch": 0.13380970803085815, "grad_norm": 0.11181640625, "learning_rate": 1.7677874791008732e-05, "loss": 0.7456, "step": 5880 }, { "epoch": 0.1340372755615229, "grad_norm": 2.609375, "learning_rate": 1.7673230540590753e-05, "loss": 0.9804, "step": 5890 }, { "epoch": 0.1342648430921876, "grad_norm": 1.9921875, "learning_rate": 1.7668586290172766e-05, "loss": 0.8484, "step": 5900 }, { "epoch": 0.13449241062285233, "grad_norm": 37.75, "learning_rate": 1.7663942039754784e-05, "loss": 0.6471, "step": 5910 }, { "epoch": 0.13471997815351705, "grad_norm": 124.5, "learning_rate": 1.76592977893368e-05, "loss": 0.4274, "step": 5920 }, { "epoch": 0.13494754568418177, "grad_norm": 87.0, "learning_rate": 1.765465353891882e-05, "loss": 1.25, "step": 5930 }, { "epoch": 0.13517511321484652, "grad_norm": 126.5, "learning_rate": 1.7650009288500838e-05, "loss": 0.6935, "step": 5940 }, { "epoch": 0.13540268074551123, "grad_norm": 134.0, "learning_rate": 1.7645365038082855e-05, "loss": 1.1282, "step": 5950 }, { "epoch": 0.13563024827617595, "grad_norm": 600.0, "learning_rate": 1.7640720787664872e-05, "loss": 2.2582, "step": 5960 }, { "epoch": 0.13585781580684067, "grad_norm": 63.0, "learning_rate": 1.763607653724689e-05, "loss": 1.2863, "step": 5970 }, { "epoch": 0.13608538333750542, "grad_norm": 0.2392578125, "learning_rate": 1.7631432286828906e-05, "loss": 1.4813, "step": 5980 }, { "epoch": 0.13631295086817014, "grad_norm": 237.0, "learning_rate": 1.7626788036410923e-05, "loss": 1.4184, "step": 5990 }, { "epoch": 0.13654051839883485, "grad_norm": 252.0, "learning_rate": 1.7622143785992944e-05, "loss": 0.9143, "step": 6000 }, { "epoch": 0.13676808592949957, "grad_norm": 92.0, "learning_rate": 1.761749953557496e-05, "loss": 1.3036, "step": 6010 }, { "epoch": 0.1369956534601643, "grad_norm": 266.0, "learning_rate": 1.7612855285156978e-05, "loss": 1.0776, "step": 6020 }, { "epoch": 0.13722322099082904, "grad_norm": 133.0, "learning_rate": 1.7608211034738995e-05, "loss": 1.4665, "step": 6030 }, { "epoch": 0.13745078852149376, "grad_norm": 0.02099609375, "learning_rate": 1.7603566784321012e-05, "loss": 0.7046, "step": 6040 }, { "epoch": 0.13767835605215847, "grad_norm": 0.03369140625, "learning_rate": 1.759892253390303e-05, "loss": 0.7061, "step": 6050 }, { "epoch": 0.1379059235828232, "grad_norm": 219.0, "learning_rate": 1.7594278283485046e-05, "loss": 0.645, "step": 6060 }, { "epoch": 0.13813349111348794, "grad_norm": 194.0, "learning_rate": 1.7589634033067063e-05, "loss": 1.5072, "step": 6070 }, { "epoch": 0.13836105864415266, "grad_norm": 94.5, "learning_rate": 1.7584989782649084e-05, "loss": 1.1925, "step": 6080 }, { "epoch": 0.13858862617481738, "grad_norm": 296.0, "learning_rate": 1.75803455322311e-05, "loss": 0.4612, "step": 6090 }, { "epoch": 0.1388161937054821, "grad_norm": 0.06201171875, "learning_rate": 1.7575701281813118e-05, "loss": 0.6033, "step": 6100 }, { "epoch": 0.13904376123614684, "grad_norm": 215.0, "learning_rate": 1.7571057031395135e-05, "loss": 0.491, "step": 6110 }, { "epoch": 0.13927132876681156, "grad_norm": 268.0, "learning_rate": 1.7566412780977152e-05, "loss": 2.2093, "step": 6120 }, { "epoch": 0.13949889629747628, "grad_norm": 83.5, "learning_rate": 1.756176853055917e-05, "loss": 1.8203, "step": 6130 }, { "epoch": 0.139726463828141, "grad_norm": 0.00921630859375, "learning_rate": 1.7557124280141186e-05, "loss": 0.3302, "step": 6140 }, { "epoch": 0.13995403135880571, "grad_norm": 0.953125, "learning_rate": 1.7552480029723203e-05, "loss": 1.2541, "step": 6150 }, { "epoch": 0.14018159888947046, "grad_norm": 7.28125, "learning_rate": 1.7547835779305223e-05, "loss": 0.7118, "step": 6160 }, { "epoch": 0.14040916642013518, "grad_norm": 0.43359375, "learning_rate": 1.754319152888724e-05, "loss": 0.8478, "step": 6170 }, { "epoch": 0.1406367339507999, "grad_norm": 122.5, "learning_rate": 1.7538547278469254e-05, "loss": 1.0271, "step": 6180 }, { "epoch": 0.14086430148146462, "grad_norm": 0.40625, "learning_rate": 1.7533903028051275e-05, "loss": 0.6457, "step": 6190 }, { "epoch": 0.14109186901212936, "grad_norm": 12.5, "learning_rate": 1.7529258777633292e-05, "loss": 1.44, "step": 6200 }, { "epoch": 0.14131943654279408, "grad_norm": 0.00970458984375, "learning_rate": 1.752461452721531e-05, "loss": 2.0494, "step": 6210 }, { "epoch": 0.1415470040734588, "grad_norm": 76.0, "learning_rate": 1.7519970276797326e-05, "loss": 1.0752, "step": 6220 }, { "epoch": 0.14177457160412352, "grad_norm": 143.0, "learning_rate": 1.7515326026379343e-05, "loss": 1.5948, "step": 6230 }, { "epoch": 0.14200213913478824, "grad_norm": 60.0, "learning_rate": 1.7510681775961363e-05, "loss": 0.8077, "step": 6240 }, { "epoch": 0.14222970666545298, "grad_norm": 0.3515625, "learning_rate": 1.7506037525543377e-05, "loss": 0.4638, "step": 6250 }, { "epoch": 0.1424572741961177, "grad_norm": 176.0, "learning_rate": 1.7501393275125394e-05, "loss": 0.9143, "step": 6260 }, { "epoch": 0.14268484172678242, "grad_norm": 137.0, "learning_rate": 1.7496749024707414e-05, "loss": 0.6196, "step": 6270 }, { "epoch": 0.14291240925744714, "grad_norm": 0.283203125, "learning_rate": 1.749210477428943e-05, "loss": 0.7072, "step": 6280 }, { "epoch": 0.14313997678811188, "grad_norm": 102.5, "learning_rate": 1.748746052387145e-05, "loss": 1.1552, "step": 6290 }, { "epoch": 0.1433675443187766, "grad_norm": 0.015625, "learning_rate": 1.7482816273453466e-05, "loss": 0.1142, "step": 6300 }, { "epoch": 0.14359511184944132, "grad_norm": 30.875, "learning_rate": 1.7478172023035486e-05, "loss": 0.802, "step": 6310 }, { "epoch": 0.14382267938010604, "grad_norm": 169.0, "learning_rate": 1.74735277726175e-05, "loss": 0.5857, "step": 6320 }, { "epoch": 0.14405024691077079, "grad_norm": 102.5, "learning_rate": 1.7468883522199517e-05, "loss": 1.1703, "step": 6330 }, { "epoch": 0.1442778144414355, "grad_norm": 14.375, "learning_rate": 1.7464239271781537e-05, "loss": 0.1739, "step": 6340 }, { "epoch": 0.14450538197210022, "grad_norm": 4.875, "learning_rate": 1.7459595021363554e-05, "loss": 2.0655, "step": 6350 }, { "epoch": 0.14473294950276494, "grad_norm": 60.5, "learning_rate": 1.745495077094557e-05, "loss": 0.4238, "step": 6360 }, { "epoch": 0.14496051703342966, "grad_norm": 152.0, "learning_rate": 1.745030652052759e-05, "loss": 1.2654, "step": 6370 }, { "epoch": 0.1451880845640944, "grad_norm": 21.75, "learning_rate": 1.7445662270109605e-05, "loss": 1.1451, "step": 6380 }, { "epoch": 0.14541565209475912, "grad_norm": 111.5, "learning_rate": 1.7441018019691623e-05, "loss": 0.5242, "step": 6390 }, { "epoch": 0.14564321962542384, "grad_norm": 0.8828125, "learning_rate": 1.743637376927364e-05, "loss": 1.1951, "step": 6400 }, { "epoch": 0.14587078715608856, "grad_norm": 169.0, "learning_rate": 1.7431729518855657e-05, "loss": 1.0785, "step": 6410 }, { "epoch": 0.1460983546867533, "grad_norm": 168.0, "learning_rate": 1.7427085268437677e-05, "loss": 1.0101, "step": 6420 }, { "epoch": 0.14632592221741803, "grad_norm": 0.0029754638671875, "learning_rate": 1.7422441018019694e-05, "loss": 0.5591, "step": 6430 }, { "epoch": 0.14655348974808274, "grad_norm": 0.07763671875, "learning_rate": 1.741779676760171e-05, "loss": 1.203, "step": 6440 }, { "epoch": 0.14678105727874746, "grad_norm": 63.0, "learning_rate": 1.7413152517183728e-05, "loss": 0.6398, "step": 6450 }, { "epoch": 0.14700862480941218, "grad_norm": 63.0, "learning_rate": 1.7408508266765745e-05, "loss": 0.7362, "step": 6460 }, { "epoch": 0.14723619234007693, "grad_norm": 756.0, "learning_rate": 1.7403864016347762e-05, "loss": 1.0576, "step": 6470 }, { "epoch": 0.14746375987074165, "grad_norm": 122.0, "learning_rate": 1.739921976592978e-05, "loss": 0.4032, "step": 6480 }, { "epoch": 0.14769132740140636, "grad_norm": 150.0, "learning_rate": 1.7394575515511796e-05, "loss": 0.8499, "step": 6490 }, { "epoch": 0.14791889493207108, "grad_norm": 0.0185546875, "learning_rate": 1.7389931265093817e-05, "loss": 1.0421, "step": 6500 }, { "epoch": 0.14814646246273583, "grad_norm": 185.0, "learning_rate": 1.7385287014675834e-05, "loss": 1.1967, "step": 6510 }, { "epoch": 0.14837402999340055, "grad_norm": 21.25, "learning_rate": 1.738064276425785e-05, "loss": 0.6081, "step": 6520 }, { "epoch": 0.14860159752406527, "grad_norm": 166.0, "learning_rate": 1.7375998513839868e-05, "loss": 1.1182, "step": 6530 }, { "epoch": 0.14882916505472998, "grad_norm": 160.0, "learning_rate": 1.7371354263421885e-05, "loss": 0.5833, "step": 6540 }, { "epoch": 0.1490567325853947, "grad_norm": 229.0, "learning_rate": 1.7366710013003902e-05, "loss": 0.6007, "step": 6550 }, { "epoch": 0.14928430011605945, "grad_norm": 0.0034942626953125, "learning_rate": 1.736206576258592e-05, "loss": 1.3705, "step": 6560 }, { "epoch": 0.14951186764672417, "grad_norm": 276.0, "learning_rate": 1.7357421512167936e-05, "loss": 1.8626, "step": 6570 }, { "epoch": 0.14973943517738889, "grad_norm": 332.0, "learning_rate": 1.7352777261749957e-05, "loss": 1.2415, "step": 6580 }, { "epoch": 0.1499670027080536, "grad_norm": 203.0, "learning_rate": 1.7348133011331974e-05, "loss": 1.7854, "step": 6590 }, { "epoch": 0.15019457023871835, "grad_norm": 126.5, "learning_rate": 1.7343488760913987e-05, "loss": 0.8788, "step": 6600 }, { "epoch": 0.15042213776938307, "grad_norm": 125.5, "learning_rate": 1.7338844510496008e-05, "loss": 0.5434, "step": 6610 }, { "epoch": 0.1506497053000478, "grad_norm": 213.0, "learning_rate": 1.7334200260078025e-05, "loss": 1.3607, "step": 6620 }, { "epoch": 0.1508772728307125, "grad_norm": 177.0, "learning_rate": 1.7329556009660042e-05, "loss": 1.2645, "step": 6630 }, { "epoch": 0.15110484036137725, "grad_norm": 0.016845703125, "learning_rate": 1.732491175924206e-05, "loss": 0.8235, "step": 6640 }, { "epoch": 0.15133240789204197, "grad_norm": 0.451171875, "learning_rate": 1.732026750882408e-05, "loss": 0.9221, "step": 6650 }, { "epoch": 0.1515599754227067, "grad_norm": 0.004669189453125, "learning_rate": 1.7315623258406097e-05, "loss": 0.6191, "step": 6660 }, { "epoch": 0.1517875429533714, "grad_norm": 128.0, "learning_rate": 1.731097900798811e-05, "loss": 0.5876, "step": 6670 }, { "epoch": 0.15201511048403613, "grad_norm": 245.0, "learning_rate": 1.730633475757013e-05, "loss": 1.759, "step": 6680 }, { "epoch": 0.15224267801470087, "grad_norm": 5.46875, "learning_rate": 1.7301690507152148e-05, "loss": 1.3202, "step": 6690 }, { "epoch": 0.1524702455453656, "grad_norm": 8.25, "learning_rate": 1.7297046256734165e-05, "loss": 0.9397, "step": 6700 }, { "epoch": 0.1526978130760303, "grad_norm": 116.0, "learning_rate": 1.7292402006316182e-05, "loss": 1.0977, "step": 6710 }, { "epoch": 0.15292538060669503, "grad_norm": 0.1708984375, "learning_rate": 1.72877577558982e-05, "loss": 0.5098, "step": 6720 }, { "epoch": 0.15315294813735977, "grad_norm": 194.0, "learning_rate": 1.728311350548022e-05, "loss": 0.6471, "step": 6730 }, { "epoch": 0.1533805156680245, "grad_norm": 150.0, "learning_rate": 1.7278469255062233e-05, "loss": 0.6037, "step": 6740 }, { "epoch": 0.1536080831986892, "grad_norm": 133.0, "learning_rate": 1.727382500464425e-05, "loss": 0.3354, "step": 6750 }, { "epoch": 0.15383565072935393, "grad_norm": 6.0625, "learning_rate": 1.726918075422627e-05, "loss": 1.2476, "step": 6760 }, { "epoch": 0.15406321826001865, "grad_norm": 2.296875, "learning_rate": 1.7264536503808288e-05, "loss": 0.7941, "step": 6770 }, { "epoch": 0.1542907857906834, "grad_norm": 0.004180908203125, "learning_rate": 1.7259892253390305e-05, "loss": 0.7447, "step": 6780 }, { "epoch": 0.1545183533213481, "grad_norm": 126.5, "learning_rate": 1.725524800297232e-05, "loss": 0.386, "step": 6790 }, { "epoch": 0.15474592085201283, "grad_norm": 0.30078125, "learning_rate": 1.725060375255434e-05, "loss": 0.5434, "step": 6800 }, { "epoch": 0.15497348838267755, "grad_norm": 153.0, "learning_rate": 1.7245959502136356e-05, "loss": 0.4657, "step": 6810 }, { "epoch": 0.1552010559133423, "grad_norm": 0.625, "learning_rate": 1.7241315251718373e-05, "loss": 1.4304, "step": 6820 }, { "epoch": 0.155428623444007, "grad_norm": 2.28125, "learning_rate": 1.723667100130039e-05, "loss": 0.7146, "step": 6830 }, { "epoch": 0.15565619097467173, "grad_norm": 184.0, "learning_rate": 1.723202675088241e-05, "loss": 0.5056, "step": 6840 }, { "epoch": 0.15588375850533645, "grad_norm": 0.416015625, "learning_rate": 1.7227382500464427e-05, "loss": 0.4158, "step": 6850 }, { "epoch": 0.1561113260360012, "grad_norm": 0.00019931793212890625, "learning_rate": 1.7222738250046444e-05, "loss": 1.3381, "step": 6860 }, { "epoch": 0.15633889356666592, "grad_norm": 100.5, "learning_rate": 1.721809399962846e-05, "loss": 1.2948, "step": 6870 }, { "epoch": 0.15656646109733063, "grad_norm": 0.0002899169921875, "learning_rate": 1.721344974921048e-05, "loss": 0.4134, "step": 6880 }, { "epoch": 0.15679402862799535, "grad_norm": 0.18359375, "learning_rate": 1.7208805498792496e-05, "loss": 0.514, "step": 6890 }, { "epoch": 0.15702159615866007, "grad_norm": 129.0, "learning_rate": 1.7204161248374513e-05, "loss": 1.7467, "step": 6900 }, { "epoch": 0.15724916368932482, "grad_norm": 136.0, "learning_rate": 1.719951699795653e-05, "loss": 0.0679, "step": 6910 }, { "epoch": 0.15747673121998954, "grad_norm": 115.5, "learning_rate": 1.719487274753855e-05, "loss": 0.8541, "step": 6920 }, { "epoch": 0.15770429875065425, "grad_norm": 0.1416015625, "learning_rate": 1.7190228497120567e-05, "loss": 0.5283, "step": 6930 }, { "epoch": 0.15793186628131897, "grad_norm": 163.0, "learning_rate": 1.7185584246702584e-05, "loss": 2.3244, "step": 6940 }, { "epoch": 0.15815943381198372, "grad_norm": 274.0, "learning_rate": 1.71809399962846e-05, "loss": 1.0178, "step": 6950 }, { "epoch": 0.15838700134264844, "grad_norm": 39.0, "learning_rate": 1.717629574586662e-05, "loss": 1.1244, "step": 6960 }, { "epoch": 0.15861456887331316, "grad_norm": 292.0, "learning_rate": 1.7171651495448635e-05, "loss": 0.7638, "step": 6970 }, { "epoch": 0.15884213640397787, "grad_norm": 0.00244140625, "learning_rate": 1.7167007245030652e-05, "loss": 1.0443, "step": 6980 }, { "epoch": 0.1590697039346426, "grad_norm": 60.25, "learning_rate": 1.7162362994612673e-05, "loss": 0.9193, "step": 6990 }, { "epoch": 0.15929727146530734, "grad_norm": 0.0015716552734375, "learning_rate": 1.715771874419469e-05, "loss": 1.0885, "step": 7000 }, { "epoch": 0.15952483899597206, "grad_norm": 122.5, "learning_rate": 1.7153074493776707e-05, "loss": 0.759, "step": 7010 }, { "epoch": 0.15975240652663678, "grad_norm": 402.0, "learning_rate": 1.7148430243358724e-05, "loss": 1.4976, "step": 7020 }, { "epoch": 0.1599799740573015, "grad_norm": 93.0, "learning_rate": 1.714378599294074e-05, "loss": 0.6965, "step": 7030 }, { "epoch": 0.16020754158796624, "grad_norm": 143.0, "learning_rate": 1.7139141742522758e-05, "loss": 0.5035, "step": 7040 }, { "epoch": 0.16043510911863096, "grad_norm": 197.0, "learning_rate": 1.7134497492104775e-05, "loss": 1.0245, "step": 7050 }, { "epoch": 0.16066267664929568, "grad_norm": 22.625, "learning_rate": 1.7129853241686792e-05, "loss": 1.0365, "step": 7060 }, { "epoch": 0.1608902441799604, "grad_norm": 8.9375, "learning_rate": 1.7125208991268813e-05, "loss": 0.7281, "step": 7070 }, { "epoch": 0.16111781171062514, "grad_norm": 0.00128936767578125, "learning_rate": 1.712056474085083e-05, "loss": 0.2253, "step": 7080 }, { "epoch": 0.16134537924128986, "grad_norm": 0.01544189453125, "learning_rate": 1.7115920490432843e-05, "loss": 0.7205, "step": 7090 }, { "epoch": 0.16157294677195458, "grad_norm": 540.0, "learning_rate": 1.7111276240014864e-05, "loss": 1.5571, "step": 7100 }, { "epoch": 0.1618005143026193, "grad_norm": 0.00506591796875, "learning_rate": 1.710663198959688e-05, "loss": 0.7409, "step": 7110 }, { "epoch": 0.16202808183328402, "grad_norm": 292.0, "learning_rate": 1.7101987739178898e-05, "loss": 1.1149, "step": 7120 }, { "epoch": 0.16225564936394876, "grad_norm": 33.25, "learning_rate": 1.7097343488760915e-05, "loss": 0.8775, "step": 7130 }, { "epoch": 0.16248321689461348, "grad_norm": 9.5625, "learning_rate": 1.7092699238342932e-05, "loss": 1.2986, "step": 7140 }, { "epoch": 0.1627107844252782, "grad_norm": 100.5, "learning_rate": 1.7088054987924953e-05, "loss": 0.2365, "step": 7150 }, { "epoch": 0.16293835195594292, "grad_norm": 0.1005859375, "learning_rate": 1.7083410737506966e-05, "loss": 0.447, "step": 7160 }, { "epoch": 0.16316591948660766, "grad_norm": 80.5, "learning_rate": 1.7078766487088983e-05, "loss": 1.1457, "step": 7170 }, { "epoch": 0.16339348701727238, "grad_norm": 67.5, "learning_rate": 1.7074122236671004e-05, "loss": 1.4269, "step": 7180 }, { "epoch": 0.1636210545479371, "grad_norm": 106.0, "learning_rate": 1.706947798625302e-05, "loss": 0.7253, "step": 7190 }, { "epoch": 0.16384862207860182, "grad_norm": 83.0, "learning_rate": 1.7064833735835038e-05, "loss": 0.8678, "step": 7200 }, { "epoch": 0.16407618960926654, "grad_norm": 312.0, "learning_rate": 1.7060189485417055e-05, "loss": 1.5495, "step": 7210 }, { "epoch": 0.16430375713993128, "grad_norm": 374.0, "learning_rate": 1.7055545234999072e-05, "loss": 0.9757, "step": 7220 }, { "epoch": 0.164531324670596, "grad_norm": 169.0, "learning_rate": 1.705090098458109e-05, "loss": 1.7204, "step": 7230 }, { "epoch": 0.16475889220126072, "grad_norm": 302.0, "learning_rate": 1.7046256734163106e-05, "loss": 1.2606, "step": 7240 }, { "epoch": 0.16498645973192544, "grad_norm": 92.5, "learning_rate": 1.7041612483745123e-05, "loss": 1.2893, "step": 7250 }, { "epoch": 0.16521402726259019, "grad_norm": 6.21875, "learning_rate": 1.7036968233327144e-05, "loss": 0.4885, "step": 7260 }, { "epoch": 0.1654415947932549, "grad_norm": 231.0, "learning_rate": 1.703232398290916e-05, "loss": 0.8655, "step": 7270 }, { "epoch": 0.16566916232391962, "grad_norm": 43.0, "learning_rate": 1.7027679732491178e-05, "loss": 0.7372, "step": 7280 }, { "epoch": 0.16589672985458434, "grad_norm": 115.0, "learning_rate": 1.7023035482073195e-05, "loss": 1.7704, "step": 7290 }, { "epoch": 0.16612429738524906, "grad_norm": 35.75, "learning_rate": 1.7018391231655212e-05, "loss": 0.96, "step": 7300 }, { "epoch": 0.1663518649159138, "grad_norm": 252.0, "learning_rate": 1.701374698123723e-05, "loss": 0.9163, "step": 7310 }, { "epoch": 0.16657943244657852, "grad_norm": 360.0, "learning_rate": 1.7009102730819246e-05, "loss": 1.3474, "step": 7320 }, { "epoch": 0.16680699997724324, "grad_norm": 145.0, "learning_rate": 1.7004458480401266e-05, "loss": 1.0022, "step": 7330 }, { "epoch": 0.16703456750790796, "grad_norm": 235.0, "learning_rate": 1.6999814229983283e-05, "loss": 1.0826, "step": 7340 }, { "epoch": 0.1672621350385727, "grad_norm": 0.0213623046875, "learning_rate": 1.69951699795653e-05, "loss": 0.9586, "step": 7350 }, { "epoch": 0.16748970256923743, "grad_norm": 358.0, "learning_rate": 1.6990525729147318e-05, "loss": 1.6082, "step": 7360 }, { "epoch": 0.16771727009990214, "grad_norm": 0.05322265625, "learning_rate": 1.6985881478729335e-05, "loss": 0.6437, "step": 7370 }, { "epoch": 0.16794483763056686, "grad_norm": 25.125, "learning_rate": 1.698123722831135e-05, "loss": 0.575, "step": 7380 }, { "epoch": 0.1681724051612316, "grad_norm": 0.0004558563232421875, "learning_rate": 1.697659297789337e-05, "loss": 0.375, "step": 7390 }, { "epoch": 0.16839997269189633, "grad_norm": 294.0, "learning_rate": 1.6971948727475386e-05, "loss": 1.0458, "step": 7400 }, { "epoch": 0.16862754022256105, "grad_norm": 0.00057220458984375, "learning_rate": 1.6967304477057406e-05, "loss": 1.5702, "step": 7410 }, { "epoch": 0.16885510775322576, "grad_norm": 89.0, "learning_rate": 1.6962660226639423e-05, "loss": 1.0285, "step": 7420 }, { "epoch": 0.16908267528389048, "grad_norm": 0.06494140625, "learning_rate": 1.695801597622144e-05, "loss": 0.8773, "step": 7430 }, { "epoch": 0.16931024281455523, "grad_norm": 82.5, "learning_rate": 1.6953371725803457e-05, "loss": 1.412, "step": 7440 }, { "epoch": 0.16953781034521995, "grad_norm": 58.25, "learning_rate": 1.6948727475385474e-05, "loss": 1.0848, "step": 7450 }, { "epoch": 0.16976537787588467, "grad_norm": 103.0, "learning_rate": 1.694408322496749e-05, "loss": 0.4552, "step": 7460 }, { "epoch": 0.16999294540654938, "grad_norm": 166.0, "learning_rate": 1.693943897454951e-05, "loss": 0.7059, "step": 7470 }, { "epoch": 0.17022051293721413, "grad_norm": 160.0, "learning_rate": 1.6934794724131526e-05, "loss": 1.3265, "step": 7480 }, { "epoch": 0.17044808046787885, "grad_norm": 92.0, "learning_rate": 1.6930150473713546e-05, "loss": 1.2586, "step": 7490 }, { "epoch": 0.17067564799854357, "grad_norm": 0.04541015625, "learning_rate": 1.6925506223295563e-05, "loss": 1.5763, "step": 7500 }, { "epoch": 0.17090321552920829, "grad_norm": 13.1875, "learning_rate": 1.6920861972877577e-05, "loss": 1.3828, "step": 7510 }, { "epoch": 0.171130783059873, "grad_norm": 36.0, "learning_rate": 1.6916217722459597e-05, "loss": 1.8073, "step": 7520 }, { "epoch": 0.17135835059053775, "grad_norm": 270.0, "learning_rate": 1.6911573472041614e-05, "loss": 1.1041, "step": 7530 }, { "epoch": 0.17158591812120247, "grad_norm": 332.0, "learning_rate": 1.690692922162363e-05, "loss": 1.2955, "step": 7540 }, { "epoch": 0.1718134856518672, "grad_norm": 342.0, "learning_rate": 1.690228497120565e-05, "loss": 1.2396, "step": 7550 }, { "epoch": 0.1720410531825319, "grad_norm": 376.0, "learning_rate": 1.6897640720787665e-05, "loss": 0.9224, "step": 7560 }, { "epoch": 0.17226862071319665, "grad_norm": 133.0, "learning_rate": 1.6892996470369686e-05, "loss": 1.2762, "step": 7570 }, { "epoch": 0.17249618824386137, "grad_norm": 145.0, "learning_rate": 1.68883522199517e-05, "loss": 1.0612, "step": 7580 }, { "epoch": 0.1727237557745261, "grad_norm": 177.0, "learning_rate": 1.6883707969533717e-05, "loss": 0.762, "step": 7590 }, { "epoch": 0.1729513233051908, "grad_norm": 3.75, "learning_rate": 1.6879063719115737e-05, "loss": 0.7186, "step": 7600 }, { "epoch": 0.17317889083585555, "grad_norm": 0.0247802734375, "learning_rate": 1.6874419468697754e-05, "loss": 1.5222, "step": 7610 }, { "epoch": 0.17340645836652027, "grad_norm": 176.0, "learning_rate": 1.686977521827977e-05, "loss": 0.8191, "step": 7620 }, { "epoch": 0.173634025897185, "grad_norm": 77.5, "learning_rate": 1.6865130967861788e-05, "loss": 0.4421, "step": 7630 }, { "epoch": 0.1738615934278497, "grad_norm": 104.5, "learning_rate": 1.686048671744381e-05, "loss": 0.7015, "step": 7640 }, { "epoch": 0.17408916095851443, "grad_norm": 57.5, "learning_rate": 1.6855842467025822e-05, "loss": 0.7043, "step": 7650 }, { "epoch": 0.17431672848917917, "grad_norm": 88.5, "learning_rate": 1.685119821660784e-05, "loss": 0.4255, "step": 7660 }, { "epoch": 0.1745442960198439, "grad_norm": 11.125, "learning_rate": 1.6846553966189856e-05, "loss": 1.2705, "step": 7670 }, { "epoch": 0.1747718635505086, "grad_norm": 15.8125, "learning_rate": 1.6841909715771877e-05, "loss": 0.3141, "step": 7680 }, { "epoch": 0.17499943108117333, "grad_norm": 195.0, "learning_rate": 1.6837265465353894e-05, "loss": 1.3025, "step": 7690 }, { "epoch": 0.17522699861183808, "grad_norm": 0.00070953369140625, "learning_rate": 1.683262121493591e-05, "loss": 0.194, "step": 7700 }, { "epoch": 0.1754545661425028, "grad_norm": 2.328125, "learning_rate": 1.6827976964517928e-05, "loss": 1.2467, "step": 7710 }, { "epoch": 0.1756821336731675, "grad_norm": 17.375, "learning_rate": 1.6823332714099945e-05, "loss": 0.8494, "step": 7720 }, { "epoch": 0.17590970120383223, "grad_norm": 30.875, "learning_rate": 1.6818688463681962e-05, "loss": 0.8112, "step": 7730 }, { "epoch": 0.17613726873449695, "grad_norm": 312.0, "learning_rate": 1.681404421326398e-05, "loss": 1.2374, "step": 7740 }, { "epoch": 0.1763648362651617, "grad_norm": 0.11767578125, "learning_rate": 1.6809399962846e-05, "loss": 0.6607, "step": 7750 }, { "epoch": 0.1765924037958264, "grad_norm": 93.5, "learning_rate": 1.6804755712428017e-05, "loss": 1.3001, "step": 7760 }, { "epoch": 0.17681997132649113, "grad_norm": 86.5, "learning_rate": 1.6800111462010034e-05, "loss": 1.0569, "step": 7770 }, { "epoch": 0.17704753885715585, "grad_norm": 0.00579833984375, "learning_rate": 1.679546721159205e-05, "loss": 0.9398, "step": 7780 }, { "epoch": 0.1772751063878206, "grad_norm": 91.0, "learning_rate": 1.6790822961174068e-05, "loss": 0.9225, "step": 7790 }, { "epoch": 0.17750267391848532, "grad_norm": 0.058349609375, "learning_rate": 1.6786178710756085e-05, "loss": 1.1901, "step": 7800 }, { "epoch": 0.17773024144915003, "grad_norm": 56.0, "learning_rate": 1.6781534460338102e-05, "loss": 0.8603, "step": 7810 }, { "epoch": 0.17795780897981475, "grad_norm": 262.0, "learning_rate": 1.677689020992012e-05, "loss": 1.119, "step": 7820 }, { "epoch": 0.1781853765104795, "grad_norm": 0.0272216796875, "learning_rate": 1.677224595950214e-05, "loss": 1.3608, "step": 7830 }, { "epoch": 0.17841294404114422, "grad_norm": 158.0, "learning_rate": 1.6767601709084156e-05, "loss": 0.6991, "step": 7840 }, { "epoch": 0.17864051157180894, "grad_norm": 120.0, "learning_rate": 1.6762957458666174e-05, "loss": 0.5036, "step": 7850 }, { "epoch": 0.17886807910247365, "grad_norm": 0.62890625, "learning_rate": 1.675831320824819e-05, "loss": 1.0562, "step": 7860 }, { "epoch": 0.17909564663313837, "grad_norm": 0.035888671875, "learning_rate": 1.6753668957830208e-05, "loss": 0.5813, "step": 7870 }, { "epoch": 0.17932321416380312, "grad_norm": 0.00185394287109375, "learning_rate": 1.6749024707412225e-05, "loss": 0.2205, "step": 7880 }, { "epoch": 0.17955078169446784, "grad_norm": 48.25, "learning_rate": 1.6744380456994242e-05, "loss": 0.5209, "step": 7890 }, { "epoch": 0.17977834922513256, "grad_norm": 62.25, "learning_rate": 1.673973620657626e-05, "loss": 1.117, "step": 7900 }, { "epoch": 0.18000591675579727, "grad_norm": 412.0, "learning_rate": 1.673509195615828e-05, "loss": 2.0653, "step": 7910 }, { "epoch": 0.18023348428646202, "grad_norm": 125.0, "learning_rate": 1.6730447705740296e-05, "loss": 0.9558, "step": 7920 }, { "epoch": 0.18046105181712674, "grad_norm": 149.0, "learning_rate": 1.672580345532231e-05, "loss": 0.6131, "step": 7930 }, { "epoch": 0.18068861934779146, "grad_norm": 0.03564453125, "learning_rate": 1.672115920490433e-05, "loss": 0.6854, "step": 7940 }, { "epoch": 0.18091618687845618, "grad_norm": 1.234375, "learning_rate": 1.6716514954486347e-05, "loss": 1.5561, "step": 7950 }, { "epoch": 0.1811437544091209, "grad_norm": 0.07666015625, "learning_rate": 1.6711870704068365e-05, "loss": 0.7332, "step": 7960 }, { "epoch": 0.18137132193978564, "grad_norm": 228.0, "learning_rate": 1.670722645365038e-05, "loss": 1.0131, "step": 7970 }, { "epoch": 0.18159888947045036, "grad_norm": 3.21875, "learning_rate": 1.67025822032324e-05, "loss": 1.1754, "step": 7980 }, { "epoch": 0.18182645700111508, "grad_norm": 204.0, "learning_rate": 1.669793795281442e-05, "loss": 1.5442, "step": 7990 }, { "epoch": 0.1820540245317798, "grad_norm": 9.0625, "learning_rate": 1.6693293702396433e-05, "loss": 1.109, "step": 8000 }, { "epoch": 0.18228159206244454, "grad_norm": 185.0, "learning_rate": 1.668864945197845e-05, "loss": 0.7482, "step": 8010 }, { "epoch": 0.18250915959310926, "grad_norm": 0.0634765625, "learning_rate": 1.668400520156047e-05, "loss": 0.7846, "step": 8020 }, { "epoch": 0.18273672712377398, "grad_norm": 204.0, "learning_rate": 1.6679360951142487e-05, "loss": 2.0096, "step": 8030 }, { "epoch": 0.1829642946544387, "grad_norm": 0.859375, "learning_rate": 1.6674716700724504e-05, "loss": 0.8699, "step": 8040 }, { "epoch": 0.18319186218510342, "grad_norm": 191.0, "learning_rate": 1.667007245030652e-05, "loss": 1.8181, "step": 8050 }, { "epoch": 0.18341942971576816, "grad_norm": 42.5, "learning_rate": 1.6665428199888542e-05, "loss": 0.6636, "step": 8060 }, { "epoch": 0.18364699724643288, "grad_norm": 0.0072021484375, "learning_rate": 1.6660783949470556e-05, "loss": 0.5124, "step": 8070 }, { "epoch": 0.1838745647770976, "grad_norm": 8.3125, "learning_rate": 1.6656139699052573e-05, "loss": 0.3554, "step": 8080 }, { "epoch": 0.18410213230776232, "grad_norm": 0.006591796875, "learning_rate": 1.6651495448634593e-05, "loss": 0.9731, "step": 8090 }, { "epoch": 0.18432969983842706, "grad_norm": 1.7421875, "learning_rate": 1.664685119821661e-05, "loss": 0.0524, "step": 8100 }, { "epoch": 0.18455726736909178, "grad_norm": 0.0087890625, "learning_rate": 1.6642206947798627e-05, "loss": 1.8723, "step": 8110 }, { "epoch": 0.1847848348997565, "grad_norm": 21.25, "learning_rate": 1.6637562697380644e-05, "loss": 0.5502, "step": 8120 }, { "epoch": 0.18501240243042122, "grad_norm": 93.5, "learning_rate": 1.663291844696266e-05, "loss": 1.0929, "step": 8130 }, { "epoch": 0.18523996996108597, "grad_norm": 124.0, "learning_rate": 1.6628274196544678e-05, "loss": 1.3083, "step": 8140 }, { "epoch": 0.18546753749175068, "grad_norm": 205.0, "learning_rate": 1.6623629946126695e-05, "loss": 0.9293, "step": 8150 }, { "epoch": 0.1856951050224154, "grad_norm": 189.0, "learning_rate": 1.6618985695708712e-05, "loss": 0.4026, "step": 8160 }, { "epoch": 0.18592267255308012, "grad_norm": 8.6875, "learning_rate": 1.6614341445290733e-05, "loss": 1.103, "step": 8170 }, { "epoch": 0.18615024008374484, "grad_norm": 0.00024318695068359375, "learning_rate": 1.660969719487275e-05, "loss": 1.4376, "step": 8180 }, { "epoch": 0.18637780761440959, "grad_norm": 35.75, "learning_rate": 1.6605052944454767e-05, "loss": 1.0469, "step": 8190 }, { "epoch": 0.1866053751450743, "grad_norm": 272.0, "learning_rate": 1.6600408694036784e-05, "loss": 0.958, "step": 8200 }, { "epoch": 0.18683294267573902, "grad_norm": 77.0, "learning_rate": 1.65957644436188e-05, "loss": 0.5895, "step": 8210 }, { "epoch": 0.18706051020640374, "grad_norm": 54.0, "learning_rate": 1.6591120193200818e-05, "loss": 0.8982, "step": 8220 }, { "epoch": 0.1872880777370685, "grad_norm": 278.0, "learning_rate": 1.6586475942782835e-05, "loss": 0.7707, "step": 8230 }, { "epoch": 0.1875156452677332, "grad_norm": 0.034912109375, "learning_rate": 1.6581831692364852e-05, "loss": 0.5619, "step": 8240 }, { "epoch": 0.18774321279839792, "grad_norm": 0.375, "learning_rate": 1.6577187441946873e-05, "loss": 0.1071, "step": 8250 }, { "epoch": 0.18797078032906264, "grad_norm": 67.5, "learning_rate": 1.657254319152889e-05, "loss": 1.6867, "step": 8260 }, { "epoch": 0.18819834785972736, "grad_norm": 0.046630859375, "learning_rate": 1.6567898941110907e-05, "loss": 0.9262, "step": 8270 }, { "epoch": 0.1884259153903921, "grad_norm": 0.0033111572265625, "learning_rate": 1.6563254690692924e-05, "loss": 0.8618, "step": 8280 }, { "epoch": 0.18865348292105683, "grad_norm": 2.140625, "learning_rate": 1.655861044027494e-05, "loss": 0.97, "step": 8290 }, { "epoch": 0.18888105045172154, "grad_norm": 226.0, "learning_rate": 1.6553966189856958e-05, "loss": 0.3739, "step": 8300 }, { "epoch": 0.18910861798238626, "grad_norm": 0.000640869140625, "learning_rate": 1.6549321939438975e-05, "loss": 0.6889, "step": 8310 }, { "epoch": 0.189336185513051, "grad_norm": 14.4375, "learning_rate": 1.6544677689020992e-05, "loss": 0.4425, "step": 8320 }, { "epoch": 0.18956375304371573, "grad_norm": 0.1943359375, "learning_rate": 1.6540033438603012e-05, "loss": 0.724, "step": 8330 }, { "epoch": 0.18979132057438045, "grad_norm": 13.4375, "learning_rate": 1.653538918818503e-05, "loss": 0.4163, "step": 8340 }, { "epoch": 0.19001888810504516, "grad_norm": 0.0031890869140625, "learning_rate": 1.6530744937767043e-05, "loss": 1.1748, "step": 8350 }, { "epoch": 0.1902464556357099, "grad_norm": 106.5, "learning_rate": 1.6526100687349064e-05, "loss": 0.6386, "step": 8360 }, { "epoch": 0.19047402316637463, "grad_norm": 29.25, "learning_rate": 1.652145643693108e-05, "loss": 1.3416, "step": 8370 }, { "epoch": 0.19070159069703935, "grad_norm": 4.78125, "learning_rate": 1.6516812186513098e-05, "loss": 0.8009, "step": 8380 }, { "epoch": 0.19092915822770407, "grad_norm": 0.01519775390625, "learning_rate": 1.6512167936095115e-05, "loss": 0.496, "step": 8390 }, { "epoch": 0.19115672575836878, "grad_norm": 53.75, "learning_rate": 1.6507523685677135e-05, "loss": 0.1985, "step": 8400 }, { "epoch": 0.19138429328903353, "grad_norm": 9.125, "learning_rate": 1.6502879435259152e-05, "loss": 0.4983, "step": 8410 }, { "epoch": 0.19161186081969825, "grad_norm": 360.0, "learning_rate": 1.6498235184841166e-05, "loss": 0.8691, "step": 8420 }, { "epoch": 0.19183942835036297, "grad_norm": 148.0, "learning_rate": 1.6493590934423186e-05, "loss": 1.0504, "step": 8430 }, { "epoch": 0.19206699588102769, "grad_norm": 0.0283203125, "learning_rate": 1.6488946684005203e-05, "loss": 1.3706, "step": 8440 }, { "epoch": 0.19229456341169243, "grad_norm": 77.5, "learning_rate": 1.648430243358722e-05, "loss": 0.9144, "step": 8450 }, { "epoch": 0.19252213094235715, "grad_norm": 92.5, "learning_rate": 1.6479658183169238e-05, "loss": 1.2851, "step": 8460 }, { "epoch": 0.19274969847302187, "grad_norm": 0.0050048828125, "learning_rate": 1.6475013932751255e-05, "loss": 0.7395, "step": 8470 }, { "epoch": 0.1929772660036866, "grad_norm": 171.0, "learning_rate": 1.6470369682333275e-05, "loss": 0.8143, "step": 8480 }, { "epoch": 0.1932048335343513, "grad_norm": 83.0, "learning_rate": 1.646572543191529e-05, "loss": 0.8697, "step": 8490 }, { "epoch": 0.19343240106501605, "grad_norm": 187.0, "learning_rate": 1.6461081181497306e-05, "loss": 0.9257, "step": 8500 }, { "epoch": 0.19365996859568077, "grad_norm": 260.0, "learning_rate": 1.6456436931079326e-05, "loss": 0.9098, "step": 8510 }, { "epoch": 0.1938875361263455, "grad_norm": 6.09375, "learning_rate": 1.6451792680661343e-05, "loss": 1.1601, "step": 8520 }, { "epoch": 0.1941151036570102, "grad_norm": 136.0, "learning_rate": 1.644714843024336e-05, "loss": 1.0097, "step": 8530 }, { "epoch": 0.19434267118767495, "grad_norm": 3.90625, "learning_rate": 1.6442504179825377e-05, "loss": 0.9386, "step": 8540 }, { "epoch": 0.19457023871833967, "grad_norm": 0.349609375, "learning_rate": 1.6437859929407394e-05, "loss": 1.0312, "step": 8550 }, { "epoch": 0.1947978062490044, "grad_norm": 0.1025390625, "learning_rate": 1.643321567898941e-05, "loss": 0.3504, "step": 8560 }, { "epoch": 0.1950253737796691, "grad_norm": 154.0, "learning_rate": 1.642857142857143e-05, "loss": 0.6983, "step": 8570 }, { "epoch": 0.19525294131033386, "grad_norm": 2.84375, "learning_rate": 1.6423927178153446e-05, "loss": 0.276, "step": 8580 }, { "epoch": 0.19548050884099857, "grad_norm": 74.0, "learning_rate": 1.6419282927735466e-05, "loss": 0.3187, "step": 8590 }, { "epoch": 0.1957080763716633, "grad_norm": 242.0, "learning_rate": 1.6414638677317483e-05, "loss": 0.971, "step": 8600 }, { "epoch": 0.195935643902328, "grad_norm": 140.0, "learning_rate": 1.64099944268995e-05, "loss": 0.7978, "step": 8610 }, { "epoch": 0.19616321143299273, "grad_norm": 0.0201416015625, "learning_rate": 1.6405350176481517e-05, "loss": 0.8708, "step": 8620 }, { "epoch": 0.19639077896365748, "grad_norm": 24.625, "learning_rate": 1.6400705926063534e-05, "loss": 0.2596, "step": 8630 }, { "epoch": 0.1966183464943222, "grad_norm": 0.0004138946533203125, "learning_rate": 1.639606167564555e-05, "loss": 1.9555, "step": 8640 }, { "epoch": 0.1968459140249869, "grad_norm": 0.466796875, "learning_rate": 1.639141742522757e-05, "loss": 0.9995, "step": 8650 }, { "epoch": 0.19707348155565163, "grad_norm": 1.6328125, "learning_rate": 1.6386773174809585e-05, "loss": 0.8859, "step": 8660 }, { "epoch": 0.19730104908631638, "grad_norm": 57.5, "learning_rate": 1.6382128924391606e-05, "loss": 1.0278, "step": 8670 }, { "epoch": 0.1975286166169811, "grad_norm": 4.25, "learning_rate": 1.6377484673973623e-05, "loss": 1.0874, "step": 8680 }, { "epoch": 0.1977561841476458, "grad_norm": 0.00125885009765625, "learning_rate": 1.637284042355564e-05, "loss": 1.6119, "step": 8690 }, { "epoch": 0.19798375167831053, "grad_norm": 22.875, "learning_rate": 1.6368196173137657e-05, "loss": 0.6031, "step": 8700 }, { "epoch": 0.19821131920897525, "grad_norm": 0.0023345947265625, "learning_rate": 1.6363551922719674e-05, "loss": 0.6826, "step": 8710 }, { "epoch": 0.19843888673964, "grad_norm": 0.76171875, "learning_rate": 1.635890767230169e-05, "loss": 0.9791, "step": 8720 }, { "epoch": 0.19866645427030472, "grad_norm": 81.5, "learning_rate": 1.6354263421883708e-05, "loss": 0.411, "step": 8730 }, { "epoch": 0.19889402180096943, "grad_norm": 0.0023956298828125, "learning_rate": 1.634961917146573e-05, "loss": 0.8363, "step": 8740 }, { "epoch": 0.19912158933163415, "grad_norm": 203.0, "learning_rate": 1.6344974921047746e-05, "loss": 1.0741, "step": 8750 }, { "epoch": 0.1993491568622989, "grad_norm": 69.0, "learning_rate": 1.6340330670629763e-05, "loss": 0.4443, "step": 8760 }, { "epoch": 0.19957672439296362, "grad_norm": 0.125, "learning_rate": 1.633568642021178e-05, "loss": 0.7039, "step": 8770 }, { "epoch": 0.19980429192362834, "grad_norm": 0.0390625, "learning_rate": 1.6331042169793797e-05, "loss": 0.2728, "step": 8780 }, { "epoch": 0.20003185945429305, "grad_norm": 85.0, "learning_rate": 1.6326397919375814e-05, "loss": 0.7663, "step": 8790 }, { "epoch": 0.20025942698495777, "grad_norm": 60.75, "learning_rate": 1.632175366895783e-05, "loss": 0.997, "step": 8800 }, { "epoch": 0.20048699451562252, "grad_norm": 103.5, "learning_rate": 1.6317109418539848e-05, "loss": 0.8035, "step": 8810 }, { "epoch": 0.20071456204628724, "grad_norm": 0.71875, "learning_rate": 1.631246516812187e-05, "loss": 1.3411, "step": 8820 }, { "epoch": 0.20094212957695196, "grad_norm": 0.0030059814453125, "learning_rate": 1.6307820917703886e-05, "loss": 1.5105, "step": 8830 }, { "epoch": 0.20116969710761667, "grad_norm": 23.5, "learning_rate": 1.63031766672859e-05, "loss": 1.1315, "step": 8840 }, { "epoch": 0.20139726463828142, "grad_norm": 79.0, "learning_rate": 1.629853241686792e-05, "loss": 0.5367, "step": 8850 }, { "epoch": 0.20162483216894614, "grad_norm": 348.0, "learning_rate": 1.6293888166449937e-05, "loss": 0.6799, "step": 8860 }, { "epoch": 0.20185239969961086, "grad_norm": 230.0, "learning_rate": 1.6289243916031954e-05, "loss": 0.7966, "step": 8870 }, { "epoch": 0.20207996723027558, "grad_norm": 118.0, "learning_rate": 1.628459966561397e-05, "loss": 0.3831, "step": 8880 }, { "epoch": 0.20230753476094032, "grad_norm": 118.0, "learning_rate": 1.6279955415195988e-05, "loss": 0.4284, "step": 8890 }, { "epoch": 0.20253510229160504, "grad_norm": 245.0, "learning_rate": 1.627531116477801e-05, "loss": 0.8387, "step": 8900 }, { "epoch": 0.20276266982226976, "grad_norm": 904.0, "learning_rate": 1.6270666914360022e-05, "loss": 1.3238, "step": 8910 }, { "epoch": 0.20299023735293448, "grad_norm": 198.0, "learning_rate": 1.626602266394204e-05, "loss": 0.7154, "step": 8920 }, { "epoch": 0.2032178048835992, "grad_norm": 23.375, "learning_rate": 1.626137841352406e-05, "loss": 0.9791, "step": 8930 }, { "epoch": 0.20344537241426394, "grad_norm": 320.0, "learning_rate": 1.6256734163106077e-05, "loss": 0.8169, "step": 8940 }, { "epoch": 0.20367293994492866, "grad_norm": 712.0, "learning_rate": 1.6252089912688094e-05, "loss": 1.1222, "step": 8950 }, { "epoch": 0.20390050747559338, "grad_norm": 276.0, "learning_rate": 1.624744566227011e-05, "loss": 1.2758, "step": 8960 }, { "epoch": 0.2041280750062581, "grad_norm": 286.0, "learning_rate": 1.6242801411852128e-05, "loss": 0.5614, "step": 8970 }, { "epoch": 0.20435564253692284, "grad_norm": 494.0, "learning_rate": 1.6238157161434145e-05, "loss": 1.2251, "step": 8980 }, { "epoch": 0.20458321006758756, "grad_norm": 7.875, "learning_rate": 1.6233512911016162e-05, "loss": 1.6947, "step": 8990 }, { "epoch": 0.20481077759825228, "grad_norm": 1.1328125, "learning_rate": 1.622886866059818e-05, "loss": 1.4567, "step": 9000 }, { "epoch": 0.205038345128917, "grad_norm": 56.75, "learning_rate": 1.62242244101802e-05, "loss": 0.6464, "step": 9010 }, { "epoch": 0.20526591265958172, "grad_norm": 314.0, "learning_rate": 1.6219580159762216e-05, "loss": 0.6796, "step": 9020 }, { "epoch": 0.20549348019024646, "grad_norm": 0.031005859375, "learning_rate": 1.6214935909344233e-05, "loss": 0.5361, "step": 9030 }, { "epoch": 0.20572104772091118, "grad_norm": 0.033447265625, "learning_rate": 1.621029165892625e-05, "loss": 0.9321, "step": 9040 }, { "epoch": 0.2059486152515759, "grad_norm": 60.5, "learning_rate": 1.6205647408508268e-05, "loss": 1.2415, "step": 9050 }, { "epoch": 0.20617618278224062, "grad_norm": 0.1376953125, "learning_rate": 1.6201003158090285e-05, "loss": 0.298, "step": 9060 }, { "epoch": 0.20640375031290537, "grad_norm": 1688.0, "learning_rate": 1.61963589076723e-05, "loss": 1.1027, "step": 9070 }, { "epoch": 0.20663131784357008, "grad_norm": 0.84375, "learning_rate": 1.6191714657254322e-05, "loss": 0.5566, "step": 9080 }, { "epoch": 0.2068588853742348, "grad_norm": 472.0, "learning_rate": 1.618707040683634e-05, "loss": 1.0176, "step": 9090 }, { "epoch": 0.20708645290489952, "grad_norm": 97.0, "learning_rate": 1.6182426156418356e-05, "loss": 1.3551, "step": 9100 }, { "epoch": 0.20731402043556427, "grad_norm": 4.21875, "learning_rate": 1.6177781906000373e-05, "loss": 0.6326, "step": 9110 }, { "epoch": 0.20754158796622899, "grad_norm": 10.875, "learning_rate": 1.617313765558239e-05, "loss": 1.3762, "step": 9120 }, { "epoch": 0.2077691554968937, "grad_norm": 41.25, "learning_rate": 1.6168493405164407e-05, "loss": 1.0091, "step": 9130 }, { "epoch": 0.20799672302755842, "grad_norm": 110.5, "learning_rate": 1.6163849154746424e-05, "loss": 1.857, "step": 9140 }, { "epoch": 0.20822429055822314, "grad_norm": 648.0, "learning_rate": 1.615920490432844e-05, "loss": 1.0509, "step": 9150 }, { "epoch": 0.2084518580888879, "grad_norm": 10.75, "learning_rate": 1.6154560653910462e-05, "loss": 1.3496, "step": 9160 }, { "epoch": 0.2086794256195526, "grad_norm": 0.0033721923828125, "learning_rate": 1.614991640349248e-05, "loss": 1.3452, "step": 9170 }, { "epoch": 0.20890699315021732, "grad_norm": 0.035888671875, "learning_rate": 1.6145272153074496e-05, "loss": 1.2428, "step": 9180 }, { "epoch": 0.20913456068088204, "grad_norm": 133.0, "learning_rate": 1.6140627902656513e-05, "loss": 0.5211, "step": 9190 }, { "epoch": 0.2093621282115468, "grad_norm": 89.5, "learning_rate": 1.613598365223853e-05, "loss": 0.8605, "step": 9200 }, { "epoch": 0.2095896957422115, "grad_norm": 11.3125, "learning_rate": 1.6131339401820547e-05, "loss": 0.5389, "step": 9210 }, { "epoch": 0.20981726327287623, "grad_norm": 99.0, "learning_rate": 1.6126695151402564e-05, "loss": 0.8267, "step": 9220 }, { "epoch": 0.21004483080354094, "grad_norm": 0.546875, "learning_rate": 1.612205090098458e-05, "loss": 0.6988, "step": 9230 }, { "epoch": 0.21027239833420566, "grad_norm": 0.033935546875, "learning_rate": 1.6117406650566602e-05, "loss": 0.8314, "step": 9240 }, { "epoch": 0.2104999658648704, "grad_norm": 0.0322265625, "learning_rate": 1.611276240014862e-05, "loss": 0.4952, "step": 9250 }, { "epoch": 0.21072753339553513, "grad_norm": 113.5, "learning_rate": 1.6108118149730632e-05, "loss": 0.3416, "step": 9260 }, { "epoch": 0.21095510092619985, "grad_norm": 69.5, "learning_rate": 1.6103473899312653e-05, "loss": 1.0139, "step": 9270 }, { "epoch": 0.21118266845686456, "grad_norm": 0.0024566650390625, "learning_rate": 1.609882964889467e-05, "loss": 0.3243, "step": 9280 }, { "epoch": 0.2114102359875293, "grad_norm": 0.77734375, "learning_rate": 1.6094185398476687e-05, "loss": 0.7845, "step": 9290 }, { "epoch": 0.21163780351819403, "grad_norm": 101.5, "learning_rate": 1.6089541148058704e-05, "loss": 1.0799, "step": 9300 }, { "epoch": 0.21186537104885875, "grad_norm": 148.0, "learning_rate": 1.608489689764072e-05, "loss": 0.8988, "step": 9310 }, { "epoch": 0.21209293857952347, "grad_norm": 189.0, "learning_rate": 1.608025264722274e-05, "loss": 1.0435, "step": 9320 }, { "epoch": 0.2123205061101882, "grad_norm": 0.3828125, "learning_rate": 1.6075608396804755e-05, "loss": 0.9045, "step": 9330 }, { "epoch": 0.21254807364085293, "grad_norm": 5.0, "learning_rate": 1.6070964146386772e-05, "loss": 0.3257, "step": 9340 }, { "epoch": 0.21277564117151765, "grad_norm": 148.0, "learning_rate": 1.6066319895968793e-05, "loss": 1.8471, "step": 9350 }, { "epoch": 0.21300320870218237, "grad_norm": 202.0, "learning_rate": 1.606167564555081e-05, "loss": 1.1115, "step": 9360 }, { "epoch": 0.21323077623284709, "grad_norm": 124.5, "learning_rate": 1.6057031395132827e-05, "loss": 1.1629, "step": 9370 }, { "epoch": 0.21345834376351183, "grad_norm": 588.0, "learning_rate": 1.6052387144714844e-05, "loss": 1.0027, "step": 9380 }, { "epoch": 0.21368591129417655, "grad_norm": 62.25, "learning_rate": 1.6047742894296864e-05, "loss": 0.9935, "step": 9390 }, { "epoch": 0.21391347882484127, "grad_norm": 87.5, "learning_rate": 1.6043098643878878e-05, "loss": 1.2058, "step": 9400 }, { "epoch": 0.214141046355506, "grad_norm": 422.0, "learning_rate": 1.6038454393460895e-05, "loss": 0.9817, "step": 9410 }, { "epoch": 0.21436861388617073, "grad_norm": 6.3125, "learning_rate": 1.6033810143042916e-05, "loss": 1.3332, "step": 9420 }, { "epoch": 0.21459618141683545, "grad_norm": 0.005340576171875, "learning_rate": 1.6029165892624933e-05, "loss": 0.4914, "step": 9430 }, { "epoch": 0.21482374894750017, "grad_norm": 6.84375, "learning_rate": 1.602452164220695e-05, "loss": 0.9394, "step": 9440 }, { "epoch": 0.2150513164781649, "grad_norm": 0.0255126953125, "learning_rate": 1.6019877391788967e-05, "loss": 0.455, "step": 9450 }, { "epoch": 0.2152788840088296, "grad_norm": 106.0, "learning_rate": 1.6015233141370984e-05, "loss": 0.3398, "step": 9460 }, { "epoch": 0.21550645153949435, "grad_norm": 476.0, "learning_rate": 1.6010588890953e-05, "loss": 1.4834, "step": 9470 }, { "epoch": 0.21573401907015907, "grad_norm": 0.083984375, "learning_rate": 1.6005944640535018e-05, "loss": 1.0208, "step": 9480 }, { "epoch": 0.2159615866008238, "grad_norm": 182.0, "learning_rate": 1.6001300390117035e-05, "loss": 1.2132, "step": 9490 }, { "epoch": 0.2161891541314885, "grad_norm": 7.5, "learning_rate": 1.5996656139699055e-05, "loss": 0.8619, "step": 9500 }, { "epoch": 0.21641672166215326, "grad_norm": 0.000774383544921875, "learning_rate": 1.5992011889281072e-05, "loss": 0.7331, "step": 9510 }, { "epoch": 0.21664428919281797, "grad_norm": 65.5, "learning_rate": 1.598736763886309e-05, "loss": 0.7816, "step": 9520 }, { "epoch": 0.2168718567234827, "grad_norm": 6.125, "learning_rate": 1.5982723388445107e-05, "loss": 0.4981, "step": 9530 }, { "epoch": 0.2170994242541474, "grad_norm": 31.0, "learning_rate": 1.5978079138027124e-05, "loss": 0.3798, "step": 9540 }, { "epoch": 0.21732699178481216, "grad_norm": 0.0390625, "learning_rate": 1.597343488760914e-05, "loss": 1.3404, "step": 9550 }, { "epoch": 0.21755455931547688, "grad_norm": 147.0, "learning_rate": 1.5968790637191158e-05, "loss": 0.8008, "step": 9560 }, { "epoch": 0.2177821268461416, "grad_norm": 87.5, "learning_rate": 1.5964146386773175e-05, "loss": 1.0854, "step": 9570 }, { "epoch": 0.2180096943768063, "grad_norm": 0.019287109375, "learning_rate": 1.5959502136355195e-05, "loss": 0.4928, "step": 9580 }, { "epoch": 0.21823726190747103, "grad_norm": 286.0, "learning_rate": 1.5954857885937212e-05, "loss": 2.1381, "step": 9590 }, { "epoch": 0.21846482943813578, "grad_norm": 187.0, "learning_rate": 1.595021363551923e-05, "loss": 0.2718, "step": 9600 }, { "epoch": 0.2186923969688005, "grad_norm": 0.0037689208984375, "learning_rate": 1.5945569385101246e-05, "loss": 1.0599, "step": 9610 }, { "epoch": 0.2189199644994652, "grad_norm": 180.0, "learning_rate": 1.5940925134683263e-05, "loss": 0.2528, "step": 9620 }, { "epoch": 0.21914753203012993, "grad_norm": 0.0162353515625, "learning_rate": 1.593628088426528e-05, "loss": 0.5866, "step": 9630 }, { "epoch": 0.21937509956079468, "grad_norm": 20.625, "learning_rate": 1.5931636633847298e-05, "loss": 0.4141, "step": 9640 }, { "epoch": 0.2196026670914594, "grad_norm": 165.0, "learning_rate": 1.5926992383429315e-05, "loss": 0.3983, "step": 9650 }, { "epoch": 0.21983023462212412, "grad_norm": 0.302734375, "learning_rate": 1.5922348133011335e-05, "loss": 0.9579, "step": 9660 }, { "epoch": 0.22005780215278883, "grad_norm": 85.0, "learning_rate": 1.5917703882593352e-05, "loss": 1.6846, "step": 9670 }, { "epoch": 0.22028536968345355, "grad_norm": 47.0, "learning_rate": 1.5913059632175366e-05, "loss": 0.6325, "step": 9680 }, { "epoch": 0.2205129372141183, "grad_norm": 3.171875, "learning_rate": 1.5908415381757386e-05, "loss": 1.1963, "step": 9690 }, { "epoch": 0.22074050474478302, "grad_norm": 294.0, "learning_rate": 1.5903771131339403e-05, "loss": 1.2549, "step": 9700 }, { "epoch": 0.22096807227544774, "grad_norm": 0.0233154296875, "learning_rate": 1.589912688092142e-05, "loss": 0.396, "step": 9710 }, { "epoch": 0.22119563980611245, "grad_norm": 480.0, "learning_rate": 1.5894482630503437e-05, "loss": 0.9566, "step": 9720 }, { "epoch": 0.2214232073367772, "grad_norm": 87.5, "learning_rate": 1.5889838380085458e-05, "loss": 0.6978, "step": 9730 }, { "epoch": 0.22165077486744192, "grad_norm": 74.5, "learning_rate": 1.5885194129667475e-05, "loss": 1.0224, "step": 9740 }, { "epoch": 0.22187834239810664, "grad_norm": 108.5, "learning_rate": 1.588054987924949e-05, "loss": 1.4368, "step": 9750 }, { "epoch": 0.22210590992877136, "grad_norm": 69.0, "learning_rate": 1.5875905628831506e-05, "loss": 0.7704, "step": 9760 }, { "epoch": 0.22233347745943607, "grad_norm": 370.0, "learning_rate": 1.5871261378413526e-05, "loss": 1.8142, "step": 9770 }, { "epoch": 0.22256104499010082, "grad_norm": 32.25, "learning_rate": 1.5866617127995543e-05, "loss": 0.7583, "step": 9780 }, { "epoch": 0.22278861252076554, "grad_norm": 203.0, "learning_rate": 1.586197287757756e-05, "loss": 0.9393, "step": 9790 }, { "epoch": 0.22301618005143026, "grad_norm": 438.0, "learning_rate": 1.5857328627159577e-05, "loss": 1.1138, "step": 9800 }, { "epoch": 0.22324374758209498, "grad_norm": 79.0, "learning_rate": 1.5852684376741598e-05, "loss": 0.5253, "step": 9810 }, { "epoch": 0.22347131511275972, "grad_norm": 46.25, "learning_rate": 1.584804012632361e-05, "loss": 1.057, "step": 9820 }, { "epoch": 0.22369888264342444, "grad_norm": 33.25, "learning_rate": 1.584339587590563e-05, "loss": 0.8265, "step": 9830 }, { "epoch": 0.22392645017408916, "grad_norm": 80.5, "learning_rate": 1.583875162548765e-05, "loss": 0.8869, "step": 9840 }, { "epoch": 0.22415401770475388, "grad_norm": 334.0, "learning_rate": 1.5834107375069666e-05, "loss": 1.4824, "step": 9850 }, { "epoch": 0.22438158523541862, "grad_norm": 346.0, "learning_rate": 1.5829463124651683e-05, "loss": 0.4892, "step": 9860 }, { "epoch": 0.22460915276608334, "grad_norm": 89.5, "learning_rate": 1.58248188742337e-05, "loss": 1.0218, "step": 9870 }, { "epoch": 0.22483672029674806, "grad_norm": 0.0869140625, "learning_rate": 1.5820174623815717e-05, "loss": 0.4897, "step": 9880 }, { "epoch": 0.22506428782741278, "grad_norm": 106.5, "learning_rate": 1.5815530373397734e-05, "loss": 1.1623, "step": 9890 }, { "epoch": 0.2252918553580775, "grad_norm": 71.5, "learning_rate": 1.581088612297975e-05, "loss": 0.3532, "step": 9900 }, { "epoch": 0.22551942288874224, "grad_norm": 75.5, "learning_rate": 1.5806241872561768e-05, "loss": 1.0138, "step": 9910 }, { "epoch": 0.22574699041940696, "grad_norm": 5.40625, "learning_rate": 1.580159762214379e-05, "loss": 1.3322, "step": 9920 }, { "epoch": 0.22597455795007168, "grad_norm": 0.000904083251953125, "learning_rate": 1.5796953371725806e-05, "loss": 0.4156, "step": 9930 }, { "epoch": 0.2262021254807364, "grad_norm": 0.1435546875, "learning_rate": 1.5792309121307823e-05, "loss": 0.457, "step": 9940 }, { "epoch": 0.22642969301140115, "grad_norm": 0.003265380859375, "learning_rate": 1.578766487088984e-05, "loss": 0.3782, "step": 9950 }, { "epoch": 0.22665726054206586, "grad_norm": 476.0, "learning_rate": 1.5783020620471857e-05, "loss": 0.7299, "step": 9960 }, { "epoch": 0.22688482807273058, "grad_norm": 0.1044921875, "learning_rate": 1.5778376370053874e-05, "loss": 1.0067, "step": 9970 }, { "epoch": 0.2271123956033953, "grad_norm": 3.640625, "learning_rate": 1.577373211963589e-05, "loss": 0.5289, "step": 9980 }, { "epoch": 0.22733996313406002, "grad_norm": 105.0, "learning_rate": 1.5769087869217908e-05, "loss": 0.6029, "step": 9990 }, { "epoch": 0.22756753066472477, "grad_norm": 178.0, "learning_rate": 1.576444361879993e-05, "loss": 0.7951, "step": 10000 }, { "epoch": 0.22779509819538948, "grad_norm": 27.125, "learning_rate": 1.5759799368381945e-05, "loss": 0.228, "step": 10010 }, { "epoch": 0.2280226657260542, "grad_norm": 0.0272216796875, "learning_rate": 1.5755155117963963e-05, "loss": 1.0469, "step": 10020 }, { "epoch": 0.22825023325671892, "grad_norm": 215.0, "learning_rate": 1.575051086754598e-05, "loss": 1.8567, "step": 10030 }, { "epoch": 0.22847780078738367, "grad_norm": 70.5, "learning_rate": 1.5745866617127997e-05, "loss": 0.7132, "step": 10040 }, { "epoch": 0.22870536831804839, "grad_norm": 201.0, "learning_rate": 1.5741222366710014e-05, "loss": 0.8474, "step": 10050 }, { "epoch": 0.2289329358487131, "grad_norm": 0.0233154296875, "learning_rate": 1.573657811629203e-05, "loss": 0.4694, "step": 10060 }, { "epoch": 0.22916050337937782, "grad_norm": 180.0, "learning_rate": 1.5731933865874048e-05, "loss": 0.7782, "step": 10070 }, { "epoch": 0.22938807091004257, "grad_norm": 151.0, "learning_rate": 1.5727289615456068e-05, "loss": 0.375, "step": 10080 }, { "epoch": 0.2296156384407073, "grad_norm": 34.25, "learning_rate": 1.5722645365038085e-05, "loss": 0.9511, "step": 10090 }, { "epoch": 0.229843205971372, "grad_norm": 0.08935546875, "learning_rate": 1.57180011146201e-05, "loss": 0.128, "step": 10100 }, { "epoch": 0.23007077350203672, "grad_norm": 45.5, "learning_rate": 1.571335686420212e-05, "loss": 0.7432, "step": 10110 }, { "epoch": 0.23029834103270144, "grad_norm": 114.0, "learning_rate": 1.5708712613784136e-05, "loss": 1.0422, "step": 10120 }, { "epoch": 0.2305259085633662, "grad_norm": 274.0, "learning_rate": 1.5704068363366154e-05, "loss": 0.8241, "step": 10130 }, { "epoch": 0.2307534760940309, "grad_norm": 149.0, "learning_rate": 1.569942411294817e-05, "loss": 1.9021, "step": 10140 }, { "epoch": 0.23098104362469563, "grad_norm": 76.5, "learning_rate": 1.569477986253019e-05, "loss": 0.6726, "step": 10150 }, { "epoch": 0.23120861115536034, "grad_norm": 0.98046875, "learning_rate": 1.5690135612112208e-05, "loss": 0.9766, "step": 10160 }, { "epoch": 0.2314361786860251, "grad_norm": 2.734375, "learning_rate": 1.5685491361694222e-05, "loss": 0.422, "step": 10170 }, { "epoch": 0.2316637462166898, "grad_norm": 151.0, "learning_rate": 1.5680847111276242e-05, "loss": 1.6101, "step": 10180 }, { "epoch": 0.23189131374735453, "grad_norm": 136.0, "learning_rate": 1.567620286085826e-05, "loss": 1.1702, "step": 10190 }, { "epoch": 0.23211888127801925, "grad_norm": 104.0, "learning_rate": 1.5671558610440276e-05, "loss": 0.4103, "step": 10200 }, { "epoch": 0.23234644880868396, "grad_norm": 239.0, "learning_rate": 1.5666914360022293e-05, "loss": 1.6953, "step": 10210 }, { "epoch": 0.2325740163393487, "grad_norm": 302.0, "learning_rate": 1.566227010960431e-05, "loss": 1.4496, "step": 10220 }, { "epoch": 0.23280158387001343, "grad_norm": 0.00121307373046875, "learning_rate": 1.565762585918633e-05, "loss": 1.166, "step": 10230 }, { "epoch": 0.23302915140067815, "grad_norm": 131.0, "learning_rate": 1.5652981608768345e-05, "loss": 0.5258, "step": 10240 }, { "epoch": 0.23325671893134287, "grad_norm": 0.000659942626953125, "learning_rate": 1.564833735835036e-05, "loss": 0.6239, "step": 10250 }, { "epoch": 0.2334842864620076, "grad_norm": 1.625, "learning_rate": 1.5643693107932382e-05, "loss": 0.1668, "step": 10260 }, { "epoch": 0.23371185399267233, "grad_norm": 0.0145263671875, "learning_rate": 1.56390488575144e-05, "loss": 1.8953, "step": 10270 }, { "epoch": 0.23393942152333705, "grad_norm": 0.00186920166015625, "learning_rate": 1.5634404607096416e-05, "loss": 1.2062, "step": 10280 }, { "epoch": 0.23416698905400177, "grad_norm": 228.0, "learning_rate": 1.5629760356678433e-05, "loss": 1.2318, "step": 10290 }, { "epoch": 0.2343945565846665, "grad_norm": 117.5, "learning_rate": 1.562511610626045e-05, "loss": 0.753, "step": 10300 }, { "epoch": 0.23462212411533123, "grad_norm": 102.0, "learning_rate": 1.5620471855842467e-05, "loss": 0.9414, "step": 10310 }, { "epoch": 0.23484969164599595, "grad_norm": 258.0, "learning_rate": 1.5615827605424484e-05, "loss": 0.4186, "step": 10320 }, { "epoch": 0.23507725917666067, "grad_norm": 430.0, "learning_rate": 1.56111833550065e-05, "loss": 1.0574, "step": 10330 }, { "epoch": 0.2353048267073254, "grad_norm": 33.75, "learning_rate": 1.5606539104588522e-05, "loss": 0.3594, "step": 10340 }, { "epoch": 0.23553239423799013, "grad_norm": 1.1640625, "learning_rate": 1.560189485417054e-05, "loss": 1.1954, "step": 10350 }, { "epoch": 0.23575996176865485, "grad_norm": 70.0, "learning_rate": 1.5597250603752556e-05, "loss": 0.8519, "step": 10360 }, { "epoch": 0.23598752929931957, "grad_norm": 119.5, "learning_rate": 1.5592606353334573e-05, "loss": 0.4198, "step": 10370 }, { "epoch": 0.2362150968299843, "grad_norm": 113.5, "learning_rate": 1.558796210291659e-05, "loss": 0.7912, "step": 10380 }, { "epoch": 0.23644266436064904, "grad_norm": 0.041748046875, "learning_rate": 1.5583317852498607e-05, "loss": 0.7607, "step": 10390 }, { "epoch": 0.23667023189131375, "grad_norm": 127.0, "learning_rate": 1.5578673602080624e-05, "loss": 1.0994, "step": 10400 }, { "epoch": 0.23689779942197847, "grad_norm": 102.5, "learning_rate": 1.557402935166264e-05, "loss": 0.9864, "step": 10410 }, { "epoch": 0.2371253669526432, "grad_norm": 164.0, "learning_rate": 1.556938510124466e-05, "loss": 0.8547, "step": 10420 }, { "epoch": 0.2373529344833079, "grad_norm": 3.6875, "learning_rate": 1.556474085082668e-05, "loss": 0.3589, "step": 10430 }, { "epoch": 0.23758050201397266, "grad_norm": 13.5, "learning_rate": 1.5560096600408696e-05, "loss": 0.5556, "step": 10440 }, { "epoch": 0.23780806954463737, "grad_norm": 410.0, "learning_rate": 1.5555452349990713e-05, "loss": 0.9017, "step": 10450 }, { "epoch": 0.2380356370753021, "grad_norm": 0.051513671875, "learning_rate": 1.555080809957273e-05, "loss": 1.1998, "step": 10460 }, { "epoch": 0.2382632046059668, "grad_norm": 326.0, "learning_rate": 1.5546163849154747e-05, "loss": 1.9923, "step": 10470 }, { "epoch": 0.23849077213663156, "grad_norm": 61.5, "learning_rate": 1.5541519598736764e-05, "loss": 1.11, "step": 10480 }, { "epoch": 0.23871833966729628, "grad_norm": 21.125, "learning_rate": 1.5536875348318784e-05, "loss": 0.4778, "step": 10490 }, { "epoch": 0.238945907197961, "grad_norm": 306.0, "learning_rate": 1.55322310979008e-05, "loss": 1.3553, "step": 10500 }, { "epoch": 0.2391734747286257, "grad_norm": 42.5, "learning_rate": 1.552758684748282e-05, "loss": 0.9198, "step": 10510 }, { "epoch": 0.23940104225929043, "grad_norm": 87.5, "learning_rate": 1.5522942597064836e-05, "loss": 0.1433, "step": 10520 }, { "epoch": 0.23962860978995518, "grad_norm": 0.62890625, "learning_rate": 1.5518298346646853e-05, "loss": 0.7744, "step": 10530 }, { "epoch": 0.2398561773206199, "grad_norm": 39.5, "learning_rate": 1.551365409622887e-05, "loss": 1.0875, "step": 10540 }, { "epoch": 0.24008374485128461, "grad_norm": 0.296875, "learning_rate": 1.5509009845810887e-05, "loss": 0.4929, "step": 10550 }, { "epoch": 0.24031131238194933, "grad_norm": 28.75, "learning_rate": 1.5504365595392904e-05, "loss": 0.8594, "step": 10560 }, { "epoch": 0.24053887991261408, "grad_norm": 5.375, "learning_rate": 1.5499721344974924e-05, "loss": 0.6428, "step": 10570 }, { "epoch": 0.2407664474432788, "grad_norm": 61.25, "learning_rate": 1.549507709455694e-05, "loss": 1.1389, "step": 10580 }, { "epoch": 0.24099401497394352, "grad_norm": 191.0, "learning_rate": 1.5490432844138955e-05, "loss": 1.119, "step": 10590 }, { "epoch": 0.24122158250460823, "grad_norm": 141.0, "learning_rate": 1.5485788593720975e-05, "loss": 0.6695, "step": 10600 }, { "epoch": 0.24144915003527298, "grad_norm": 0.01055908203125, "learning_rate": 1.5481144343302992e-05, "loss": 0.4979, "step": 10610 }, { "epoch": 0.2416767175659377, "grad_norm": 76.0, "learning_rate": 1.547650009288501e-05, "loss": 0.9168, "step": 10620 }, { "epoch": 0.24190428509660242, "grad_norm": 110.5, "learning_rate": 1.5471855842467027e-05, "loss": 1.0271, "step": 10630 }, { "epoch": 0.24213185262726714, "grad_norm": 87.5, "learning_rate": 1.5467211592049044e-05, "loss": 0.5534, "step": 10640 }, { "epoch": 0.24235942015793185, "grad_norm": 199.0, "learning_rate": 1.5462567341631064e-05, "loss": 1.3976, "step": 10650 }, { "epoch": 0.2425869876885966, "grad_norm": 0.02099609375, "learning_rate": 1.5457923091213078e-05, "loss": 0.9919, "step": 10660 }, { "epoch": 0.24281455521926132, "grad_norm": 102.0, "learning_rate": 1.5453278840795095e-05, "loss": 0.6294, "step": 10670 }, { "epoch": 0.24304212274992604, "grad_norm": 28.5, "learning_rate": 1.5448634590377115e-05, "loss": 0.808, "step": 10680 }, { "epoch": 0.24326969028059076, "grad_norm": 73.5, "learning_rate": 1.5443990339959132e-05, "loss": 0.5964, "step": 10690 }, { "epoch": 0.2434972578112555, "grad_norm": 55.0, "learning_rate": 1.543934608954115e-05, "loss": 0.9213, "step": 10700 }, { "epoch": 0.24372482534192022, "grad_norm": 105.5, "learning_rate": 1.5434701839123166e-05, "loss": 1.5111, "step": 10710 }, { "epoch": 0.24395239287258494, "grad_norm": 364.0, "learning_rate": 1.5430057588705183e-05, "loss": 1.5324, "step": 10720 }, { "epoch": 0.24417996040324966, "grad_norm": 276.0, "learning_rate": 1.54254133382872e-05, "loss": 1.1884, "step": 10730 }, { "epoch": 0.24440752793391438, "grad_norm": 282.0, "learning_rate": 1.5420769087869218e-05, "loss": 0.736, "step": 10740 }, { "epoch": 0.24463509546457912, "grad_norm": 636.0, "learning_rate": 1.5416124837451235e-05, "loss": 0.5855, "step": 10750 }, { "epoch": 0.24486266299524384, "grad_norm": 44.5, "learning_rate": 1.5411480587033255e-05, "loss": 0.5255, "step": 10760 }, { "epoch": 0.24509023052590856, "grad_norm": 150.0, "learning_rate": 1.5406836336615272e-05, "loss": 0.5835, "step": 10770 }, { "epoch": 0.24531779805657328, "grad_norm": 109.0, "learning_rate": 1.540219208619729e-05, "loss": 0.6366, "step": 10780 }, { "epoch": 0.24554536558723802, "grad_norm": 0.000881195068359375, "learning_rate": 1.5397547835779306e-05, "loss": 1.2602, "step": 10790 }, { "epoch": 0.24577293311790274, "grad_norm": 1.953125, "learning_rate": 1.5392903585361323e-05, "loss": 0.5185, "step": 10800 }, { "epoch": 0.24600050064856746, "grad_norm": 6.03125, "learning_rate": 1.538825933494334e-05, "loss": 1.3522, "step": 10810 }, { "epoch": 0.24622806817923218, "grad_norm": 125.5, "learning_rate": 1.5383615084525357e-05, "loss": 1.024, "step": 10820 }, { "epoch": 0.24645563570989693, "grad_norm": 2.078125, "learning_rate": 1.5378970834107378e-05, "loss": 0.2957, "step": 10830 }, { "epoch": 0.24668320324056164, "grad_norm": 288.0, "learning_rate": 1.5374326583689395e-05, "loss": 0.3122, "step": 10840 }, { "epoch": 0.24691077077122636, "grad_norm": 194.0, "learning_rate": 1.5369682333271412e-05, "loss": 0.7117, "step": 10850 }, { "epoch": 0.24713833830189108, "grad_norm": 0.076171875, "learning_rate": 1.536503808285343e-05, "loss": 0.9811, "step": 10860 }, { "epoch": 0.2473659058325558, "grad_norm": 92.5, "learning_rate": 1.5360393832435446e-05, "loss": 0.6828, "step": 10870 }, { "epoch": 0.24759347336322055, "grad_norm": 0.0001659393310546875, "learning_rate": 1.5355749582017463e-05, "loss": 0.479, "step": 10880 }, { "epoch": 0.24782104089388526, "grad_norm": 274.0, "learning_rate": 1.535110533159948e-05, "loss": 1.662, "step": 10890 }, { "epoch": 0.24804860842454998, "grad_norm": 14.875, "learning_rate": 1.5346461081181497e-05, "loss": 0.6964, "step": 10900 }, { "epoch": 0.2482761759552147, "grad_norm": 67.0, "learning_rate": 1.5341816830763518e-05, "loss": 0.5646, "step": 10910 }, { "epoch": 0.24850374348587945, "grad_norm": 6.9375, "learning_rate": 1.5337172580345535e-05, "loss": 0.2894, "step": 10920 }, { "epoch": 0.24873131101654417, "grad_norm": 121.5, "learning_rate": 1.5332528329927552e-05, "loss": 1.223, "step": 10930 }, { "epoch": 0.24895887854720888, "grad_norm": 0.0012969970703125, "learning_rate": 1.532788407950957e-05, "loss": 0.366, "step": 10940 }, { "epoch": 0.2491864460778736, "grad_norm": 137.0, "learning_rate": 1.5323239829091586e-05, "loss": 0.7015, "step": 10950 }, { "epoch": 0.24941401360853832, "grad_norm": 220.0, "learning_rate": 1.5318595578673603e-05, "loss": 1.6721, "step": 10960 }, { "epoch": 0.24964158113920307, "grad_norm": 78.5, "learning_rate": 1.531395132825562e-05, "loss": 0.9245, "step": 10970 }, { "epoch": 0.24986914866986779, "grad_norm": 0.34375, "learning_rate": 1.5309307077837637e-05, "loss": 0.4509, "step": 10980 }, { "epoch": 0.25009671620053253, "grad_norm": 6.96875, "learning_rate": 1.5304662827419658e-05, "loss": 0.8195, "step": 10990 }, { "epoch": 0.25032428373119725, "grad_norm": 0.000629425048828125, "learning_rate": 1.5300018577001675e-05, "loss": 0.9289, "step": 11000 }, { "epoch": 0.25055185126186197, "grad_norm": 138.0, "learning_rate": 1.5295374326583688e-05, "loss": 1.4042, "step": 11010 }, { "epoch": 0.2507794187925267, "grad_norm": 151.0, "learning_rate": 1.529073007616571e-05, "loss": 0.5587, "step": 11020 }, { "epoch": 0.2510069863231914, "grad_norm": 13.8125, "learning_rate": 1.5286085825747726e-05, "loss": 0.7141, "step": 11030 }, { "epoch": 0.2512345538538561, "grad_norm": 248.0, "learning_rate": 1.5281441575329743e-05, "loss": 0.9725, "step": 11040 }, { "epoch": 0.25146212138452084, "grad_norm": 1.1640625, "learning_rate": 1.527679732491176e-05, "loss": 1.3071, "step": 11050 }, { "epoch": 0.25168968891518556, "grad_norm": 368.0, "learning_rate": 1.5272153074493777e-05, "loss": 1.1907, "step": 11060 }, { "epoch": 0.2519172564458503, "grad_norm": 218.0, "learning_rate": 1.5267508824075797e-05, "loss": 0.5203, "step": 11070 }, { "epoch": 0.25214482397651505, "grad_norm": 0.00118255615234375, "learning_rate": 1.526286457365781e-05, "loss": 0.6737, "step": 11080 }, { "epoch": 0.25237239150717977, "grad_norm": 0.0255126953125, "learning_rate": 1.525822032323983e-05, "loss": 0.822, "step": 11090 }, { "epoch": 0.2525999590378445, "grad_norm": 0.0174560546875, "learning_rate": 1.5253576072821847e-05, "loss": 1.1703, "step": 11100 }, { "epoch": 0.2528275265685092, "grad_norm": 113.5, "learning_rate": 1.5248931822403866e-05, "loss": 0.4113, "step": 11110 }, { "epoch": 0.2530550940991739, "grad_norm": 512.0, "learning_rate": 1.5244287571985883e-05, "loss": 0.5001, "step": 11120 }, { "epoch": 0.25328266162983865, "grad_norm": 0.00408935546875, "learning_rate": 1.5239643321567901e-05, "loss": 0.9808, "step": 11130 }, { "epoch": 0.25351022916050336, "grad_norm": 45.0, "learning_rate": 1.5234999071149918e-05, "loss": 1.0447, "step": 11140 }, { "epoch": 0.2537377966911681, "grad_norm": 136.0, "learning_rate": 1.5230354820731934e-05, "loss": 0.8825, "step": 11150 }, { "epoch": 0.25396536422183286, "grad_norm": 153.0, "learning_rate": 1.5225710570313953e-05, "loss": 0.6856, "step": 11160 }, { "epoch": 0.2541929317524976, "grad_norm": 130.0, "learning_rate": 1.522106631989597e-05, "loss": 0.9714, "step": 11170 }, { "epoch": 0.2544204992831623, "grad_norm": 99.0, "learning_rate": 1.5216422069477987e-05, "loss": 1.7929, "step": 11180 }, { "epoch": 0.254648066813827, "grad_norm": 197.0, "learning_rate": 1.5211777819060005e-05, "loss": 0.5649, "step": 11190 }, { "epoch": 0.25487563434449173, "grad_norm": 53.5, "learning_rate": 1.5207133568642022e-05, "loss": 0.4676, "step": 11200 }, { "epoch": 0.25510320187515645, "grad_norm": 115.5, "learning_rate": 1.5202489318224041e-05, "loss": 0.3084, "step": 11210 }, { "epoch": 0.25533076940582117, "grad_norm": 114.0, "learning_rate": 1.5197845067806057e-05, "loss": 1.4257, "step": 11220 }, { "epoch": 0.2555583369364859, "grad_norm": 0.046875, "learning_rate": 1.5193200817388074e-05, "loss": 1.0898, "step": 11230 }, { "epoch": 0.2557859044671506, "grad_norm": 2.25, "learning_rate": 1.5188556566970092e-05, "loss": 0.2494, "step": 11240 }, { "epoch": 0.2560134719978154, "grad_norm": 152.0, "learning_rate": 1.518391231655211e-05, "loss": 0.7775, "step": 11250 }, { "epoch": 0.2562410395284801, "grad_norm": 374.0, "learning_rate": 1.5179268066134128e-05, "loss": 1.4239, "step": 11260 }, { "epoch": 0.2564686070591448, "grad_norm": 0.0034332275390625, "learning_rate": 1.5174623815716145e-05, "loss": 0.1608, "step": 11270 }, { "epoch": 0.25669617458980953, "grad_norm": 118.5, "learning_rate": 1.5169979565298162e-05, "loss": 0.898, "step": 11280 }, { "epoch": 0.25692374212047425, "grad_norm": 160.0, "learning_rate": 1.516533531488018e-05, "loss": 0.5473, "step": 11290 }, { "epoch": 0.25715130965113897, "grad_norm": 98.0, "learning_rate": 1.5160691064462196e-05, "loss": 0.6661, "step": 11300 }, { "epoch": 0.2573788771818037, "grad_norm": 516.0, "learning_rate": 1.5156046814044213e-05, "loss": 1.9695, "step": 11310 }, { "epoch": 0.2576064447124684, "grad_norm": 312.0, "learning_rate": 1.5151402563626232e-05, "loss": 1.2844, "step": 11320 }, { "epoch": 0.2578340122431331, "grad_norm": 32.75, "learning_rate": 1.514675831320825e-05, "loss": 1.5731, "step": 11330 }, { "epoch": 0.2580615797737979, "grad_norm": 0.0260009765625, "learning_rate": 1.5142114062790268e-05, "loss": 0.1098, "step": 11340 }, { "epoch": 0.2582891473044626, "grad_norm": 23.75, "learning_rate": 1.5137469812372285e-05, "loss": 1.3285, "step": 11350 }, { "epoch": 0.25851671483512734, "grad_norm": 0.41796875, "learning_rate": 1.51328255619543e-05, "loss": 0.7926, "step": 11360 }, { "epoch": 0.25874428236579206, "grad_norm": 0.0047607421875, "learning_rate": 1.512818131153632e-05, "loss": 0.3374, "step": 11370 }, { "epoch": 0.2589718498964568, "grad_norm": 117.5, "learning_rate": 1.5123537061118336e-05, "loss": 1.4112, "step": 11380 }, { "epoch": 0.2591994174271215, "grad_norm": 304.0, "learning_rate": 1.5118892810700353e-05, "loss": 0.8933, "step": 11390 }, { "epoch": 0.2594269849577862, "grad_norm": 86.0, "learning_rate": 1.5114248560282372e-05, "loss": 1.1187, "step": 11400 }, { "epoch": 0.25965455248845093, "grad_norm": 64.0, "learning_rate": 1.5109604309864389e-05, "loss": 0.9441, "step": 11410 }, { "epoch": 0.25988212001911565, "grad_norm": 238.0, "learning_rate": 1.5104960059446408e-05, "loss": 1.0595, "step": 11420 }, { "epoch": 0.2601096875497804, "grad_norm": 7.625, "learning_rate": 1.5100315809028423e-05, "loss": 0.9034, "step": 11430 }, { "epoch": 0.26033725508044514, "grad_norm": 1.3125, "learning_rate": 1.509567155861044e-05, "loss": 0.4803, "step": 11440 }, { "epoch": 0.26056482261110986, "grad_norm": 5.8125, "learning_rate": 1.5091027308192459e-05, "loss": 0.3417, "step": 11450 }, { "epoch": 0.2607923901417746, "grad_norm": 0.00150299072265625, "learning_rate": 1.5086383057774476e-05, "loss": 0.3545, "step": 11460 }, { "epoch": 0.2610199576724393, "grad_norm": 119.0, "learning_rate": 1.5081738807356495e-05, "loss": 0.9882, "step": 11470 }, { "epoch": 0.261247525203104, "grad_norm": 0.00109100341796875, "learning_rate": 1.5077094556938512e-05, "loss": 0.9615, "step": 11480 }, { "epoch": 0.26147509273376873, "grad_norm": 12.1875, "learning_rate": 1.5072450306520529e-05, "loss": 0.8557, "step": 11490 }, { "epoch": 0.26170266026443345, "grad_norm": 0.00188446044921875, "learning_rate": 1.5067806056102546e-05, "loss": 0.9699, "step": 11500 }, { "epoch": 0.26193022779509817, "grad_norm": 146.0, "learning_rate": 1.5063161805684563e-05, "loss": 0.6161, "step": 11510 }, { "epoch": 0.26215779532576294, "grad_norm": 52.0, "learning_rate": 1.505851755526658e-05, "loss": 0.4844, "step": 11520 }, { "epoch": 0.26238536285642766, "grad_norm": 66.5, "learning_rate": 1.5053873304848599e-05, "loss": 0.5849, "step": 11530 }, { "epoch": 0.2626129303870924, "grad_norm": 198.0, "learning_rate": 1.5049229054430616e-05, "loss": 0.8534, "step": 11540 }, { "epoch": 0.2628404979177571, "grad_norm": 60.75, "learning_rate": 1.5044584804012635e-05, "loss": 0.5026, "step": 11550 }, { "epoch": 0.2630680654484218, "grad_norm": 0.11181640625, "learning_rate": 1.5039940553594652e-05, "loss": 0.3563, "step": 11560 }, { "epoch": 0.26329563297908654, "grad_norm": 19.25, "learning_rate": 1.5035296303176667e-05, "loss": 0.6289, "step": 11570 }, { "epoch": 0.26352320050975125, "grad_norm": 88.0, "learning_rate": 1.5030652052758686e-05, "loss": 0.57, "step": 11580 }, { "epoch": 0.263750768040416, "grad_norm": 2.625, "learning_rate": 1.5026007802340703e-05, "loss": 0.5479, "step": 11590 }, { "epoch": 0.2639783355710807, "grad_norm": 0.001739501953125, "learning_rate": 1.5021363551922722e-05, "loss": 0.6788, "step": 11600 }, { "epoch": 0.26420590310174547, "grad_norm": 0.01470947265625, "learning_rate": 1.5016719301504739e-05, "loss": 0.5358, "step": 11610 }, { "epoch": 0.2644334706324102, "grad_norm": 0.000743865966796875, "learning_rate": 1.5012075051086756e-05, "loss": 0.8938, "step": 11620 }, { "epoch": 0.2646610381630749, "grad_norm": 0.006561279296875, "learning_rate": 1.5007430800668774e-05, "loss": 0.0581, "step": 11630 }, { "epoch": 0.2648886056937396, "grad_norm": 60.25, "learning_rate": 1.500278655025079e-05, "loss": 0.4747, "step": 11640 }, { "epoch": 0.26511617322440434, "grad_norm": 163.0, "learning_rate": 1.4998142299832807e-05, "loss": 0.402, "step": 11650 }, { "epoch": 0.26534374075506906, "grad_norm": 147.0, "learning_rate": 1.4993498049414826e-05, "loss": 0.6031, "step": 11660 }, { "epoch": 0.2655713082857338, "grad_norm": 0.12451171875, "learning_rate": 1.4988853798996843e-05, "loss": 0.3656, "step": 11670 }, { "epoch": 0.2657988758163985, "grad_norm": 0.000514984130859375, "learning_rate": 1.4984209548578861e-05, "loss": 0.4934, "step": 11680 }, { "epoch": 0.26602644334706327, "grad_norm": 296.0, "learning_rate": 1.4979565298160878e-05, "loss": 0.776, "step": 11690 }, { "epoch": 0.266254010877728, "grad_norm": 173.0, "learning_rate": 1.4974921047742896e-05, "loss": 0.5252, "step": 11700 }, { "epoch": 0.2664815784083927, "grad_norm": 101.0, "learning_rate": 1.4970276797324913e-05, "loss": 0.9683, "step": 11710 }, { "epoch": 0.2667091459390574, "grad_norm": 21.875, "learning_rate": 1.496563254690693e-05, "loss": 0.5718, "step": 11720 }, { "epoch": 0.26693671346972214, "grad_norm": 0.0002231597900390625, "learning_rate": 1.4960988296488947e-05, "loss": 0.3878, "step": 11730 }, { "epoch": 0.26716428100038686, "grad_norm": 179.0, "learning_rate": 1.4956344046070965e-05, "loss": 1.224, "step": 11740 }, { "epoch": 0.2673918485310516, "grad_norm": 162.0, "learning_rate": 1.4951699795652982e-05, "loss": 0.1164, "step": 11750 }, { "epoch": 0.2676194160617163, "grad_norm": 312.0, "learning_rate": 1.4947055545235001e-05, "loss": 0.9184, "step": 11760 }, { "epoch": 0.267846983592381, "grad_norm": 192.0, "learning_rate": 1.4942411294817018e-05, "loss": 1.1383, "step": 11770 }, { "epoch": 0.2680745511230458, "grad_norm": 76.0, "learning_rate": 1.4937767044399034e-05, "loss": 0.3553, "step": 11780 }, { "epoch": 0.2683021186537105, "grad_norm": 272.0, "learning_rate": 1.4933122793981052e-05, "loss": 0.5806, "step": 11790 }, { "epoch": 0.2685296861843752, "grad_norm": 239.0, "learning_rate": 1.492847854356307e-05, "loss": 1.0596, "step": 11800 }, { "epoch": 0.26875725371503995, "grad_norm": 0.73046875, "learning_rate": 1.4923834293145088e-05, "loss": 2.0093, "step": 11810 }, { "epoch": 0.26898482124570466, "grad_norm": 97.0, "learning_rate": 1.4919190042727105e-05, "loss": 0.6592, "step": 11820 }, { "epoch": 0.2692123887763694, "grad_norm": 0.000171661376953125, "learning_rate": 1.4914545792309122e-05, "loss": 0.7887, "step": 11830 }, { "epoch": 0.2694399563070341, "grad_norm": 55.0, "learning_rate": 1.4909901541891141e-05, "loss": 0.6898, "step": 11840 }, { "epoch": 0.2696675238376988, "grad_norm": 0.8359375, "learning_rate": 1.4905257291473156e-05, "loss": 0.2239, "step": 11850 }, { "epoch": 0.26989509136836354, "grad_norm": 0.0002956390380859375, "learning_rate": 1.4900613041055173e-05, "loss": 0.4225, "step": 11860 }, { "epoch": 0.2701226588990283, "grad_norm": 0.0302734375, "learning_rate": 1.4895968790637192e-05, "loss": 0.7709, "step": 11870 }, { "epoch": 0.27035022642969303, "grad_norm": 202.0, "learning_rate": 1.489132454021921e-05, "loss": 1.5388, "step": 11880 }, { "epoch": 0.27057779396035775, "grad_norm": 190.0, "learning_rate": 1.4886680289801228e-05, "loss": 0.8734, "step": 11890 }, { "epoch": 0.27080536149102247, "grad_norm": 66.0, "learning_rate": 1.4882036039383245e-05, "loss": 0.9225, "step": 11900 }, { "epoch": 0.2710329290216872, "grad_norm": 150.0, "learning_rate": 1.4877391788965264e-05, "loss": 1.9771, "step": 11910 }, { "epoch": 0.2712604965523519, "grad_norm": 15.75, "learning_rate": 1.487274753854728e-05, "loss": 0.6869, "step": 11920 }, { "epoch": 0.2714880640830166, "grad_norm": 0.002655029296875, "learning_rate": 1.4868103288129296e-05, "loss": 0.5966, "step": 11930 }, { "epoch": 0.27171563161368134, "grad_norm": 0.026123046875, "learning_rate": 1.4863459037711313e-05, "loss": 0.7905, "step": 11940 }, { "epoch": 0.27194319914434606, "grad_norm": 46.0, "learning_rate": 1.4858814787293332e-05, "loss": 1.7054, "step": 11950 }, { "epoch": 0.27217076667501083, "grad_norm": 120.5, "learning_rate": 1.4854170536875349e-05, "loss": 0.7836, "step": 11960 }, { "epoch": 0.27239833420567555, "grad_norm": 0.2236328125, "learning_rate": 1.4849526286457368e-05, "loss": 0.6396, "step": 11970 }, { "epoch": 0.27262590173634027, "grad_norm": 0.609375, "learning_rate": 1.4844882036039385e-05, "loss": 0.4424, "step": 11980 }, { "epoch": 0.272853469267005, "grad_norm": 238.0, "learning_rate": 1.48402377856214e-05, "loss": 0.4038, "step": 11990 }, { "epoch": 0.2730810367976697, "grad_norm": 0.00139617919921875, "learning_rate": 1.4835593535203419e-05, "loss": 1.0529, "step": 12000 }, { "epoch": 0.2733086043283344, "grad_norm": 148.0, "learning_rate": 1.4830949284785436e-05, "loss": 0.801, "step": 12010 }, { "epoch": 0.27353617185899914, "grad_norm": 166.0, "learning_rate": 1.4826305034367455e-05, "loss": 0.7908, "step": 12020 }, { "epoch": 0.27376373938966386, "grad_norm": 44.0, "learning_rate": 1.4821660783949472e-05, "loss": 0.9472, "step": 12030 }, { "epoch": 0.2739913069203286, "grad_norm": 344.0, "learning_rate": 1.4817016533531489e-05, "loss": 1.2067, "step": 12040 }, { "epoch": 0.27421887445099336, "grad_norm": 2.21875, "learning_rate": 1.4812372283113508e-05, "loss": 0.7768, "step": 12050 }, { "epoch": 0.2744464419816581, "grad_norm": 0.00872802734375, "learning_rate": 1.4807728032695523e-05, "loss": 1.1369, "step": 12060 }, { "epoch": 0.2746740095123228, "grad_norm": 241.0, "learning_rate": 1.480308378227754e-05, "loss": 0.7804, "step": 12070 }, { "epoch": 0.2749015770429875, "grad_norm": 187.0, "learning_rate": 1.4798439531859559e-05, "loss": 1.5419, "step": 12080 }, { "epoch": 0.27512914457365223, "grad_norm": 169.0, "learning_rate": 1.4793795281441576e-05, "loss": 0.6534, "step": 12090 }, { "epoch": 0.27535671210431695, "grad_norm": 196.0, "learning_rate": 1.4789151031023595e-05, "loss": 1.6335, "step": 12100 }, { "epoch": 0.27558427963498167, "grad_norm": 43.0, "learning_rate": 1.4784506780605612e-05, "loss": 0.3383, "step": 12110 }, { "epoch": 0.2758118471656464, "grad_norm": 2.8125, "learning_rate": 1.477986253018763e-05, "loss": 0.5496, "step": 12120 }, { "epoch": 0.2760394146963111, "grad_norm": 123.0, "learning_rate": 1.4775218279769646e-05, "loss": 0.9482, "step": 12130 }, { "epoch": 0.2762669822269759, "grad_norm": 0.037109375, "learning_rate": 1.4770574029351663e-05, "loss": 0.5666, "step": 12140 }, { "epoch": 0.2764945497576406, "grad_norm": 0.07275390625, "learning_rate": 1.4765929778933682e-05, "loss": 0.689, "step": 12150 }, { "epoch": 0.2767221172883053, "grad_norm": 181.0, "learning_rate": 1.4761285528515699e-05, "loss": 0.7099, "step": 12160 }, { "epoch": 0.27694968481897003, "grad_norm": 83.5, "learning_rate": 1.4756641278097716e-05, "loss": 0.7678, "step": 12170 }, { "epoch": 0.27717725234963475, "grad_norm": 5.03125, "learning_rate": 1.4751997027679734e-05, "loss": 0.9393, "step": 12180 }, { "epoch": 0.27740481988029947, "grad_norm": 0.03662109375, "learning_rate": 1.4747352777261752e-05, "loss": 1.3429, "step": 12190 }, { "epoch": 0.2776323874109642, "grad_norm": 29.875, "learning_rate": 1.4742708526843767e-05, "loss": 1.5743, "step": 12200 }, { "epoch": 0.2778599549416289, "grad_norm": 0.029052734375, "learning_rate": 1.4738064276425786e-05, "loss": 0.4431, "step": 12210 }, { "epoch": 0.2780875224722937, "grad_norm": 0.035888671875, "learning_rate": 1.4733420026007803e-05, "loss": 0.608, "step": 12220 }, { "epoch": 0.2783150900029584, "grad_norm": 2.484375, "learning_rate": 1.4728775775589821e-05, "loss": 0.5203, "step": 12230 }, { "epoch": 0.2785426575336231, "grad_norm": 206.0, "learning_rate": 1.4724131525171839e-05, "loss": 1.1412, "step": 12240 }, { "epoch": 0.27877022506428784, "grad_norm": 111.0, "learning_rate": 1.4719487274753856e-05, "loss": 0.4841, "step": 12250 }, { "epoch": 0.27899779259495255, "grad_norm": 226.0, "learning_rate": 1.4714843024335874e-05, "loss": 1.2142, "step": 12260 }, { "epoch": 0.2792253601256173, "grad_norm": 59.0, "learning_rate": 1.471019877391789e-05, "loss": 1.4668, "step": 12270 }, { "epoch": 0.279452927656282, "grad_norm": 2.21875, "learning_rate": 1.4705554523499907e-05, "loss": 0.6751, "step": 12280 }, { "epoch": 0.2796804951869467, "grad_norm": 150.0, "learning_rate": 1.4700910273081925e-05, "loss": 0.8443, "step": 12290 }, { "epoch": 0.27990806271761143, "grad_norm": 111.0, "learning_rate": 1.4696266022663943e-05, "loss": 0.3593, "step": 12300 }, { "epoch": 0.2801356302482762, "grad_norm": 334.0, "learning_rate": 1.4691621772245961e-05, "loss": 0.529, "step": 12310 }, { "epoch": 0.2803631977789409, "grad_norm": 340.0, "learning_rate": 1.4686977521827978e-05, "loss": 0.5697, "step": 12320 }, { "epoch": 0.28059076530960564, "grad_norm": 76.5, "learning_rate": 1.4682333271409997e-05, "loss": 0.7044, "step": 12330 }, { "epoch": 0.28081833284027036, "grad_norm": 44.5, "learning_rate": 1.4677689020992012e-05, "loss": 0.8766, "step": 12340 }, { "epoch": 0.2810459003709351, "grad_norm": 30.375, "learning_rate": 1.467304477057403e-05, "loss": 0.4032, "step": 12350 }, { "epoch": 0.2812734679015998, "grad_norm": 131.0, "learning_rate": 1.4668400520156048e-05, "loss": 0.7512, "step": 12360 }, { "epoch": 0.2815010354322645, "grad_norm": 0.008544921875, "learning_rate": 1.4663756269738065e-05, "loss": 0.8798, "step": 12370 }, { "epoch": 0.28172860296292923, "grad_norm": 68.5, "learning_rate": 1.4659112019320082e-05, "loss": 0.3226, "step": 12380 }, { "epoch": 0.28195617049359395, "grad_norm": 15.75, "learning_rate": 1.4654467768902101e-05, "loss": 1.0542, "step": 12390 }, { "epoch": 0.2821837380242587, "grad_norm": 0.007415771484375, "learning_rate": 1.4649823518484118e-05, "loss": 0.7764, "step": 12400 }, { "epoch": 0.28241130555492344, "grad_norm": 0.05859375, "learning_rate": 1.4645179268066134e-05, "loss": 0.3243, "step": 12410 }, { "epoch": 0.28263887308558816, "grad_norm": 286.0, "learning_rate": 1.4640535017648152e-05, "loss": 1.1503, "step": 12420 }, { "epoch": 0.2828664406162529, "grad_norm": 59.0, "learning_rate": 1.463589076723017e-05, "loss": 0.4305, "step": 12430 }, { "epoch": 0.2830940081469176, "grad_norm": 150.0, "learning_rate": 1.4631246516812188e-05, "loss": 1.2739, "step": 12440 }, { "epoch": 0.2833215756775823, "grad_norm": 43.0, "learning_rate": 1.4626602266394205e-05, "loss": 0.3754, "step": 12450 }, { "epoch": 0.28354914320824703, "grad_norm": 236.0, "learning_rate": 1.4621958015976224e-05, "loss": 1.2023, "step": 12460 }, { "epoch": 0.28377671073891175, "grad_norm": 0.021240234375, "learning_rate": 1.4617313765558241e-05, "loss": 0.7067, "step": 12470 }, { "epoch": 0.28400427826957647, "grad_norm": 75.0, "learning_rate": 1.4612669515140258e-05, "loss": 1.188, "step": 12480 }, { "epoch": 0.28423184580024125, "grad_norm": 7.375, "learning_rate": 1.4608025264722275e-05, "loss": 0.6633, "step": 12490 }, { "epoch": 0.28445941333090596, "grad_norm": 536.0, "learning_rate": 1.4603381014304292e-05, "loss": 0.5964, "step": 12500 }, { "epoch": 0.2846869808615707, "grad_norm": 0.030029296875, "learning_rate": 1.459873676388631e-05, "loss": 0.4, "step": 12510 }, { "epoch": 0.2849145483922354, "grad_norm": 110.5, "learning_rate": 1.4594092513468328e-05, "loss": 0.4239, "step": 12520 }, { "epoch": 0.2851421159229001, "grad_norm": 84.0, "learning_rate": 1.4589448263050345e-05, "loss": 0.9811, "step": 12530 }, { "epoch": 0.28536968345356484, "grad_norm": 53.75, "learning_rate": 1.4584804012632364e-05, "loss": 0.9984, "step": 12540 }, { "epoch": 0.28559725098422956, "grad_norm": 177.0, "learning_rate": 1.458015976221438e-05, "loss": 0.5117, "step": 12550 }, { "epoch": 0.2858248185148943, "grad_norm": 18.0, "learning_rate": 1.4575515511796396e-05, "loss": 0.356, "step": 12560 }, { "epoch": 0.286052386045559, "grad_norm": 0.047607421875, "learning_rate": 1.4570871261378415e-05, "loss": 0.4773, "step": 12570 }, { "epoch": 0.28627995357622377, "grad_norm": 0.0030975341796875, "learning_rate": 1.4566227010960432e-05, "loss": 0.8755, "step": 12580 }, { "epoch": 0.2865075211068885, "grad_norm": 39.0, "learning_rate": 1.4561582760542449e-05, "loss": 0.6901, "step": 12590 }, { "epoch": 0.2867350886375532, "grad_norm": 85.0, "learning_rate": 1.4556938510124468e-05, "loss": 1.5524, "step": 12600 }, { "epoch": 0.2869626561682179, "grad_norm": 0.0002593994140625, "learning_rate": 1.4552294259706485e-05, "loss": 1.4922, "step": 12610 }, { "epoch": 0.28719022369888264, "grad_norm": 35.25, "learning_rate": 1.4547650009288504e-05, "loss": 0.418, "step": 12620 }, { "epoch": 0.28741779122954736, "grad_norm": 0.384765625, "learning_rate": 1.4543005758870519e-05, "loss": 1.2868, "step": 12630 }, { "epoch": 0.2876453587602121, "grad_norm": 418.0, "learning_rate": 1.4538361508452536e-05, "loss": 0.6271, "step": 12640 }, { "epoch": 0.2878729262908768, "grad_norm": 262.0, "learning_rate": 1.4533717258034555e-05, "loss": 1.1223, "step": 12650 }, { "epoch": 0.28810049382154157, "grad_norm": 74.0, "learning_rate": 1.4529073007616572e-05, "loss": 0.9126, "step": 12660 }, { "epoch": 0.2883280613522063, "grad_norm": 152.0, "learning_rate": 1.452442875719859e-05, "loss": 0.3271, "step": 12670 }, { "epoch": 0.288555628882871, "grad_norm": 8.4375, "learning_rate": 1.4519784506780608e-05, "loss": 0.3825, "step": 12680 }, { "epoch": 0.2887831964135357, "grad_norm": 136.0, "learning_rate": 1.4515140256362625e-05, "loss": 1.1227, "step": 12690 }, { "epoch": 0.28901076394420044, "grad_norm": 92.0, "learning_rate": 1.4510496005944642e-05, "loss": 0.6567, "step": 12700 }, { "epoch": 0.28923833147486516, "grad_norm": 105.5, "learning_rate": 1.4505851755526659e-05, "loss": 0.6709, "step": 12710 }, { "epoch": 0.2894658990055299, "grad_norm": 247.0, "learning_rate": 1.4501207505108676e-05, "loss": 0.8598, "step": 12720 }, { "epoch": 0.2896934665361946, "grad_norm": 364.0, "learning_rate": 1.4496563254690695e-05, "loss": 0.8281, "step": 12730 }, { "epoch": 0.2899210340668593, "grad_norm": 37.75, "learning_rate": 1.4491919004272712e-05, "loss": 0.6952, "step": 12740 }, { "epoch": 0.2901486015975241, "grad_norm": 210.0, "learning_rate": 1.448727475385473e-05, "loss": 0.6784, "step": 12750 }, { "epoch": 0.2903761691281888, "grad_norm": 95.5, "learning_rate": 1.4482630503436747e-05, "loss": 0.3614, "step": 12760 }, { "epoch": 0.29060373665885353, "grad_norm": 77.5, "learning_rate": 1.4477986253018763e-05, "loss": 0.7702, "step": 12770 }, { "epoch": 0.29083130418951825, "grad_norm": 163.0, "learning_rate": 1.4473342002600782e-05, "loss": 1.3064, "step": 12780 }, { "epoch": 0.29105887172018297, "grad_norm": 0.25, "learning_rate": 1.4468697752182799e-05, "loss": 1.1545, "step": 12790 }, { "epoch": 0.2912864392508477, "grad_norm": 146.0, "learning_rate": 1.4464053501764817e-05, "loss": 0.5964, "step": 12800 }, { "epoch": 0.2915140067815124, "grad_norm": 388.0, "learning_rate": 1.4459409251346834e-05, "loss": 0.8254, "step": 12810 }, { "epoch": 0.2917415743121771, "grad_norm": 184.0, "learning_rate": 1.4454765000928851e-05, "loss": 1.1512, "step": 12820 }, { "epoch": 0.29196914184284184, "grad_norm": 143.0, "learning_rate": 1.445012075051087e-05, "loss": 0.5881, "step": 12830 }, { "epoch": 0.2921967093735066, "grad_norm": 55.0, "learning_rate": 1.4445476500092886e-05, "loss": 1.2003, "step": 12840 }, { "epoch": 0.29242427690417133, "grad_norm": 0.0230712890625, "learning_rate": 1.4440832249674903e-05, "loss": 1.1012, "step": 12850 }, { "epoch": 0.29265184443483605, "grad_norm": 0.0019989013671875, "learning_rate": 1.4436187999256921e-05, "loss": 0.5693, "step": 12860 }, { "epoch": 0.29287941196550077, "grad_norm": 193.0, "learning_rate": 1.4431543748838938e-05, "loss": 0.9226, "step": 12870 }, { "epoch": 0.2931069794961655, "grad_norm": 127.0, "learning_rate": 1.4426899498420957e-05, "loss": 1.1348, "step": 12880 }, { "epoch": 0.2933345470268302, "grad_norm": 76.0, "learning_rate": 1.4422255248002974e-05, "loss": 0.3556, "step": 12890 }, { "epoch": 0.2935621145574949, "grad_norm": 0.037353515625, "learning_rate": 1.4417610997584991e-05, "loss": 1.1984, "step": 12900 }, { "epoch": 0.29378968208815964, "grad_norm": 54.75, "learning_rate": 1.4412966747167008e-05, "loss": 0.7554, "step": 12910 }, { "epoch": 0.29401724961882436, "grad_norm": 6.96875, "learning_rate": 1.4408322496749025e-05, "loss": 0.8067, "step": 12920 }, { "epoch": 0.29424481714948914, "grad_norm": 370.0, "learning_rate": 1.4403678246331042e-05, "loss": 1.626, "step": 12930 }, { "epoch": 0.29447238468015385, "grad_norm": 1.1171875, "learning_rate": 1.4399033995913061e-05, "loss": 0.5988, "step": 12940 }, { "epoch": 0.29469995221081857, "grad_norm": 139.0, "learning_rate": 1.4394389745495078e-05, "loss": 1.0072, "step": 12950 }, { "epoch": 0.2949275197414833, "grad_norm": 199.0, "learning_rate": 1.4389745495077097e-05, "loss": 1.3956, "step": 12960 }, { "epoch": 0.295155087272148, "grad_norm": 62.75, "learning_rate": 1.4385101244659114e-05, "loss": 1.3001, "step": 12970 }, { "epoch": 0.2953826548028127, "grad_norm": 0.1748046875, "learning_rate": 1.438045699424113e-05, "loss": 0.5668, "step": 12980 }, { "epoch": 0.29561022233347745, "grad_norm": 207.0, "learning_rate": 1.4375812743823148e-05, "loss": 1.1521, "step": 12990 }, { "epoch": 0.29583778986414216, "grad_norm": 0.045654296875, "learning_rate": 1.4371168493405165e-05, "loss": 0.8196, "step": 13000 }, { "epoch": 0.2960653573948069, "grad_norm": 157.0, "learning_rate": 1.4366524242987184e-05, "loss": 1.2031, "step": 13010 }, { "epoch": 0.29629292492547166, "grad_norm": 74.5, "learning_rate": 1.4361879992569201e-05, "loss": 0.8377, "step": 13020 }, { "epoch": 0.2965204924561364, "grad_norm": 116.5, "learning_rate": 1.4357235742151218e-05, "loss": 0.8797, "step": 13030 }, { "epoch": 0.2967480599868011, "grad_norm": 258.0, "learning_rate": 1.4352591491733237e-05, "loss": 0.7258, "step": 13040 }, { "epoch": 0.2969756275174658, "grad_norm": 2.140625, "learning_rate": 1.4347947241315252e-05, "loss": 0.7343, "step": 13050 }, { "epoch": 0.29720319504813053, "grad_norm": 1.21875, "learning_rate": 1.434330299089727e-05, "loss": 0.7734, "step": 13060 }, { "epoch": 0.29743076257879525, "grad_norm": 14.9375, "learning_rate": 1.4338658740479288e-05, "loss": 1.6472, "step": 13070 }, { "epoch": 0.29765833010945997, "grad_norm": 9.0625, "learning_rate": 1.4334014490061305e-05, "loss": 0.4777, "step": 13080 }, { "epoch": 0.2978858976401247, "grad_norm": 0.001556396484375, "learning_rate": 1.4329370239643324e-05, "loss": 0.5379, "step": 13090 }, { "epoch": 0.2981134651707894, "grad_norm": 171.0, "learning_rate": 1.432472598922534e-05, "loss": 1.0923, "step": 13100 }, { "epoch": 0.2983410327014542, "grad_norm": 34.75, "learning_rate": 1.432008173880736e-05, "loss": 1.0435, "step": 13110 }, { "epoch": 0.2985686002321189, "grad_norm": 0.00732421875, "learning_rate": 1.4315437488389375e-05, "loss": 0.1765, "step": 13120 }, { "epoch": 0.2987961677627836, "grad_norm": 0.0003910064697265625, "learning_rate": 1.4310793237971392e-05, "loss": 1.7982, "step": 13130 }, { "epoch": 0.29902373529344833, "grad_norm": 0.05859375, "learning_rate": 1.4306148987553409e-05, "loss": 1.1083, "step": 13140 }, { "epoch": 0.29925130282411305, "grad_norm": 0.53515625, "learning_rate": 1.4301504737135428e-05, "loss": 1.2239, "step": 13150 }, { "epoch": 0.29947887035477777, "grad_norm": 0.0096435546875, "learning_rate": 1.4296860486717445e-05, "loss": 0.3278, "step": 13160 }, { "epoch": 0.2997064378854425, "grad_norm": 276.0, "learning_rate": 1.4292216236299464e-05, "loss": 0.9661, "step": 13170 }, { "epoch": 0.2999340054161072, "grad_norm": 284.0, "learning_rate": 1.428757198588148e-05, "loss": 0.7225, "step": 13180 }, { "epoch": 0.300161572946772, "grad_norm": 0.005279541015625, "learning_rate": 1.4282927735463496e-05, "loss": 1.3146, "step": 13190 }, { "epoch": 0.3003891404774367, "grad_norm": 0.00885009765625, "learning_rate": 1.4278283485045515e-05, "loss": 0.4293, "step": 13200 }, { "epoch": 0.3006167080081014, "grad_norm": 68.0, "learning_rate": 1.4273639234627532e-05, "loss": 0.6278, "step": 13210 }, { "epoch": 0.30084427553876614, "grad_norm": 100.5, "learning_rate": 1.426899498420955e-05, "loss": 0.6124, "step": 13220 }, { "epoch": 0.30107184306943086, "grad_norm": 247.0, "learning_rate": 1.4264350733791568e-05, "loss": 0.7771, "step": 13230 }, { "epoch": 0.3012994106000956, "grad_norm": 0.296875, "learning_rate": 1.4259706483373585e-05, "loss": 0.6646, "step": 13240 }, { "epoch": 0.3015269781307603, "grad_norm": 0.01275634765625, "learning_rate": 1.4255062232955603e-05, "loss": 1.1415, "step": 13250 }, { "epoch": 0.301754545661425, "grad_norm": 0.0203857421875, "learning_rate": 1.4250417982537619e-05, "loss": 1.4797, "step": 13260 }, { "epoch": 0.30198211319208973, "grad_norm": 0.000583648681640625, "learning_rate": 1.4245773732119636e-05, "loss": 0.2479, "step": 13270 }, { "epoch": 0.3022096807227545, "grad_norm": 146.0, "learning_rate": 1.4241129481701655e-05, "loss": 1.4586, "step": 13280 }, { "epoch": 0.3024372482534192, "grad_norm": 77.5, "learning_rate": 1.4236485231283672e-05, "loss": 0.5281, "step": 13290 }, { "epoch": 0.30266481578408394, "grad_norm": 308.0, "learning_rate": 1.423184098086569e-05, "loss": 1.3738, "step": 13300 }, { "epoch": 0.30289238331474866, "grad_norm": 400.0, "learning_rate": 1.4227196730447707e-05, "loss": 1.9557, "step": 13310 }, { "epoch": 0.3031199508454134, "grad_norm": 90.0, "learning_rate": 1.4222552480029726e-05, "loss": 0.5008, "step": 13320 }, { "epoch": 0.3033475183760781, "grad_norm": 0.0020751953125, "learning_rate": 1.4217908229611742e-05, "loss": 0.7592, "step": 13330 }, { "epoch": 0.3035750859067428, "grad_norm": 181.0, "learning_rate": 1.4213263979193759e-05, "loss": 0.7766, "step": 13340 }, { "epoch": 0.30380265343740753, "grad_norm": 140.0, "learning_rate": 1.4208619728775777e-05, "loss": 1.2421, "step": 13350 }, { "epoch": 0.30403022096807225, "grad_norm": 19.75, "learning_rate": 1.4203975478357794e-05, "loss": 0.4028, "step": 13360 }, { "epoch": 0.304257788498737, "grad_norm": 0.2080078125, "learning_rate": 1.4199331227939811e-05, "loss": 0.9126, "step": 13370 }, { "epoch": 0.30448535602940174, "grad_norm": 95.5, "learning_rate": 1.419468697752183e-05, "loss": 0.6843, "step": 13380 }, { "epoch": 0.30471292356006646, "grad_norm": 0.00970458984375, "learning_rate": 1.4190042727103847e-05, "loss": 1.6616, "step": 13390 }, { "epoch": 0.3049404910907312, "grad_norm": 28.625, "learning_rate": 1.4185398476685863e-05, "loss": 0.4716, "step": 13400 }, { "epoch": 0.3051680586213959, "grad_norm": 90.5, "learning_rate": 1.4180754226267881e-05, "loss": 0.6886, "step": 13410 }, { "epoch": 0.3053956261520606, "grad_norm": 45.25, "learning_rate": 1.4176109975849898e-05, "loss": 0.5729, "step": 13420 }, { "epoch": 0.30562319368272534, "grad_norm": 50.75, "learning_rate": 1.4171465725431917e-05, "loss": 0.2861, "step": 13430 }, { "epoch": 0.30585076121339005, "grad_norm": 0.000698089599609375, "learning_rate": 1.4166821475013934e-05, "loss": 0.8428, "step": 13440 }, { "epoch": 0.3060783287440548, "grad_norm": 216.0, "learning_rate": 1.4162177224595951e-05, "loss": 1.6182, "step": 13450 }, { "epoch": 0.30630589627471955, "grad_norm": 310.0, "learning_rate": 1.415753297417797e-05, "loss": 1.1133, "step": 13460 }, { "epoch": 0.30653346380538427, "grad_norm": 37.25, "learning_rate": 1.4152888723759985e-05, "loss": 0.493, "step": 13470 }, { "epoch": 0.306761031336049, "grad_norm": 85.5, "learning_rate": 1.4148244473342002e-05, "loss": 1.0833, "step": 13480 }, { "epoch": 0.3069885988667137, "grad_norm": 1.1484375, "learning_rate": 1.4143600222924021e-05, "loss": 0.9036, "step": 13490 }, { "epoch": 0.3072161663973784, "grad_norm": 0.0031585693359375, "learning_rate": 1.4138955972506038e-05, "loss": 0.6097, "step": 13500 }, { "epoch": 0.30744373392804314, "grad_norm": 91.5, "learning_rate": 1.4134311722088057e-05, "loss": 1.5184, "step": 13510 }, { "epoch": 0.30767130145870786, "grad_norm": 0.004730224609375, "learning_rate": 1.4129667471670074e-05, "loss": 0.5837, "step": 13520 }, { "epoch": 0.3078988689893726, "grad_norm": 169.0, "learning_rate": 1.4125023221252093e-05, "loss": 1.4995, "step": 13530 }, { "epoch": 0.3081264365200373, "grad_norm": 101.5, "learning_rate": 1.4120378970834108e-05, "loss": 0.7159, "step": 13540 }, { "epoch": 0.30835400405070207, "grad_norm": 0.068359375, "learning_rate": 1.4115734720416125e-05, "loss": 0.5835, "step": 13550 }, { "epoch": 0.3085815715813668, "grad_norm": 0.15234375, "learning_rate": 1.4111090469998144e-05, "loss": 0.6386, "step": 13560 }, { "epoch": 0.3088091391120315, "grad_norm": 0.09765625, "learning_rate": 1.4106446219580161e-05, "loss": 0.0486, "step": 13570 }, { "epoch": 0.3090367066426962, "grad_norm": 113.5, "learning_rate": 1.4101801969162178e-05, "loss": 0.8683, "step": 13580 }, { "epoch": 0.30926427417336094, "grad_norm": 0.796875, "learning_rate": 1.4097157718744197e-05, "loss": 0.8838, "step": 13590 }, { "epoch": 0.30949184170402566, "grad_norm": 17.125, "learning_rate": 1.4092513468326214e-05, "loss": 1.3741, "step": 13600 }, { "epoch": 0.3097194092346904, "grad_norm": 0.2216796875, "learning_rate": 1.408786921790823e-05, "loss": 0.9884, "step": 13610 }, { "epoch": 0.3099469767653551, "grad_norm": 172.0, "learning_rate": 1.4083224967490248e-05, "loss": 0.7601, "step": 13620 }, { "epoch": 0.3101745442960198, "grad_norm": 0.0390625, "learning_rate": 1.4078580717072265e-05, "loss": 1.0375, "step": 13630 }, { "epoch": 0.3104021118266846, "grad_norm": 2.453125, "learning_rate": 1.4073936466654284e-05, "loss": 0.5709, "step": 13640 }, { "epoch": 0.3106296793573493, "grad_norm": 10.625, "learning_rate": 1.4069292216236301e-05, "loss": 0.8745, "step": 13650 }, { "epoch": 0.310857246888014, "grad_norm": 0.041748046875, "learning_rate": 1.406464796581832e-05, "loss": 0.7077, "step": 13660 }, { "epoch": 0.31108481441867875, "grad_norm": 0.0005950927734375, "learning_rate": 1.4060003715400337e-05, "loss": 1.3283, "step": 13670 }, { "epoch": 0.31131238194934346, "grad_norm": 204.0, "learning_rate": 1.4055359464982352e-05, "loss": 1.1514, "step": 13680 }, { "epoch": 0.3115399494800082, "grad_norm": 0.2451171875, "learning_rate": 1.405071521456437e-05, "loss": 0.8503, "step": 13690 }, { "epoch": 0.3117675170106729, "grad_norm": 0.0028533935546875, "learning_rate": 1.4046070964146388e-05, "loss": 0.5315, "step": 13700 }, { "epoch": 0.3119950845413376, "grad_norm": 0.0084228515625, "learning_rate": 1.4041426713728405e-05, "loss": 0.6903, "step": 13710 }, { "epoch": 0.3122226520720024, "grad_norm": 182.0, "learning_rate": 1.4036782463310424e-05, "loss": 1.4152, "step": 13720 }, { "epoch": 0.3124502196026671, "grad_norm": 28.25, "learning_rate": 1.403213821289244e-05, "loss": 0.7724, "step": 13730 }, { "epoch": 0.31267778713333183, "grad_norm": 0.00836181640625, "learning_rate": 1.402749396247446e-05, "loss": 0.4895, "step": 13740 }, { "epoch": 0.31290535466399655, "grad_norm": 1.578125, "learning_rate": 1.4022849712056475e-05, "loss": 0.5659, "step": 13750 }, { "epoch": 0.31313292219466127, "grad_norm": 374.0, "learning_rate": 1.4018205461638492e-05, "loss": 1.6393, "step": 13760 }, { "epoch": 0.313360489725326, "grad_norm": 46.0, "learning_rate": 1.401356121122051e-05, "loss": 1.1746, "step": 13770 }, { "epoch": 0.3135880572559907, "grad_norm": 75.0, "learning_rate": 1.4008916960802528e-05, "loss": 0.4692, "step": 13780 }, { "epoch": 0.3138156247866554, "grad_norm": 0.00836181640625, "learning_rate": 1.4004272710384545e-05, "loss": 1.1364, "step": 13790 }, { "epoch": 0.31404319231732014, "grad_norm": 28.125, "learning_rate": 1.3999628459966563e-05, "loss": 0.4576, "step": 13800 }, { "epoch": 0.3142707598479849, "grad_norm": 0.185546875, "learning_rate": 1.399498420954858e-05, "loss": 1.1306, "step": 13810 }, { "epoch": 0.31449832737864963, "grad_norm": 157.0, "learning_rate": 1.3990339959130596e-05, "loss": 0.6756, "step": 13820 }, { "epoch": 0.31472589490931435, "grad_norm": 0.064453125, "learning_rate": 1.3985695708712615e-05, "loss": 0.7268, "step": 13830 }, { "epoch": 0.31495346243997907, "grad_norm": 212.0, "learning_rate": 1.3981051458294632e-05, "loss": 0.4636, "step": 13840 }, { "epoch": 0.3151810299706438, "grad_norm": 442.0, "learning_rate": 1.397640720787665e-05, "loss": 1.8877, "step": 13850 }, { "epoch": 0.3154085975013085, "grad_norm": 193.0, "learning_rate": 1.3971762957458667e-05, "loss": 1.1208, "step": 13860 }, { "epoch": 0.3156361650319732, "grad_norm": 0.11474609375, "learning_rate": 1.3967118707040686e-05, "loss": 1.0451, "step": 13870 }, { "epoch": 0.31586373256263794, "grad_norm": 73.0, "learning_rate": 1.3962474456622703e-05, "loss": 1.3015, "step": 13880 }, { "epoch": 0.31609130009330266, "grad_norm": 98.0, "learning_rate": 1.3957830206204719e-05, "loss": 1.0351, "step": 13890 }, { "epoch": 0.31631886762396744, "grad_norm": 0.035400390625, "learning_rate": 1.3953185955786737e-05, "loss": 0.6703, "step": 13900 }, { "epoch": 0.31654643515463216, "grad_norm": 668.0, "learning_rate": 1.3948541705368754e-05, "loss": 0.6189, "step": 13910 }, { "epoch": 0.3167740026852969, "grad_norm": 8.1875, "learning_rate": 1.3943897454950772e-05, "loss": 0.8977, "step": 13920 }, { "epoch": 0.3170015702159616, "grad_norm": 10.6875, "learning_rate": 1.393925320453279e-05, "loss": 1.0117, "step": 13930 }, { "epoch": 0.3172291377466263, "grad_norm": 0.00933837890625, "learning_rate": 1.3934608954114807e-05, "loss": 0.579, "step": 13940 }, { "epoch": 0.31745670527729103, "grad_norm": 0.55859375, "learning_rate": 1.3929964703696826e-05, "loss": 0.4619, "step": 13950 }, { "epoch": 0.31768427280795575, "grad_norm": 157.0, "learning_rate": 1.3925320453278841e-05, "loss": 1.2644, "step": 13960 }, { "epoch": 0.31791184033862047, "grad_norm": 0.00106048583984375, "learning_rate": 1.3920676202860858e-05, "loss": 1.1834, "step": 13970 }, { "epoch": 0.3181394078692852, "grad_norm": 302.0, "learning_rate": 1.3916031952442877e-05, "loss": 1.6274, "step": 13980 }, { "epoch": 0.31836697539994996, "grad_norm": 16.125, "learning_rate": 1.3911387702024894e-05, "loss": 0.2547, "step": 13990 }, { "epoch": 0.3185945429306147, "grad_norm": 374.0, "learning_rate": 1.3906743451606913e-05, "loss": 1.3145, "step": 14000 }, { "epoch": 0.3188221104612794, "grad_norm": 0.004974365234375, "learning_rate": 1.390209920118893e-05, "loss": 0.8133, "step": 14010 }, { "epoch": 0.3190496779919441, "grad_norm": 0.09814453125, "learning_rate": 1.3897454950770947e-05, "loss": 0.5336, "step": 14020 }, { "epoch": 0.31927724552260883, "grad_norm": 159.0, "learning_rate": 1.3892810700352962e-05, "loss": 0.6873, "step": 14030 }, { "epoch": 0.31950481305327355, "grad_norm": 0.0294189453125, "learning_rate": 1.3888166449934981e-05, "loss": 1.072, "step": 14040 }, { "epoch": 0.31973238058393827, "grad_norm": 56.5, "learning_rate": 1.3883522199516998e-05, "loss": 0.7856, "step": 14050 }, { "epoch": 0.319959948114603, "grad_norm": 376.0, "learning_rate": 1.3878877949099017e-05, "loss": 0.5478, "step": 14060 }, { "epoch": 0.3201875156452677, "grad_norm": 51.25, "learning_rate": 1.3874233698681034e-05, "loss": 0.5286, "step": 14070 }, { "epoch": 0.3204150831759325, "grad_norm": 128.0, "learning_rate": 1.3869589448263053e-05, "loss": 1.1629, "step": 14080 }, { "epoch": 0.3206426507065972, "grad_norm": 280.0, "learning_rate": 1.386494519784507e-05, "loss": 1.0957, "step": 14090 }, { "epoch": 0.3208702182372619, "grad_norm": 0.022216796875, "learning_rate": 1.3860300947427085e-05, "loss": 0.6363, "step": 14100 }, { "epoch": 0.32109778576792664, "grad_norm": 103.0, "learning_rate": 1.3855656697009104e-05, "loss": 0.6626, "step": 14110 }, { "epoch": 0.32132535329859135, "grad_norm": 438.0, "learning_rate": 1.3851012446591121e-05, "loss": 0.7413, "step": 14120 }, { "epoch": 0.3215529208292561, "grad_norm": 264.0, "learning_rate": 1.3846368196173138e-05, "loss": 0.3535, "step": 14130 }, { "epoch": 0.3217804883599208, "grad_norm": 0.000667572021484375, "learning_rate": 1.3841723945755157e-05, "loss": 1.7759, "step": 14140 }, { "epoch": 0.3220080558905855, "grad_norm": 0.51171875, "learning_rate": 1.3837079695337174e-05, "loss": 1.8171, "step": 14150 }, { "epoch": 0.3222356234212503, "grad_norm": 60.75, "learning_rate": 1.3832435444919193e-05, "loss": 0.8028, "step": 14160 }, { "epoch": 0.322463190951915, "grad_norm": 0.054931640625, "learning_rate": 1.3827791194501208e-05, "loss": 0.7042, "step": 14170 }, { "epoch": 0.3226907584825797, "grad_norm": 340.0, "learning_rate": 1.3823146944083225e-05, "loss": 1.7306, "step": 14180 }, { "epoch": 0.32291832601324444, "grad_norm": 0.005157470703125, "learning_rate": 1.3818502693665244e-05, "loss": 1.1742, "step": 14190 }, { "epoch": 0.32314589354390916, "grad_norm": 386.0, "learning_rate": 1.3813858443247261e-05, "loss": 0.5203, "step": 14200 }, { "epoch": 0.3233734610745739, "grad_norm": 115.0, "learning_rate": 1.380921419282928e-05, "loss": 0.6467, "step": 14210 }, { "epoch": 0.3236010286052386, "grad_norm": 0.003692626953125, "learning_rate": 1.3804569942411297e-05, "loss": 0.9118, "step": 14220 }, { "epoch": 0.3238285961359033, "grad_norm": 235.0, "learning_rate": 1.3799925691993314e-05, "loss": 1.3569, "step": 14230 }, { "epoch": 0.32405616366656803, "grad_norm": 0.0028839111328125, "learning_rate": 1.379528144157533e-05, "loss": 1.1865, "step": 14240 }, { "epoch": 0.3242837311972328, "grad_norm": 32.75, "learning_rate": 1.3790637191157348e-05, "loss": 1.4558, "step": 14250 }, { "epoch": 0.3245112987278975, "grad_norm": 0.3984375, "learning_rate": 1.3785992940739365e-05, "loss": 0.7439, "step": 14260 }, { "epoch": 0.32473886625856224, "grad_norm": 77.0, "learning_rate": 1.3781348690321384e-05, "loss": 0.6287, "step": 14270 }, { "epoch": 0.32496643378922696, "grad_norm": 0.0074462890625, "learning_rate": 1.37767044399034e-05, "loss": 1.001, "step": 14280 }, { "epoch": 0.3251940013198917, "grad_norm": 0.002288818359375, "learning_rate": 1.377206018948542e-05, "loss": 0.6973, "step": 14290 }, { "epoch": 0.3254215688505564, "grad_norm": 284.0, "learning_rate": 1.3767415939067437e-05, "loss": 0.8186, "step": 14300 }, { "epoch": 0.3256491363812211, "grad_norm": 0.482421875, "learning_rate": 1.3762771688649452e-05, "loss": 0.4451, "step": 14310 }, { "epoch": 0.32587670391188583, "grad_norm": 87.5, "learning_rate": 1.375812743823147e-05, "loss": 1.2707, "step": 14320 }, { "epoch": 0.32610427144255055, "grad_norm": 318.0, "learning_rate": 1.3753483187813488e-05, "loss": 1.6368, "step": 14330 }, { "epoch": 0.3263318389732153, "grad_norm": 49.75, "learning_rate": 1.3748838937395505e-05, "loss": 0.5435, "step": 14340 }, { "epoch": 0.32655940650388005, "grad_norm": 0.00811767578125, "learning_rate": 1.3744194686977524e-05, "loss": 0.7773, "step": 14350 }, { "epoch": 0.32678697403454476, "grad_norm": 0.039306640625, "learning_rate": 1.373955043655954e-05, "loss": 0.9554, "step": 14360 }, { "epoch": 0.3270145415652095, "grad_norm": 71.0, "learning_rate": 1.373490618614156e-05, "loss": 0.8848, "step": 14370 }, { "epoch": 0.3272421090958742, "grad_norm": 1.296875, "learning_rate": 1.3730261935723575e-05, "loss": 0.3795, "step": 14380 }, { "epoch": 0.3274696766265389, "grad_norm": 3.34375, "learning_rate": 1.3725617685305592e-05, "loss": 1.2179, "step": 14390 }, { "epoch": 0.32769724415720364, "grad_norm": 243.0, "learning_rate": 1.372097343488761e-05, "loss": 1.0491, "step": 14400 }, { "epoch": 0.32792481168786836, "grad_norm": 229.0, "learning_rate": 1.3716329184469628e-05, "loss": 1.0979, "step": 14410 }, { "epoch": 0.3281523792185331, "grad_norm": 0.11669921875, "learning_rate": 1.3711684934051646e-05, "loss": 0.8361, "step": 14420 }, { "epoch": 0.32837994674919785, "grad_norm": 56.0, "learning_rate": 1.3707040683633663e-05, "loss": 0.9015, "step": 14430 }, { "epoch": 0.32860751427986257, "grad_norm": 0.00384521484375, "learning_rate": 1.370239643321568e-05, "loss": 1.1401, "step": 14440 }, { "epoch": 0.3288350818105273, "grad_norm": 0.00787353515625, "learning_rate": 1.3697752182797697e-05, "loss": 0.7148, "step": 14450 }, { "epoch": 0.329062649341192, "grad_norm": 0.0054931640625, "learning_rate": 1.3693107932379714e-05, "loss": 0.8983, "step": 14460 }, { "epoch": 0.3292902168718567, "grad_norm": 139.0, "learning_rate": 1.3688463681961732e-05, "loss": 2.2297, "step": 14470 }, { "epoch": 0.32951778440252144, "grad_norm": 294.0, "learning_rate": 1.368381943154375e-05, "loss": 0.6347, "step": 14480 }, { "epoch": 0.32974535193318616, "grad_norm": 67.0, "learning_rate": 1.3679175181125767e-05, "loss": 0.333, "step": 14490 }, { "epoch": 0.3299729194638509, "grad_norm": 72.0, "learning_rate": 1.3674530930707786e-05, "loss": 0.8417, "step": 14500 }, { "epoch": 0.3302004869945156, "grad_norm": 51.25, "learning_rate": 1.3669886680289803e-05, "loss": 1.0233, "step": 14510 }, { "epoch": 0.33042805452518037, "grad_norm": 0.0247802734375, "learning_rate": 1.3665242429871819e-05, "loss": 0.5366, "step": 14520 }, { "epoch": 0.3306556220558451, "grad_norm": 284.0, "learning_rate": 1.3660598179453837e-05, "loss": 1.2361, "step": 14530 }, { "epoch": 0.3308831895865098, "grad_norm": 135.0, "learning_rate": 1.3655953929035854e-05, "loss": 0.8806, "step": 14540 }, { "epoch": 0.3311107571171745, "grad_norm": 0.01171875, "learning_rate": 1.3651309678617873e-05, "loss": 0.6159, "step": 14550 }, { "epoch": 0.33133832464783924, "grad_norm": 472.0, "learning_rate": 1.364666542819989e-05, "loss": 0.3933, "step": 14560 }, { "epoch": 0.33156589217850396, "grad_norm": 3.8125, "learning_rate": 1.3642021177781907e-05, "loss": 0.8006, "step": 14570 }, { "epoch": 0.3317934597091687, "grad_norm": 278.0, "learning_rate": 1.3637376927363926e-05, "loss": 0.5984, "step": 14580 }, { "epoch": 0.3320210272398334, "grad_norm": 251.0, "learning_rate": 1.3632732676945941e-05, "loss": 1.2876, "step": 14590 }, { "epoch": 0.3322485947704981, "grad_norm": 0.11083984375, "learning_rate": 1.3628088426527958e-05, "loss": 0.9876, "step": 14600 }, { "epoch": 0.3324761623011629, "grad_norm": 174.0, "learning_rate": 1.3623444176109977e-05, "loss": 0.8395, "step": 14610 }, { "epoch": 0.3327037298318276, "grad_norm": 91.5, "learning_rate": 1.3618799925691994e-05, "loss": 0.427, "step": 14620 }, { "epoch": 0.33293129736249233, "grad_norm": 6.375, "learning_rate": 1.3614155675274013e-05, "loss": 1.1241, "step": 14630 }, { "epoch": 0.33315886489315705, "grad_norm": 217.0, "learning_rate": 1.360951142485603e-05, "loss": 1.4988, "step": 14640 }, { "epoch": 0.33338643242382177, "grad_norm": 264.0, "learning_rate": 1.3604867174438047e-05, "loss": 1.0041, "step": 14650 }, { "epoch": 0.3336139999544865, "grad_norm": 66.5, "learning_rate": 1.3600222924020064e-05, "loss": 0.5737, "step": 14660 }, { "epoch": 0.3338415674851512, "grad_norm": 296.0, "learning_rate": 1.3595578673602081e-05, "loss": 0.6479, "step": 14670 }, { "epoch": 0.3340691350158159, "grad_norm": 272.0, "learning_rate": 1.3590934423184098e-05, "loss": 1.3263, "step": 14680 }, { "epoch": 0.3342967025464807, "grad_norm": 55.75, "learning_rate": 1.3586290172766117e-05, "loss": 1.2984, "step": 14690 }, { "epoch": 0.3345242700771454, "grad_norm": 212.0, "learning_rate": 1.3581645922348134e-05, "loss": 1.2183, "step": 14700 }, { "epoch": 0.33475183760781013, "grad_norm": 0.0005645751953125, "learning_rate": 1.3577001671930153e-05, "loss": 1.0082, "step": 14710 }, { "epoch": 0.33497940513847485, "grad_norm": 136.0, "learning_rate": 1.357235742151217e-05, "loss": 0.6401, "step": 14720 }, { "epoch": 0.33520697266913957, "grad_norm": 199.0, "learning_rate": 1.3567713171094185e-05, "loss": 1.0214, "step": 14730 }, { "epoch": 0.3354345401998043, "grad_norm": 11.25, "learning_rate": 1.3563068920676204e-05, "loss": 0.798, "step": 14740 }, { "epoch": 0.335662107730469, "grad_norm": 0.006988525390625, "learning_rate": 1.3558424670258221e-05, "loss": 0.0564, "step": 14750 }, { "epoch": 0.3358896752611337, "grad_norm": 398.0, "learning_rate": 1.355378041984024e-05, "loss": 1.2777, "step": 14760 }, { "epoch": 0.33611724279179844, "grad_norm": 0.6171875, "learning_rate": 1.3549136169422257e-05, "loss": 0.2351, "step": 14770 }, { "epoch": 0.3363448103224632, "grad_norm": 336.0, "learning_rate": 1.3544491919004274e-05, "loss": 2.5492, "step": 14780 }, { "epoch": 0.33657237785312794, "grad_norm": 0.004791259765625, "learning_rate": 1.3539847668586293e-05, "loss": 0.8467, "step": 14790 }, { "epoch": 0.33679994538379265, "grad_norm": 0.01129150390625, "learning_rate": 1.3535203418168308e-05, "loss": 0.4928, "step": 14800 }, { "epoch": 0.3370275129144574, "grad_norm": 102.0, "learning_rate": 1.3530559167750325e-05, "loss": 0.9173, "step": 14810 }, { "epoch": 0.3372550804451221, "grad_norm": 0.07275390625, "learning_rate": 1.3525914917332344e-05, "loss": 0.2405, "step": 14820 }, { "epoch": 0.3374826479757868, "grad_norm": 0.0027313232421875, "learning_rate": 1.352127066691436e-05, "loss": 0.6303, "step": 14830 }, { "epoch": 0.3377102155064515, "grad_norm": 143.0, "learning_rate": 1.351662641649638e-05, "loss": 0.7115, "step": 14840 }, { "epoch": 0.33793778303711625, "grad_norm": 165.0, "learning_rate": 1.3511982166078397e-05, "loss": 1.4319, "step": 14850 }, { "epoch": 0.33816535056778096, "grad_norm": 7.84375, "learning_rate": 1.3507337915660415e-05, "loss": 0.7734, "step": 14860 }, { "epoch": 0.33839291809844574, "grad_norm": 2.03125, "learning_rate": 1.350269366524243e-05, "loss": 0.3553, "step": 14870 }, { "epoch": 0.33862048562911046, "grad_norm": 1.109375, "learning_rate": 1.3498049414824448e-05, "loss": 1.5449, "step": 14880 }, { "epoch": 0.3388480531597752, "grad_norm": 11.0, "learning_rate": 1.3493405164406465e-05, "loss": 0.1157, "step": 14890 }, { "epoch": 0.3390756206904399, "grad_norm": 42.5, "learning_rate": 1.3488760913988484e-05, "loss": 0.2758, "step": 14900 }, { "epoch": 0.3393031882211046, "grad_norm": 438.0, "learning_rate": 1.34841166635705e-05, "loss": 0.7988, "step": 14910 }, { "epoch": 0.33953075575176933, "grad_norm": 0.018310546875, "learning_rate": 1.347947241315252e-05, "loss": 1.3315, "step": 14920 }, { "epoch": 0.33975832328243405, "grad_norm": 0.0079345703125, "learning_rate": 1.3474828162734536e-05, "loss": 1.3078, "step": 14930 }, { "epoch": 0.33998589081309877, "grad_norm": 142.0, "learning_rate": 1.3470183912316552e-05, "loss": 1.1359, "step": 14940 }, { "epoch": 0.3402134583437635, "grad_norm": 132.0, "learning_rate": 1.346553966189857e-05, "loss": 1.3453, "step": 14950 }, { "epoch": 0.34044102587442826, "grad_norm": 150.0, "learning_rate": 1.3460895411480588e-05, "loss": 1.2023, "step": 14960 }, { "epoch": 0.340668593405093, "grad_norm": 17.5, "learning_rate": 1.3456251161062606e-05, "loss": 0.8902, "step": 14970 }, { "epoch": 0.3408961609357577, "grad_norm": 278.0, "learning_rate": 1.3451606910644623e-05, "loss": 0.9084, "step": 14980 }, { "epoch": 0.3411237284664224, "grad_norm": 0.875, "learning_rate": 1.344696266022664e-05, "loss": 1.4867, "step": 14990 }, { "epoch": 0.34135129599708713, "grad_norm": 24.375, "learning_rate": 1.344231840980866e-05, "loss": 1.5629, "step": 15000 }, { "epoch": 0.34157886352775185, "grad_norm": 0.007598876953125, "learning_rate": 1.3437674159390675e-05, "loss": 0.5011, "step": 15010 }, { "epoch": 0.34180643105841657, "grad_norm": 165.0, "learning_rate": 1.3433029908972692e-05, "loss": 0.4516, "step": 15020 }, { "epoch": 0.3420339985890813, "grad_norm": 71.0, "learning_rate": 1.342838565855471e-05, "loss": 0.3223, "step": 15030 }, { "epoch": 0.342261566119746, "grad_norm": 1.8515625, "learning_rate": 1.3423741408136727e-05, "loss": 0.7779, "step": 15040 }, { "epoch": 0.3424891336504108, "grad_norm": 111.0, "learning_rate": 1.3419097157718746e-05, "loss": 1.4144, "step": 15050 }, { "epoch": 0.3427167011810755, "grad_norm": 128.0, "learning_rate": 1.3414452907300763e-05, "loss": 1.0807, "step": 15060 }, { "epoch": 0.3429442687117402, "grad_norm": 48.0, "learning_rate": 1.3409808656882782e-05, "loss": 0.9897, "step": 15070 }, { "epoch": 0.34317183624240494, "grad_norm": 39.75, "learning_rate": 1.3405164406464797e-05, "loss": 0.6233, "step": 15080 }, { "epoch": 0.34339940377306966, "grad_norm": 4.75, "learning_rate": 1.3400520156046814e-05, "loss": 0.7961, "step": 15090 }, { "epoch": 0.3436269713037344, "grad_norm": 27.625, "learning_rate": 1.3395875905628833e-05, "loss": 0.258, "step": 15100 }, { "epoch": 0.3438545388343991, "grad_norm": 0.01312255859375, "learning_rate": 1.339123165521085e-05, "loss": 0.437, "step": 15110 }, { "epoch": 0.3440821063650638, "grad_norm": 0.00445556640625, "learning_rate": 1.3386587404792867e-05, "loss": 0.9132, "step": 15120 }, { "epoch": 0.3443096738957286, "grad_norm": 50.0, "learning_rate": 1.3381943154374886e-05, "loss": 0.4434, "step": 15130 }, { "epoch": 0.3445372414263933, "grad_norm": 123.5, "learning_rate": 1.3377298903956903e-05, "loss": 1.5043, "step": 15140 }, { "epoch": 0.344764808957058, "grad_norm": 193.0, "learning_rate": 1.3372654653538918e-05, "loss": 0.9092, "step": 15150 }, { "epoch": 0.34499237648772274, "grad_norm": 0.11572265625, "learning_rate": 1.3368010403120937e-05, "loss": 0.5477, "step": 15160 }, { "epoch": 0.34521994401838746, "grad_norm": 22.0, "learning_rate": 1.3363366152702954e-05, "loss": 0.9493, "step": 15170 }, { "epoch": 0.3454475115490522, "grad_norm": 0.01043701171875, "learning_rate": 1.3358721902284973e-05, "loss": 0.5923, "step": 15180 }, { "epoch": 0.3456750790797169, "grad_norm": 233.0, "learning_rate": 1.335407765186699e-05, "loss": 0.4275, "step": 15190 }, { "epoch": 0.3459026466103816, "grad_norm": 0.00121307373046875, "learning_rate": 1.3349433401449009e-05, "loss": 0.8316, "step": 15200 }, { "epoch": 0.34613021414104633, "grad_norm": 0.0054931640625, "learning_rate": 1.3344789151031026e-05, "loss": 0.6201, "step": 15210 }, { "epoch": 0.3463577816717111, "grad_norm": 388.0, "learning_rate": 1.3340144900613041e-05, "loss": 1.2672, "step": 15220 }, { "epoch": 0.3465853492023758, "grad_norm": 140.0, "learning_rate": 1.3335500650195058e-05, "loss": 1.0552, "step": 15230 }, { "epoch": 0.34681291673304054, "grad_norm": 0.09619140625, "learning_rate": 1.3330856399777077e-05, "loss": 0.6529, "step": 15240 }, { "epoch": 0.34704048426370526, "grad_norm": 73.0, "learning_rate": 1.3326212149359094e-05, "loss": 0.3012, "step": 15250 }, { "epoch": 0.34726805179437, "grad_norm": 173.0, "learning_rate": 1.3321567898941113e-05, "loss": 1.1495, "step": 15260 }, { "epoch": 0.3474956193250347, "grad_norm": 386.0, "learning_rate": 1.331692364852313e-05, "loss": 1.9857, "step": 15270 }, { "epoch": 0.3477231868556994, "grad_norm": 135.0, "learning_rate": 1.3312279398105149e-05, "loss": 1.5766, "step": 15280 }, { "epoch": 0.34795075438636414, "grad_norm": 207.0, "learning_rate": 1.3307635147687164e-05, "loss": 0.8038, "step": 15290 }, { "epoch": 0.34817832191702885, "grad_norm": 548.0, "learning_rate": 1.3302990897269181e-05, "loss": 0.6681, "step": 15300 }, { "epoch": 0.34840588944769363, "grad_norm": 402.0, "learning_rate": 1.32983466468512e-05, "loss": 1.1897, "step": 15310 }, { "epoch": 0.34863345697835835, "grad_norm": 0.0184326171875, "learning_rate": 1.3293702396433217e-05, "loss": 0.909, "step": 15320 }, { "epoch": 0.34886102450902307, "grad_norm": 0.03759765625, "learning_rate": 1.3289058146015234e-05, "loss": 0.4867, "step": 15330 }, { "epoch": 0.3490885920396878, "grad_norm": 0.0179443359375, "learning_rate": 1.3284413895597253e-05, "loss": 0.1549, "step": 15340 }, { "epoch": 0.3493161595703525, "grad_norm": 67.5, "learning_rate": 1.327976964517927e-05, "loss": 2.1047, "step": 15350 }, { "epoch": 0.3495437271010172, "grad_norm": 0.003692626953125, "learning_rate": 1.3275125394761285e-05, "loss": 0.5222, "step": 15360 }, { "epoch": 0.34977129463168194, "grad_norm": 179.0, "learning_rate": 1.3270481144343304e-05, "loss": 0.6977, "step": 15370 }, { "epoch": 0.34999886216234666, "grad_norm": 46.25, "learning_rate": 1.326583689392532e-05, "loss": 0.7688, "step": 15380 }, { "epoch": 0.3502264296930114, "grad_norm": 0.00335693359375, "learning_rate": 1.326119264350734e-05, "loss": 0.0765, "step": 15390 }, { "epoch": 0.35045399722367615, "grad_norm": 3.25, "learning_rate": 1.3256548393089357e-05, "loss": 0.9989, "step": 15400 }, { "epoch": 0.35068156475434087, "grad_norm": 245.0, "learning_rate": 1.3251904142671375e-05, "loss": 1.2773, "step": 15410 }, { "epoch": 0.3509091322850056, "grad_norm": 384.0, "learning_rate": 1.3247259892253392e-05, "loss": 0.7924, "step": 15420 }, { "epoch": 0.3511366998156703, "grad_norm": 101.5, "learning_rate": 1.3242615641835408e-05, "loss": 0.5162, "step": 15430 }, { "epoch": 0.351364267346335, "grad_norm": 20.75, "learning_rate": 1.3237971391417427e-05, "loss": 1.9466, "step": 15440 }, { "epoch": 0.35159183487699974, "grad_norm": 246.0, "learning_rate": 1.3233327140999444e-05, "loss": 1.6041, "step": 15450 }, { "epoch": 0.35181940240766446, "grad_norm": 168.0, "learning_rate": 1.322868289058146e-05, "loss": 0.7777, "step": 15460 }, { "epoch": 0.3520469699383292, "grad_norm": 298.0, "learning_rate": 1.322403864016348e-05, "loss": 0.6025, "step": 15470 }, { "epoch": 0.3522745374689939, "grad_norm": 0.0272216796875, "learning_rate": 1.3219394389745496e-05, "loss": 0.895, "step": 15480 }, { "epoch": 0.35250210499965867, "grad_norm": 4.09375, "learning_rate": 1.3214750139327515e-05, "loss": 0.8177, "step": 15490 }, { "epoch": 0.3527296725303234, "grad_norm": 225.0, "learning_rate": 1.321010588890953e-05, "loss": 1.0028, "step": 15500 }, { "epoch": 0.3529572400609881, "grad_norm": 71.5, "learning_rate": 1.3205461638491548e-05, "loss": 0.7376, "step": 15510 }, { "epoch": 0.3531848075916528, "grad_norm": 0.2333984375, "learning_rate": 1.3200817388073566e-05, "loss": 1.1548, "step": 15520 }, { "epoch": 0.35341237512231755, "grad_norm": 1.3984375, "learning_rate": 1.3196173137655583e-05, "loss": 1.4933, "step": 15530 }, { "epoch": 0.35363994265298226, "grad_norm": 8.25, "learning_rate": 1.31915288872376e-05, "loss": 0.7798, "step": 15540 }, { "epoch": 0.353867510183647, "grad_norm": 0.0189208984375, "learning_rate": 1.318688463681962e-05, "loss": 0.4567, "step": 15550 }, { "epoch": 0.3540950777143117, "grad_norm": 0.1357421875, "learning_rate": 1.3182240386401636e-05, "loss": 0.6848, "step": 15560 }, { "epoch": 0.3543226452449764, "grad_norm": 0.01025390625, "learning_rate": 1.3177596135983652e-05, "loss": 1.4235, "step": 15570 }, { "epoch": 0.3545502127756412, "grad_norm": 194.0, "learning_rate": 1.317295188556567e-05, "loss": 0.5879, "step": 15580 }, { "epoch": 0.3547777803063059, "grad_norm": 0.004913330078125, "learning_rate": 1.3168307635147687e-05, "loss": 0.3398, "step": 15590 }, { "epoch": 0.35500534783697063, "grad_norm": 1.3828125, "learning_rate": 1.3163663384729706e-05, "loss": 0.5632, "step": 15600 }, { "epoch": 0.35523291536763535, "grad_norm": 0.019287109375, "learning_rate": 1.3159019134311723e-05, "loss": 0.6453, "step": 15610 }, { "epoch": 0.35546048289830007, "grad_norm": 0.015625, "learning_rate": 1.3154374883893742e-05, "loss": 0.8242, "step": 15620 }, { "epoch": 0.3556880504289648, "grad_norm": 185.0, "learning_rate": 1.3149730633475759e-05, "loss": 1.1618, "step": 15630 }, { "epoch": 0.3559156179596295, "grad_norm": 211.0, "learning_rate": 1.3145086383057774e-05, "loss": 0.3499, "step": 15640 }, { "epoch": 0.3561431854902942, "grad_norm": 210.0, "learning_rate": 1.3140442132639793e-05, "loss": 0.7016, "step": 15650 }, { "epoch": 0.356370753020959, "grad_norm": 0.0025787353515625, "learning_rate": 1.313579788222181e-05, "loss": 0.6714, "step": 15660 }, { "epoch": 0.3565983205516237, "grad_norm": 252.0, "learning_rate": 1.3131153631803827e-05, "loss": 1.6999, "step": 15670 }, { "epoch": 0.35682588808228843, "grad_norm": 182.0, "learning_rate": 1.3126509381385846e-05, "loss": 0.4642, "step": 15680 }, { "epoch": 0.35705345561295315, "grad_norm": 130.0, "learning_rate": 1.3121865130967863e-05, "loss": 1.4242, "step": 15690 }, { "epoch": 0.35728102314361787, "grad_norm": 109.5, "learning_rate": 1.3117220880549882e-05, "loss": 0.0546, "step": 15700 }, { "epoch": 0.3575085906742826, "grad_norm": 2.671875, "learning_rate": 1.3112576630131897e-05, "loss": 0.758, "step": 15710 }, { "epoch": 0.3577361582049473, "grad_norm": 0.4609375, "learning_rate": 1.3107932379713914e-05, "loss": 1.1305, "step": 15720 }, { "epoch": 0.357963725735612, "grad_norm": 286.0, "learning_rate": 1.3103288129295933e-05, "loss": 0.503, "step": 15730 }, { "epoch": 0.35819129326627674, "grad_norm": 0.00213623046875, "learning_rate": 1.309864387887795e-05, "loss": 1.7633, "step": 15740 }, { "epoch": 0.3584188607969415, "grad_norm": 15.5, "learning_rate": 1.3093999628459969e-05, "loss": 0.8071, "step": 15750 }, { "epoch": 0.35864642832760624, "grad_norm": 0.546875, "learning_rate": 1.3089355378041986e-05, "loss": 0.4278, "step": 15760 }, { "epoch": 0.35887399585827096, "grad_norm": 0.0238037109375, "learning_rate": 1.3084711127624003e-05, "loss": 0.9018, "step": 15770 }, { "epoch": 0.3591015633889357, "grad_norm": 98.5, "learning_rate": 1.3080066877206018e-05, "loss": 0.7321, "step": 15780 }, { "epoch": 0.3593291309196004, "grad_norm": 237.0, "learning_rate": 1.3075422626788037e-05, "loss": 0.5638, "step": 15790 }, { "epoch": 0.3595566984502651, "grad_norm": 340.0, "learning_rate": 1.3070778376370054e-05, "loss": 1.0392, "step": 15800 }, { "epoch": 0.35978426598092983, "grad_norm": 163.0, "learning_rate": 1.3066134125952073e-05, "loss": 0.9563, "step": 15810 }, { "epoch": 0.36001183351159455, "grad_norm": 8.440017700195312e-05, "learning_rate": 1.306148987553409e-05, "loss": 1.5171, "step": 15820 }, { "epoch": 0.36023940104225927, "grad_norm": 157.0, "learning_rate": 1.3056845625116109e-05, "loss": 0.4192, "step": 15830 }, { "epoch": 0.36046696857292404, "grad_norm": 148.0, "learning_rate": 1.3052201374698126e-05, "loss": 1.3199, "step": 15840 }, { "epoch": 0.36069453610358876, "grad_norm": 38.0, "learning_rate": 1.3047557124280141e-05, "loss": 1.0828, "step": 15850 }, { "epoch": 0.3609221036342535, "grad_norm": 0.71875, "learning_rate": 1.304291287386216e-05, "loss": 0.3923, "step": 15860 }, { "epoch": 0.3611496711649182, "grad_norm": 174.0, "learning_rate": 1.3038268623444177e-05, "loss": 0.5717, "step": 15870 }, { "epoch": 0.3613772386955829, "grad_norm": 191.0, "learning_rate": 1.3033624373026194e-05, "loss": 0.9921, "step": 15880 }, { "epoch": 0.36160480622624763, "grad_norm": 142.0, "learning_rate": 1.3028980122608213e-05, "loss": 1.1172, "step": 15890 }, { "epoch": 0.36183237375691235, "grad_norm": 0.0006103515625, "learning_rate": 1.302433587219023e-05, "loss": 0.3797, "step": 15900 }, { "epoch": 0.36205994128757707, "grad_norm": 0.0034637451171875, "learning_rate": 1.3019691621772248e-05, "loss": 0.8292, "step": 15910 }, { "epoch": 0.3622875088182418, "grad_norm": 236.0, "learning_rate": 1.3015047371354264e-05, "loss": 1.1466, "step": 15920 }, { "epoch": 0.36251507634890656, "grad_norm": 0.00099945068359375, "learning_rate": 1.3010403120936281e-05, "loss": 0.2944, "step": 15930 }, { "epoch": 0.3627426438795713, "grad_norm": 392.0, "learning_rate": 1.30057588705183e-05, "loss": 0.5735, "step": 15940 }, { "epoch": 0.362970211410236, "grad_norm": 87.0, "learning_rate": 1.3001114620100317e-05, "loss": 0.9794, "step": 15950 }, { "epoch": 0.3631977789409007, "grad_norm": 0.0036468505859375, "learning_rate": 1.2996470369682335e-05, "loss": 1.6257, "step": 15960 }, { "epoch": 0.36342534647156544, "grad_norm": 161.0, "learning_rate": 1.2991826119264352e-05, "loss": 1.2111, "step": 15970 }, { "epoch": 0.36365291400223015, "grad_norm": 33.0, "learning_rate": 1.298718186884637e-05, "loss": 0.8252, "step": 15980 }, { "epoch": 0.3638804815328949, "grad_norm": 0.00946044921875, "learning_rate": 1.2982537618428387e-05, "loss": 0.5141, "step": 15990 }, { "epoch": 0.3641080490635596, "grad_norm": 170.0, "learning_rate": 1.2977893368010404e-05, "loss": 0.4366, "step": 16000 }, { "epoch": 0.3643356165942243, "grad_norm": 76.0, "learning_rate": 1.297324911759242e-05, "loss": 0.2637, "step": 16010 }, { "epoch": 0.3645631841248891, "grad_norm": 53.5, "learning_rate": 1.296860486717444e-05, "loss": 1.5652, "step": 16020 }, { "epoch": 0.3647907516555538, "grad_norm": 0.0008392333984375, "learning_rate": 1.2963960616756456e-05, "loss": 0.4035, "step": 16030 }, { "epoch": 0.3650183191862185, "grad_norm": 107.0, "learning_rate": 1.2959316366338475e-05, "loss": 0.9229, "step": 16040 }, { "epoch": 0.36524588671688324, "grad_norm": 376.0, "learning_rate": 1.2954672115920492e-05, "loss": 0.3609, "step": 16050 }, { "epoch": 0.36547345424754796, "grad_norm": 160.0, "learning_rate": 1.2950027865502508e-05, "loss": 0.8992, "step": 16060 }, { "epoch": 0.3657010217782127, "grad_norm": 200.0, "learning_rate": 1.2945383615084526e-05, "loss": 0.5738, "step": 16070 }, { "epoch": 0.3659285893088774, "grad_norm": 0.01483154296875, "learning_rate": 1.2940739364666543e-05, "loss": 0.7051, "step": 16080 }, { "epoch": 0.3661561568395421, "grad_norm": 177.0, "learning_rate": 1.293609511424856e-05, "loss": 1.3206, "step": 16090 }, { "epoch": 0.36638372437020683, "grad_norm": 47.0, "learning_rate": 1.293145086383058e-05, "loss": 0.4525, "step": 16100 }, { "epoch": 0.3666112919008716, "grad_norm": 24.25, "learning_rate": 1.2926806613412596e-05, "loss": 0.5652, "step": 16110 }, { "epoch": 0.3668388594315363, "grad_norm": 0.1396484375, "learning_rate": 1.2922162362994615e-05, "loss": 0.312, "step": 16120 }, { "epoch": 0.36706642696220104, "grad_norm": 422.0, "learning_rate": 1.291751811257663e-05, "loss": 0.9173, "step": 16130 }, { "epoch": 0.36729399449286576, "grad_norm": 208.0, "learning_rate": 1.2912873862158647e-05, "loss": 1.8416, "step": 16140 }, { "epoch": 0.3675215620235305, "grad_norm": 0.0245361328125, "learning_rate": 1.2908229611740666e-05, "loss": 0.6649, "step": 16150 }, { "epoch": 0.3677491295541952, "grad_norm": 280.0, "learning_rate": 1.2903585361322683e-05, "loss": 0.4746, "step": 16160 }, { "epoch": 0.3679766970848599, "grad_norm": 82.0, "learning_rate": 1.2898941110904702e-05, "loss": 1.9868, "step": 16170 }, { "epoch": 0.36820426461552463, "grad_norm": 0.000858306884765625, "learning_rate": 1.2894296860486719e-05, "loss": 0.3607, "step": 16180 }, { "epoch": 0.3684318321461894, "grad_norm": 17.5, "learning_rate": 1.2889652610068736e-05, "loss": 0.0988, "step": 16190 }, { "epoch": 0.3686593996768541, "grad_norm": 231.0, "learning_rate": 1.2885008359650753e-05, "loss": 0.5711, "step": 16200 }, { "epoch": 0.36888696720751885, "grad_norm": 0.001190185546875, "learning_rate": 1.288036410923277e-05, "loss": 0.4288, "step": 16210 }, { "epoch": 0.36911453473818356, "grad_norm": 75.5, "learning_rate": 1.2875719858814787e-05, "loss": 0.7394, "step": 16220 }, { "epoch": 0.3693421022688483, "grad_norm": 0.11962890625, "learning_rate": 1.2871075608396806e-05, "loss": 1.2715, "step": 16230 }, { "epoch": 0.369569669799513, "grad_norm": 40.75, "learning_rate": 1.2866431357978823e-05, "loss": 0.9984, "step": 16240 }, { "epoch": 0.3697972373301777, "grad_norm": 256.0, "learning_rate": 1.2861787107560842e-05, "loss": 1.1072, "step": 16250 }, { "epoch": 0.37002480486084244, "grad_norm": 105.0, "learning_rate": 1.2857142857142859e-05, "loss": 0.6409, "step": 16260 }, { "epoch": 0.37025237239150716, "grad_norm": 1664.0, "learning_rate": 1.2852498606724874e-05, "loss": 1.2922, "step": 16270 }, { "epoch": 0.37047993992217193, "grad_norm": 0.458984375, "learning_rate": 1.2847854356306893e-05, "loss": 1.2397, "step": 16280 }, { "epoch": 0.37070750745283665, "grad_norm": 0.002105712890625, "learning_rate": 1.284321010588891e-05, "loss": 0.5196, "step": 16290 }, { "epoch": 0.37093507498350137, "grad_norm": 11.4375, "learning_rate": 1.2838565855470929e-05, "loss": 1.0441, "step": 16300 }, { "epoch": 0.3711626425141661, "grad_norm": 61.75, "learning_rate": 1.2833921605052946e-05, "loss": 0.3559, "step": 16310 }, { "epoch": 0.3713902100448308, "grad_norm": 0.0732421875, "learning_rate": 1.2829277354634963e-05, "loss": 1.1553, "step": 16320 }, { "epoch": 0.3716177775754955, "grad_norm": 159.0, "learning_rate": 1.2824633104216982e-05, "loss": 1.2631, "step": 16330 }, { "epoch": 0.37184534510616024, "grad_norm": 111.5, "learning_rate": 1.2819988853798997e-05, "loss": 0.7138, "step": 16340 }, { "epoch": 0.37207291263682496, "grad_norm": 1.3125, "learning_rate": 1.2815344603381014e-05, "loss": 0.3249, "step": 16350 }, { "epoch": 0.3723004801674897, "grad_norm": 37.75, "learning_rate": 1.2810700352963033e-05, "loss": 0.6052, "step": 16360 }, { "epoch": 0.37252804769815445, "grad_norm": 28.625, "learning_rate": 1.280605610254505e-05, "loss": 0.8426, "step": 16370 }, { "epoch": 0.37275561522881917, "grad_norm": 79.0, "learning_rate": 1.2801411852127069e-05, "loss": 0.9621, "step": 16380 }, { "epoch": 0.3729831827594839, "grad_norm": 144.0, "learning_rate": 1.2796767601709086e-05, "loss": 1.7628, "step": 16390 }, { "epoch": 0.3732107502901486, "grad_norm": 87.5, "learning_rate": 1.2792123351291103e-05, "loss": 0.7972, "step": 16400 }, { "epoch": 0.3734383178208133, "grad_norm": 190.0, "learning_rate": 1.278747910087312e-05, "loss": 0.6429, "step": 16410 }, { "epoch": 0.37366588535147804, "grad_norm": 87.5, "learning_rate": 1.2782834850455137e-05, "loss": 0.9118, "step": 16420 }, { "epoch": 0.37389345288214276, "grad_norm": 54.5, "learning_rate": 1.2778190600037154e-05, "loss": 0.3812, "step": 16430 }, { "epoch": 0.3741210204128075, "grad_norm": 0.006195068359375, "learning_rate": 1.2773546349619173e-05, "loss": 0.7153, "step": 16440 }, { "epoch": 0.3743485879434722, "grad_norm": 0.0289306640625, "learning_rate": 1.276890209920119e-05, "loss": 0.8649, "step": 16450 }, { "epoch": 0.374576155474137, "grad_norm": 168.0, "learning_rate": 1.2764257848783208e-05, "loss": 0.9267, "step": 16460 }, { "epoch": 0.3748037230048017, "grad_norm": 322.0, "learning_rate": 1.2759613598365226e-05, "loss": 0.9965, "step": 16470 }, { "epoch": 0.3750312905354664, "grad_norm": 215.0, "learning_rate": 1.2754969347947241e-05, "loss": 1.285, "step": 16480 }, { "epoch": 0.37525885806613113, "grad_norm": 16.75, "learning_rate": 1.275032509752926e-05, "loss": 0.913, "step": 16490 }, { "epoch": 0.37548642559679585, "grad_norm": 211.0, "learning_rate": 1.2745680847111277e-05, "loss": 0.7308, "step": 16500 }, { "epoch": 0.37571399312746057, "grad_norm": 34.25, "learning_rate": 1.2741036596693295e-05, "loss": 0.7919, "step": 16510 }, { "epoch": 0.3759415606581253, "grad_norm": 73.0, "learning_rate": 1.2736392346275313e-05, "loss": 0.397, "step": 16520 }, { "epoch": 0.37616912818879, "grad_norm": 61.25, "learning_rate": 1.273174809585733e-05, "loss": 0.6596, "step": 16530 }, { "epoch": 0.3763966957194547, "grad_norm": 144.0, "learning_rate": 1.2727103845439348e-05, "loss": 1.3412, "step": 16540 }, { "epoch": 0.3766242632501195, "grad_norm": 0.055908203125, "learning_rate": 1.2722459595021364e-05, "loss": 0.1395, "step": 16550 }, { "epoch": 0.3768518307807842, "grad_norm": 0.068359375, "learning_rate": 1.271781534460338e-05, "loss": 0.3578, "step": 16560 }, { "epoch": 0.37707939831144893, "grad_norm": 123.0, "learning_rate": 1.27131710941854e-05, "loss": 0.4674, "step": 16570 }, { "epoch": 0.37730696584211365, "grad_norm": 96.0, "learning_rate": 1.2708526843767417e-05, "loss": 0.7044, "step": 16580 }, { "epoch": 0.37753453337277837, "grad_norm": 4.96875, "learning_rate": 1.2703882593349435e-05, "loss": 0.2028, "step": 16590 }, { "epoch": 0.3777621009034431, "grad_norm": 91.5, "learning_rate": 1.2699238342931452e-05, "loss": 1.0424, "step": 16600 }, { "epoch": 0.3779896684341078, "grad_norm": 38.0, "learning_rate": 1.2694594092513471e-05, "loss": 0.4607, "step": 16610 }, { "epoch": 0.3782172359647725, "grad_norm": 0.00043487548828125, "learning_rate": 1.2689949842095486e-05, "loss": 1.3067, "step": 16620 }, { "epoch": 0.3784448034954373, "grad_norm": 39.75, "learning_rate": 1.2685305591677504e-05, "loss": 0.8811, "step": 16630 }, { "epoch": 0.378672371026102, "grad_norm": 0.0169677734375, "learning_rate": 1.2680661341259522e-05, "loss": 0.3443, "step": 16640 }, { "epoch": 0.37889993855676674, "grad_norm": 93.0, "learning_rate": 1.267601709084154e-05, "loss": 0.8494, "step": 16650 }, { "epoch": 0.37912750608743145, "grad_norm": 91.0, "learning_rate": 1.2671372840423556e-05, "loss": 1.0365, "step": 16660 }, { "epoch": 0.3793550736180962, "grad_norm": 2.546875, "learning_rate": 1.2666728590005575e-05, "loss": 0.6799, "step": 16670 }, { "epoch": 0.3795826411487609, "grad_norm": 0.00103759765625, "learning_rate": 1.2662084339587592e-05, "loss": 0.259, "step": 16680 }, { "epoch": 0.3798102086794256, "grad_norm": 249.0, "learning_rate": 1.2657440089169608e-05, "loss": 0.7308, "step": 16690 }, { "epoch": 0.38003777621009033, "grad_norm": 268.0, "learning_rate": 1.2652795838751626e-05, "loss": 0.9347, "step": 16700 }, { "epoch": 0.38026534374075505, "grad_norm": 151.0, "learning_rate": 1.2648151588333643e-05, "loss": 0.8446, "step": 16710 }, { "epoch": 0.3804929112714198, "grad_norm": 0.00032806396484375, "learning_rate": 1.2643507337915662e-05, "loss": 0.1795, "step": 16720 }, { "epoch": 0.38072047880208454, "grad_norm": 5.9375, "learning_rate": 1.2638863087497679e-05, "loss": 0.8374, "step": 16730 }, { "epoch": 0.38094804633274926, "grad_norm": 21.5, "learning_rate": 1.2634218837079696e-05, "loss": 1.006, "step": 16740 }, { "epoch": 0.381175613863414, "grad_norm": 0.85546875, "learning_rate": 1.2629574586661715e-05, "loss": 1.1695, "step": 16750 }, { "epoch": 0.3814031813940787, "grad_norm": 0.0019989013671875, "learning_rate": 1.262493033624373e-05, "loss": 0.7222, "step": 16760 }, { "epoch": 0.3816307489247434, "grad_norm": 127.5, "learning_rate": 1.2620286085825747e-05, "loss": 0.71, "step": 16770 }, { "epoch": 0.38185831645540813, "grad_norm": 57.0, "learning_rate": 1.2615641835407766e-05, "loss": 1.4763, "step": 16780 }, { "epoch": 0.38208588398607285, "grad_norm": 61.5, "learning_rate": 1.2610997584989783e-05, "loss": 0.6749, "step": 16790 }, { "epoch": 0.38231345151673757, "grad_norm": 6.03125, "learning_rate": 1.2606353334571802e-05, "loss": 0.6963, "step": 16800 }, { "epoch": 0.38254101904740234, "grad_norm": 0.080078125, "learning_rate": 1.2601709084153819e-05, "loss": 0.7662, "step": 16810 }, { "epoch": 0.38276858657806706, "grad_norm": 84.5, "learning_rate": 1.2597064833735838e-05, "loss": 0.5923, "step": 16820 }, { "epoch": 0.3829961541087318, "grad_norm": 104.0, "learning_rate": 1.2592420583317853e-05, "loss": 1.259, "step": 16830 }, { "epoch": 0.3832237216393965, "grad_norm": 91.5, "learning_rate": 1.258777633289987e-05, "loss": 1.4961, "step": 16840 }, { "epoch": 0.3834512891700612, "grad_norm": 151.0, "learning_rate": 1.2583132082481889e-05, "loss": 0.8115, "step": 16850 }, { "epoch": 0.38367885670072593, "grad_norm": 27.0, "learning_rate": 1.2578487832063906e-05, "loss": 0.8399, "step": 16860 }, { "epoch": 0.38390642423139065, "grad_norm": 0.052978515625, "learning_rate": 1.2573843581645923e-05, "loss": 0.6785, "step": 16870 }, { "epoch": 0.38413399176205537, "grad_norm": 128.0, "learning_rate": 1.2569199331227942e-05, "loss": 1.0389, "step": 16880 }, { "epoch": 0.3843615592927201, "grad_norm": 0.0079345703125, "learning_rate": 1.2564555080809959e-05, "loss": 1.2284, "step": 16890 }, { "epoch": 0.38458912682338486, "grad_norm": 3.015625, "learning_rate": 1.2559910830391974e-05, "loss": 0.882, "step": 16900 }, { "epoch": 0.3848166943540496, "grad_norm": 95.0, "learning_rate": 1.2555266579973993e-05, "loss": 0.9268, "step": 16910 }, { "epoch": 0.3850442618847143, "grad_norm": 4.71875, "learning_rate": 1.255062232955601e-05, "loss": 0.3932, "step": 16920 }, { "epoch": 0.385271829415379, "grad_norm": 0.01953125, "learning_rate": 1.2545978079138029e-05, "loss": 0.3272, "step": 16930 }, { "epoch": 0.38549939694604374, "grad_norm": 0.578125, "learning_rate": 1.2541333828720046e-05, "loss": 0.9968, "step": 16940 }, { "epoch": 0.38572696447670846, "grad_norm": 0.3984375, "learning_rate": 1.2536689578302065e-05, "loss": 0.4099, "step": 16950 }, { "epoch": 0.3859545320073732, "grad_norm": 354.0, "learning_rate": 1.2532045327884082e-05, "loss": 1.3693, "step": 16960 }, { "epoch": 0.3861820995380379, "grad_norm": 486.0, "learning_rate": 1.2527401077466097e-05, "loss": 1.1435, "step": 16970 }, { "epoch": 0.3864096670687026, "grad_norm": 55.5, "learning_rate": 1.2522756827048114e-05, "loss": 0.6448, "step": 16980 }, { "epoch": 0.3866372345993674, "grad_norm": 135.0, "learning_rate": 1.2518112576630133e-05, "loss": 0.5282, "step": 16990 }, { "epoch": 0.3868648021300321, "grad_norm": 171.0, "learning_rate": 1.251346832621215e-05, "loss": 1.2159, "step": 17000 }, { "epoch": 0.3870923696606968, "grad_norm": 350.0, "learning_rate": 1.2508824075794169e-05, "loss": 0.8245, "step": 17010 }, { "epoch": 0.38731993719136154, "grad_norm": 0.00015926361083984375, "learning_rate": 1.2504179825376186e-05, "loss": 0.5061, "step": 17020 }, { "epoch": 0.38754750472202626, "grad_norm": 141.0, "learning_rate": 1.2499535574958204e-05, "loss": 0.6716, "step": 17030 }, { "epoch": 0.387775072252691, "grad_norm": 94.5, "learning_rate": 1.249489132454022e-05, "loss": 0.7097, "step": 17040 }, { "epoch": 0.3880026397833557, "grad_norm": 0.00060272216796875, "learning_rate": 1.2490247074122237e-05, "loss": 0.7681, "step": 17050 }, { "epoch": 0.3882302073140204, "grad_norm": 253.0, "learning_rate": 1.2485602823704256e-05, "loss": 0.8042, "step": 17060 }, { "epoch": 0.38845777484468513, "grad_norm": 54.25, "learning_rate": 1.2480958573286273e-05, "loss": 0.8486, "step": 17070 }, { "epoch": 0.3886853423753499, "grad_norm": 0.001007080078125, "learning_rate": 1.247631432286829e-05, "loss": 0.7343, "step": 17080 }, { "epoch": 0.3889129099060146, "grad_norm": 133.0, "learning_rate": 1.2471670072450308e-05, "loss": 0.6121, "step": 17090 }, { "epoch": 0.38914047743667934, "grad_norm": 99.5, "learning_rate": 1.2467025822032325e-05, "loss": 0.3583, "step": 17100 }, { "epoch": 0.38936804496734406, "grad_norm": 0.0751953125, "learning_rate": 1.246238157161434e-05, "loss": 1.037, "step": 17110 }, { "epoch": 0.3895956124980088, "grad_norm": 3.640625, "learning_rate": 1.245773732119636e-05, "loss": 0.7493, "step": 17120 }, { "epoch": 0.3898231800286735, "grad_norm": 0.08544921875, "learning_rate": 1.2453093070778377e-05, "loss": 0.138, "step": 17130 }, { "epoch": 0.3900507475593382, "grad_norm": 97.0, "learning_rate": 1.2448448820360395e-05, "loss": 1.4574, "step": 17140 }, { "epoch": 0.39027831509000294, "grad_norm": 81.0, "learning_rate": 1.2443804569942412e-05, "loss": 0.9045, "step": 17150 }, { "epoch": 0.3905058826206677, "grad_norm": 0.013671875, "learning_rate": 1.2439160319524431e-05, "loss": 0.5006, "step": 17160 }, { "epoch": 0.39073345015133243, "grad_norm": 0.0040283203125, "learning_rate": 1.2434516069106448e-05, "loss": 0.8424, "step": 17170 }, { "epoch": 0.39096101768199715, "grad_norm": 1.0625, "learning_rate": 1.2429871818688464e-05, "loss": 1.3774, "step": 17180 }, { "epoch": 0.39118858521266187, "grad_norm": 25.875, "learning_rate": 1.2425227568270482e-05, "loss": 0.7609, "step": 17190 }, { "epoch": 0.3914161527433266, "grad_norm": 89.0, "learning_rate": 1.24205833178525e-05, "loss": 0.6416, "step": 17200 }, { "epoch": 0.3916437202739913, "grad_norm": 22.875, "learning_rate": 1.2415939067434516e-05, "loss": 0.2466, "step": 17210 }, { "epoch": 0.391871287804656, "grad_norm": 0.0001354217529296875, "learning_rate": 1.2411294817016535e-05, "loss": 0.8901, "step": 17220 }, { "epoch": 0.39209885533532074, "grad_norm": 354.0, "learning_rate": 1.2406650566598552e-05, "loss": 1.217, "step": 17230 }, { "epoch": 0.39232642286598546, "grad_norm": 0.11865234375, "learning_rate": 1.2402006316180571e-05, "loss": 0.9975, "step": 17240 }, { "epoch": 0.39255399039665023, "grad_norm": 170.0, "learning_rate": 1.2397362065762586e-05, "loss": 0.2857, "step": 17250 }, { "epoch": 0.39278155792731495, "grad_norm": 0.002777099609375, "learning_rate": 1.2392717815344603e-05, "loss": 0.2327, "step": 17260 }, { "epoch": 0.39300912545797967, "grad_norm": 272.0, "learning_rate": 1.2388073564926622e-05, "loss": 1.4568, "step": 17270 }, { "epoch": 0.3932366929886444, "grad_norm": 51.75, "learning_rate": 1.238342931450864e-05, "loss": 1.0954, "step": 17280 }, { "epoch": 0.3934642605193091, "grad_norm": 120.0, "learning_rate": 1.2378785064090656e-05, "loss": 1.5833, "step": 17290 }, { "epoch": 0.3936918280499738, "grad_norm": 466.0, "learning_rate": 1.2374140813672675e-05, "loss": 1.3734, "step": 17300 }, { "epoch": 0.39391939558063854, "grad_norm": 0.005218505859375, "learning_rate": 1.2369496563254692e-05, "loss": 1.0348, "step": 17310 }, { "epoch": 0.39414696311130326, "grad_norm": 146.0, "learning_rate": 1.2364852312836707e-05, "loss": 1.0445, "step": 17320 }, { "epoch": 0.394374530641968, "grad_norm": 0.004730224609375, "learning_rate": 1.2360208062418726e-05, "loss": 0.541, "step": 17330 }, { "epoch": 0.39460209817263275, "grad_norm": 124.0, "learning_rate": 1.2355563812000743e-05, "loss": 0.8074, "step": 17340 }, { "epoch": 0.39482966570329747, "grad_norm": 7.75, "learning_rate": 1.2350919561582762e-05, "loss": 0.9686, "step": 17350 }, { "epoch": 0.3950572332339622, "grad_norm": 83.0, "learning_rate": 1.2346275311164779e-05, "loss": 1.1944, "step": 17360 }, { "epoch": 0.3952848007646269, "grad_norm": 90.0, "learning_rate": 1.2341631060746798e-05, "loss": 0.4076, "step": 17370 }, { "epoch": 0.3955123682952916, "grad_norm": 205.0, "learning_rate": 1.2336986810328815e-05, "loss": 1.4992, "step": 17380 }, { "epoch": 0.39573993582595635, "grad_norm": 169.0, "learning_rate": 1.233234255991083e-05, "loss": 0.8125, "step": 17390 }, { "epoch": 0.39596750335662106, "grad_norm": 0.00469970703125, "learning_rate": 1.2327698309492849e-05, "loss": 1.4201, "step": 17400 }, { "epoch": 0.3961950708872858, "grad_norm": 0.000423431396484375, "learning_rate": 1.2323054059074866e-05, "loss": 0.3692, "step": 17410 }, { "epoch": 0.3964226384179505, "grad_norm": 81.5, "learning_rate": 1.2318409808656883e-05, "loss": 0.7759, "step": 17420 }, { "epoch": 0.3966502059486153, "grad_norm": 32.25, "learning_rate": 1.2313765558238902e-05, "loss": 0.708, "step": 17430 }, { "epoch": 0.39687777347928, "grad_norm": 0.0223388671875, "learning_rate": 1.2309121307820919e-05, "loss": 1.0007, "step": 17440 }, { "epoch": 0.3971053410099447, "grad_norm": 68.5, "learning_rate": 1.2304477057402938e-05, "loss": 0.1409, "step": 17450 }, { "epoch": 0.39733290854060943, "grad_norm": 134.0, "learning_rate": 1.2299832806984953e-05, "loss": 0.705, "step": 17460 }, { "epoch": 0.39756047607127415, "grad_norm": 0.001495361328125, "learning_rate": 1.229518855656697e-05, "loss": 0.7679, "step": 17470 }, { "epoch": 0.39778804360193887, "grad_norm": 544.0, "learning_rate": 1.2290544306148989e-05, "loss": 0.9176, "step": 17480 }, { "epoch": 0.3980156111326036, "grad_norm": 156.0, "learning_rate": 1.2285900055731006e-05, "loss": 1.0391, "step": 17490 }, { "epoch": 0.3982431786632683, "grad_norm": 0.0133056640625, "learning_rate": 1.2281255805313025e-05, "loss": 1.1167, "step": 17500 }, { "epoch": 0.398470746193933, "grad_norm": 83.0, "learning_rate": 1.2276611554895042e-05, "loss": 0.3517, "step": 17510 }, { "epoch": 0.3986983137245978, "grad_norm": 0.0018157958984375, "learning_rate": 1.2271967304477059e-05, "loss": 0.3915, "step": 17520 }, { "epoch": 0.3989258812552625, "grad_norm": 118.5, "learning_rate": 1.2267323054059074e-05, "loss": 0.8845, "step": 17530 }, { "epoch": 0.39915344878592723, "grad_norm": 159.0, "learning_rate": 1.2262678803641093e-05, "loss": 0.8344, "step": 17540 }, { "epoch": 0.39938101631659195, "grad_norm": 262.0, "learning_rate": 1.225803455322311e-05, "loss": 0.8815, "step": 17550 }, { "epoch": 0.39960858384725667, "grad_norm": 348.0, "learning_rate": 1.2253390302805129e-05, "loss": 0.8194, "step": 17560 }, { "epoch": 0.3998361513779214, "grad_norm": 38.5, "learning_rate": 1.2248746052387146e-05, "loss": 0.6572, "step": 17570 }, { "epoch": 0.4000637189085861, "grad_norm": 0.0010528564453125, "learning_rate": 1.2244101801969164e-05, "loss": 0.1868, "step": 17580 }, { "epoch": 0.4002912864392508, "grad_norm": 0.0091552734375, "learning_rate": 1.2239457551551181e-05, "loss": 0.4269, "step": 17590 }, { "epoch": 0.40051885396991554, "grad_norm": 2.0625, "learning_rate": 1.2234813301133197e-05, "loss": 0.9737, "step": 17600 }, { "epoch": 0.4007464215005803, "grad_norm": 0.5390625, "learning_rate": 1.2230169050715216e-05, "loss": 0.7785, "step": 17610 }, { "epoch": 0.40097398903124504, "grad_norm": 4.59375, "learning_rate": 1.2225524800297233e-05, "loss": 0.7506, "step": 17620 }, { "epoch": 0.40120155656190976, "grad_norm": 294.0, "learning_rate": 1.222088054987925e-05, "loss": 1.9791, "step": 17630 }, { "epoch": 0.4014291240925745, "grad_norm": 1296.0, "learning_rate": 1.2216236299461268e-05, "loss": 1.2024, "step": 17640 }, { "epoch": 0.4016566916232392, "grad_norm": 1.2265625, "learning_rate": 1.2211592049043285e-05, "loss": 0.1902, "step": 17650 }, { "epoch": 0.4018842591539039, "grad_norm": 14.0625, "learning_rate": 1.2206947798625304e-05, "loss": 0.7949, "step": 17660 }, { "epoch": 0.40211182668456863, "grad_norm": 0.00244140625, "learning_rate": 1.220230354820732e-05, "loss": 0.4611, "step": 17670 }, { "epoch": 0.40233939421523335, "grad_norm": 0.00592041015625, "learning_rate": 1.2197659297789337e-05, "loss": 1.031, "step": 17680 }, { "epoch": 0.4025669617458981, "grad_norm": 1.3984375, "learning_rate": 1.2193015047371355e-05, "loss": 1.5722, "step": 17690 }, { "epoch": 0.40279452927656284, "grad_norm": 0.0006561279296875, "learning_rate": 1.2188370796953372e-05, "loss": 0.6163, "step": 17700 }, { "epoch": 0.40302209680722756, "grad_norm": 127.0, "learning_rate": 1.2183726546535391e-05, "loss": 0.3253, "step": 17710 }, { "epoch": 0.4032496643378923, "grad_norm": 76.5, "learning_rate": 1.2179082296117408e-05, "loss": 0.4038, "step": 17720 }, { "epoch": 0.403477231868557, "grad_norm": 53.0, "learning_rate": 1.2174438045699425e-05, "loss": 0.9599, "step": 17730 }, { "epoch": 0.4037047993992217, "grad_norm": 139.0, "learning_rate": 1.2169793795281442e-05, "loss": 0.7909, "step": 17740 }, { "epoch": 0.40393236692988643, "grad_norm": 0.197265625, "learning_rate": 1.216514954486346e-05, "loss": 0.7193, "step": 17750 }, { "epoch": 0.40415993446055115, "grad_norm": 0.0084228515625, "learning_rate": 1.2160505294445476e-05, "loss": 0.7582, "step": 17760 }, { "epoch": 0.40438750199121587, "grad_norm": 65.0, "learning_rate": 1.2155861044027495e-05, "loss": 0.6586, "step": 17770 }, { "epoch": 0.40461506952188064, "grad_norm": 0.00225830078125, "learning_rate": 1.2151216793609512e-05, "loss": 0.9581, "step": 17780 }, { "epoch": 0.40484263705254536, "grad_norm": 0.006317138671875, "learning_rate": 1.2146572543191531e-05, "loss": 0.2644, "step": 17790 }, { "epoch": 0.4050702045832101, "grad_norm": 1.46875, "learning_rate": 1.2141928292773548e-05, "loss": 0.8513, "step": 17800 }, { "epoch": 0.4052977721138748, "grad_norm": 232.0, "learning_rate": 1.2137284042355563e-05, "loss": 1.0833, "step": 17810 }, { "epoch": 0.4055253396445395, "grad_norm": 37.75, "learning_rate": 1.2132639791937582e-05, "loss": 0.9023, "step": 17820 }, { "epoch": 0.40575290717520424, "grad_norm": 0.001556396484375, "learning_rate": 1.21279955415196e-05, "loss": 0.6931, "step": 17830 }, { "epoch": 0.40598047470586895, "grad_norm": 161.0, "learning_rate": 1.2123351291101616e-05, "loss": 0.3631, "step": 17840 }, { "epoch": 0.4062080422365337, "grad_norm": 91.5, "learning_rate": 1.2118707040683635e-05, "loss": 1.409, "step": 17850 }, { "epoch": 0.4064356097671984, "grad_norm": 51.5, "learning_rate": 1.2114062790265652e-05, "loss": 1.5302, "step": 17860 }, { "epoch": 0.40666317729786317, "grad_norm": 226.0, "learning_rate": 1.210941853984767e-05, "loss": 0.906, "step": 17870 }, { "epoch": 0.4068907448285279, "grad_norm": 0.1484375, "learning_rate": 1.2104774289429686e-05, "loss": 0.4803, "step": 17880 }, { "epoch": 0.4071183123591926, "grad_norm": 0.3671875, "learning_rate": 1.2100130039011703e-05, "loss": 0.3903, "step": 17890 }, { "epoch": 0.4073458798898573, "grad_norm": 202.0, "learning_rate": 1.2095485788593722e-05, "loss": 0.6071, "step": 17900 }, { "epoch": 0.40757344742052204, "grad_norm": 486.0, "learning_rate": 1.2090841538175739e-05, "loss": 1.4786, "step": 17910 }, { "epoch": 0.40780101495118676, "grad_norm": 17.375, "learning_rate": 1.2086197287757758e-05, "loss": 0.3555, "step": 17920 }, { "epoch": 0.4080285824818515, "grad_norm": 198.0, "learning_rate": 1.2081553037339775e-05, "loss": 0.5354, "step": 17930 }, { "epoch": 0.4082561500125162, "grad_norm": 39.5, "learning_rate": 1.2076908786921792e-05, "loss": 0.9613, "step": 17940 }, { "epoch": 0.4084837175431809, "grad_norm": 0.04052734375, "learning_rate": 1.2072264536503809e-05, "loss": 0.9074, "step": 17950 }, { "epoch": 0.4087112850738457, "grad_norm": 169.0, "learning_rate": 1.2067620286085826e-05, "loss": 0.5313, "step": 17960 }, { "epoch": 0.4089388526045104, "grad_norm": 178.0, "learning_rate": 1.2062976035667843e-05, "loss": 1.4092, "step": 17970 }, { "epoch": 0.4091664201351751, "grad_norm": 93.0, "learning_rate": 1.2058331785249862e-05, "loss": 1.6036, "step": 17980 }, { "epoch": 0.40939398766583984, "grad_norm": 12.875, "learning_rate": 1.2053687534831879e-05, "loss": 1.1937, "step": 17990 }, { "epoch": 0.40962155519650456, "grad_norm": 0.0031585693359375, "learning_rate": 1.2049043284413898e-05, "loss": 0.5844, "step": 18000 }, { "epoch": 0.4098491227271693, "grad_norm": 139.0, "learning_rate": 1.2044399033995915e-05, "loss": 0.9902, "step": 18010 }, { "epoch": 0.410076690257834, "grad_norm": 125.0, "learning_rate": 1.203975478357793e-05, "loss": 0.5762, "step": 18020 }, { "epoch": 0.4103042577884987, "grad_norm": 96.5, "learning_rate": 1.2035110533159949e-05, "loss": 0.7045, "step": 18030 }, { "epoch": 0.41053182531916343, "grad_norm": 38.25, "learning_rate": 1.2030466282741966e-05, "loss": 0.6972, "step": 18040 }, { "epoch": 0.4107593928498282, "grad_norm": 0.00531005859375, "learning_rate": 1.2025822032323985e-05, "loss": 0.7883, "step": 18050 }, { "epoch": 0.4109869603804929, "grad_norm": 0.013671875, "learning_rate": 1.2021177781906002e-05, "loss": 1.0242, "step": 18060 }, { "epoch": 0.41121452791115765, "grad_norm": 32.75, "learning_rate": 1.2016533531488019e-05, "loss": 0.7443, "step": 18070 }, { "epoch": 0.41144209544182236, "grad_norm": 2.640625, "learning_rate": 1.2011889281070037e-05, "loss": 0.5149, "step": 18080 }, { "epoch": 0.4116696629724871, "grad_norm": 306.0, "learning_rate": 1.2007245030652053e-05, "loss": 0.6479, "step": 18090 }, { "epoch": 0.4118972305031518, "grad_norm": 3.234375, "learning_rate": 1.200260078023407e-05, "loss": 0.5257, "step": 18100 }, { "epoch": 0.4121247980338165, "grad_norm": 0.00095367431640625, "learning_rate": 1.1997956529816089e-05, "loss": 0.5818, "step": 18110 }, { "epoch": 0.41235236556448124, "grad_norm": 121.5, "learning_rate": 1.1993312279398106e-05, "loss": 0.5247, "step": 18120 }, { "epoch": 0.412579933095146, "grad_norm": 187.0, "learning_rate": 1.1988668028980124e-05, "loss": 1.983, "step": 18130 }, { "epoch": 0.41280750062581073, "grad_norm": 1.03125, "learning_rate": 1.1984023778562141e-05, "loss": 1.2094, "step": 18140 }, { "epoch": 0.41303506815647545, "grad_norm": 2.453125, "learning_rate": 1.197937952814416e-05, "loss": 0.6829, "step": 18150 }, { "epoch": 0.41326263568714017, "grad_norm": 54.25, "learning_rate": 1.1974735277726176e-05, "loss": 1.4937, "step": 18160 }, { "epoch": 0.4134902032178049, "grad_norm": 0.00299072265625, "learning_rate": 1.1970091027308193e-05, "loss": 0.6557, "step": 18170 }, { "epoch": 0.4137177707484696, "grad_norm": 192.0, "learning_rate": 1.196544677689021e-05, "loss": 0.9217, "step": 18180 }, { "epoch": 0.4139453382791343, "grad_norm": 0.0634765625, "learning_rate": 1.1960802526472228e-05, "loss": 1.0493, "step": 18190 }, { "epoch": 0.41417290580979904, "grad_norm": 105.0, "learning_rate": 1.1956158276054246e-05, "loss": 0.4544, "step": 18200 }, { "epoch": 0.41440047334046376, "grad_norm": 0.0299072265625, "learning_rate": 1.1951514025636264e-05, "loss": 0.8943, "step": 18210 }, { "epoch": 0.41462804087112853, "grad_norm": 0.0084228515625, "learning_rate": 1.1946869775218281e-05, "loss": 0.5173, "step": 18220 }, { "epoch": 0.41485560840179325, "grad_norm": 52.5, "learning_rate": 1.1942225524800297e-05, "loss": 0.9757, "step": 18230 }, { "epoch": 0.41508317593245797, "grad_norm": 56.75, "learning_rate": 1.1937581274382315e-05, "loss": 1.082, "step": 18240 }, { "epoch": 0.4153107434631227, "grad_norm": 390.0, "learning_rate": 1.1932937023964332e-05, "loss": 1.3435, "step": 18250 }, { "epoch": 0.4155383109937874, "grad_norm": 118.0, "learning_rate": 1.1928292773546351e-05, "loss": 0.4047, "step": 18260 }, { "epoch": 0.4157658785244521, "grad_norm": 0.00640869140625, "learning_rate": 1.1923648523128368e-05, "loss": 0.8279, "step": 18270 }, { "epoch": 0.41599344605511684, "grad_norm": 0.0067138671875, "learning_rate": 1.1919004272710385e-05, "loss": 0.513, "step": 18280 }, { "epoch": 0.41622101358578156, "grad_norm": 90.0, "learning_rate": 1.1914360022292404e-05, "loss": 0.9149, "step": 18290 }, { "epoch": 0.4164485811164463, "grad_norm": 67.0, "learning_rate": 1.190971577187442e-05, "loss": 0.5235, "step": 18300 }, { "epoch": 0.41667614864711106, "grad_norm": 197.0, "learning_rate": 1.1905071521456436e-05, "loss": 0.8308, "step": 18310 }, { "epoch": 0.4169037161777758, "grad_norm": 0.02490234375, "learning_rate": 1.1900427271038455e-05, "loss": 0.6498, "step": 18320 }, { "epoch": 0.4171312837084405, "grad_norm": 0.2216796875, "learning_rate": 1.1895783020620472e-05, "loss": 0.4057, "step": 18330 }, { "epoch": 0.4173588512391052, "grad_norm": 13.6875, "learning_rate": 1.1891138770202491e-05, "loss": 0.4702, "step": 18340 }, { "epoch": 0.41758641876976993, "grad_norm": 103.5, "learning_rate": 1.1886494519784508e-05, "loss": 0.5368, "step": 18350 }, { "epoch": 0.41781398630043465, "grad_norm": 137.0, "learning_rate": 1.1881850269366527e-05, "loss": 0.5947, "step": 18360 }, { "epoch": 0.41804155383109937, "grad_norm": 0.060546875, "learning_rate": 1.1877206018948542e-05, "loss": 0.5945, "step": 18370 }, { "epoch": 0.4182691213617641, "grad_norm": 39.5, "learning_rate": 1.187256176853056e-05, "loss": 1.4374, "step": 18380 }, { "epoch": 0.4184966888924288, "grad_norm": 168.0, "learning_rate": 1.1867917518112578e-05, "loss": 0.523, "step": 18390 }, { "epoch": 0.4187242564230936, "grad_norm": 0.0111083984375, "learning_rate": 1.1863273267694595e-05, "loss": 0.4927, "step": 18400 }, { "epoch": 0.4189518239537583, "grad_norm": 11.75, "learning_rate": 1.1858629017276612e-05, "loss": 0.5641, "step": 18410 }, { "epoch": 0.419179391484423, "grad_norm": 71.0, "learning_rate": 1.1853984766858631e-05, "loss": 0.9872, "step": 18420 }, { "epoch": 0.41940695901508773, "grad_norm": 4.375, "learning_rate": 1.1849340516440648e-05, "loss": 0.6054, "step": 18430 }, { "epoch": 0.41963452654575245, "grad_norm": 167.0, "learning_rate": 1.1844696266022663e-05, "loss": 0.4379, "step": 18440 }, { "epoch": 0.41986209407641717, "grad_norm": 0.00119781494140625, "learning_rate": 1.1840052015604682e-05, "loss": 0.1155, "step": 18450 }, { "epoch": 0.4200896616070819, "grad_norm": 0.400390625, "learning_rate": 1.1835407765186699e-05, "loss": 0.9431, "step": 18460 }, { "epoch": 0.4203172291377466, "grad_norm": 0.01470947265625, "learning_rate": 1.1830763514768718e-05, "loss": 0.6564, "step": 18470 }, { "epoch": 0.4205447966684113, "grad_norm": 40.25, "learning_rate": 1.1826119264350735e-05, "loss": 0.7337, "step": 18480 }, { "epoch": 0.4207723641990761, "grad_norm": 318.0, "learning_rate": 1.1821475013932752e-05, "loss": 0.5047, "step": 18490 }, { "epoch": 0.4209999317297408, "grad_norm": 82.0, "learning_rate": 1.181683076351477e-05, "loss": 0.7341, "step": 18500 }, { "epoch": 0.42122749926040554, "grad_norm": 1.2421875, "learning_rate": 1.1812186513096786e-05, "loss": 0.7416, "step": 18510 }, { "epoch": 0.42145506679107025, "grad_norm": 292.0, "learning_rate": 1.1807542262678803e-05, "loss": 0.8252, "step": 18520 }, { "epoch": 0.421682634321735, "grad_norm": 0.07958984375, "learning_rate": 1.1802898012260822e-05, "loss": 0.3252, "step": 18530 }, { "epoch": 0.4219102018523997, "grad_norm": 282.0, "learning_rate": 1.1798253761842839e-05, "loss": 1.8573, "step": 18540 }, { "epoch": 0.4221377693830644, "grad_norm": 0.0101318359375, "learning_rate": 1.1793609511424858e-05, "loss": 0.4494, "step": 18550 }, { "epoch": 0.42236533691372913, "grad_norm": 139.0, "learning_rate": 1.1788965261006875e-05, "loss": 0.8999, "step": 18560 }, { "epoch": 0.42259290444439385, "grad_norm": 177.0, "learning_rate": 1.1784321010588893e-05, "loss": 1.5469, "step": 18570 }, { "epoch": 0.4228204719750586, "grad_norm": 37.0, "learning_rate": 1.1779676760170909e-05, "loss": 0.716, "step": 18580 }, { "epoch": 0.42304803950572334, "grad_norm": 54.25, "learning_rate": 1.1775032509752926e-05, "loss": 1.7722, "step": 18590 }, { "epoch": 0.42327560703638806, "grad_norm": 0.001922607421875, "learning_rate": 1.1770388259334945e-05, "loss": 1.4776, "step": 18600 }, { "epoch": 0.4235031745670528, "grad_norm": 141.0, "learning_rate": 1.1765744008916962e-05, "loss": 1.3288, "step": 18610 }, { "epoch": 0.4237307420977175, "grad_norm": 165.0, "learning_rate": 1.1761099758498979e-05, "loss": 1.0274, "step": 18620 }, { "epoch": 0.4239583096283822, "grad_norm": 0.1591796875, "learning_rate": 1.1756455508080998e-05, "loss": 0.8924, "step": 18630 }, { "epoch": 0.42418587715904693, "grad_norm": 1.7421875, "learning_rate": 1.1751811257663015e-05, "loss": 0.6444, "step": 18640 }, { "epoch": 0.42441344468971165, "grad_norm": 276.0, "learning_rate": 1.174716700724503e-05, "loss": 1.1335, "step": 18650 }, { "epoch": 0.4246410122203764, "grad_norm": 0.00148773193359375, "learning_rate": 1.1742522756827049e-05, "loss": 1.1421, "step": 18660 }, { "epoch": 0.42486857975104114, "grad_norm": 89.5, "learning_rate": 1.1737878506409066e-05, "loss": 0.5837, "step": 18670 }, { "epoch": 0.42509614728170586, "grad_norm": 185.0, "learning_rate": 1.1733234255991084e-05, "loss": 0.5884, "step": 18680 }, { "epoch": 0.4253237148123706, "grad_norm": 276.0, "learning_rate": 1.1728590005573102e-05, "loss": 0.8734, "step": 18690 }, { "epoch": 0.4255512823430353, "grad_norm": 17.375, "learning_rate": 1.172394575515512e-05, "loss": 0.3176, "step": 18700 }, { "epoch": 0.4257788498737, "grad_norm": 159.0, "learning_rate": 1.1719301504737137e-05, "loss": 1.1005, "step": 18710 }, { "epoch": 0.42600641740436473, "grad_norm": 352.0, "learning_rate": 1.1714657254319153e-05, "loss": 0.9345, "step": 18720 }, { "epoch": 0.42623398493502945, "grad_norm": 172.0, "learning_rate": 1.171001300390117e-05, "loss": 0.925, "step": 18730 }, { "epoch": 0.42646155246569417, "grad_norm": 113.5, "learning_rate": 1.1705368753483188e-05, "loss": 1.256, "step": 18740 }, { "epoch": 0.42668911999635895, "grad_norm": 111.5, "learning_rate": 1.1700724503065206e-05, "loss": 1.2663, "step": 18750 }, { "epoch": 0.42691668752702366, "grad_norm": 0.0732421875, "learning_rate": 1.1696080252647224e-05, "loss": 0.3866, "step": 18760 }, { "epoch": 0.4271442550576884, "grad_norm": 133.0, "learning_rate": 1.1691436002229241e-05, "loss": 1.3242, "step": 18770 }, { "epoch": 0.4273718225883531, "grad_norm": 0.0072021484375, "learning_rate": 1.168679175181126e-05, "loss": 1.0018, "step": 18780 }, { "epoch": 0.4275993901190178, "grad_norm": 130.0, "learning_rate": 1.1682147501393275e-05, "loss": 1.1158, "step": 18790 }, { "epoch": 0.42782695764968254, "grad_norm": 63.25, "learning_rate": 1.1677503250975293e-05, "loss": 0.4795, "step": 18800 }, { "epoch": 0.42805452518034726, "grad_norm": 266.0, "learning_rate": 1.1672859000557311e-05, "loss": 0.6352, "step": 18810 }, { "epoch": 0.428282092711012, "grad_norm": 136.0, "learning_rate": 1.1668214750139328e-05, "loss": 0.6358, "step": 18820 }, { "epoch": 0.4285096602416767, "grad_norm": 61.25, "learning_rate": 1.1663570499721345e-05, "loss": 0.9743, "step": 18830 }, { "epoch": 0.42873722777234147, "grad_norm": 0.51953125, "learning_rate": 1.1658926249303364e-05, "loss": 0.4739, "step": 18840 }, { "epoch": 0.4289647953030062, "grad_norm": 78.0, "learning_rate": 1.1654281998885381e-05, "loss": 0.664, "step": 18850 }, { "epoch": 0.4291923628336709, "grad_norm": 40.5, "learning_rate": 1.1649637748467397e-05, "loss": 0.5024, "step": 18860 }, { "epoch": 0.4294199303643356, "grad_norm": 1.25, "learning_rate": 1.1644993498049415e-05, "loss": 1.2693, "step": 18870 }, { "epoch": 0.42964749789500034, "grad_norm": 57.0, "learning_rate": 1.1640349247631432e-05, "loss": 0.455, "step": 18880 }, { "epoch": 0.42987506542566506, "grad_norm": 0.00061798095703125, "learning_rate": 1.1635704997213451e-05, "loss": 1.2344, "step": 18890 }, { "epoch": 0.4301026329563298, "grad_norm": 2.625, "learning_rate": 1.1631060746795468e-05, "loss": 0.3564, "step": 18900 }, { "epoch": 0.4303302004869945, "grad_norm": 75.5, "learning_rate": 1.1626416496377487e-05, "loss": 1.2859, "step": 18910 }, { "epoch": 0.4305577680176592, "grad_norm": 0.0140380859375, "learning_rate": 1.1621772245959504e-05, "loss": 1.2539, "step": 18920 }, { "epoch": 0.430785335548324, "grad_norm": 184.0, "learning_rate": 1.161712799554152e-05, "loss": 0.4846, "step": 18930 }, { "epoch": 0.4310129030789887, "grad_norm": 15.5625, "learning_rate": 1.1612483745123538e-05, "loss": 0.4321, "step": 18940 }, { "epoch": 0.4312404706096534, "grad_norm": 304.0, "learning_rate": 1.1607839494705555e-05, "loss": 1.1572, "step": 18950 }, { "epoch": 0.43146803814031814, "grad_norm": 169.0, "learning_rate": 1.1603195244287572e-05, "loss": 0.7285, "step": 18960 }, { "epoch": 0.43169560567098286, "grad_norm": 163.0, "learning_rate": 1.1598550993869591e-05, "loss": 0.9689, "step": 18970 }, { "epoch": 0.4319231732016476, "grad_norm": 0.06982421875, "learning_rate": 1.1593906743451608e-05, "loss": 0.8096, "step": 18980 }, { "epoch": 0.4321507407323123, "grad_norm": 300.0, "learning_rate": 1.1589262493033627e-05, "loss": 0.9258, "step": 18990 }, { "epoch": 0.432378308262977, "grad_norm": 17.375, "learning_rate": 1.1584618242615642e-05, "loss": 0.4611, "step": 19000 }, { "epoch": 0.43260587579364174, "grad_norm": 828.0, "learning_rate": 1.1579973992197659e-05, "loss": 0.8808, "step": 19010 }, { "epoch": 0.4328334433243065, "grad_norm": 0.01080322265625, "learning_rate": 1.1575329741779678e-05, "loss": 0.5733, "step": 19020 }, { "epoch": 0.43306101085497123, "grad_norm": 127.5, "learning_rate": 1.1570685491361695e-05, "loss": 0.8962, "step": 19030 }, { "epoch": 0.43328857838563595, "grad_norm": 0.0296630859375, "learning_rate": 1.1566041240943712e-05, "loss": 0.7101, "step": 19040 }, { "epoch": 0.43351614591630067, "grad_norm": 28.125, "learning_rate": 1.156139699052573e-05, "loss": 0.9459, "step": 19050 }, { "epoch": 0.4337437134469654, "grad_norm": 0.0185546875, "learning_rate": 1.1556752740107748e-05, "loss": 1.2666, "step": 19060 }, { "epoch": 0.4339712809776301, "grad_norm": 0.06396484375, "learning_rate": 1.1552108489689763e-05, "loss": 1.1724, "step": 19070 }, { "epoch": 0.4341988485082948, "grad_norm": 73.0, "learning_rate": 1.1547464239271782e-05, "loss": 0.938, "step": 19080 }, { "epoch": 0.43442641603895954, "grad_norm": 75.0, "learning_rate": 1.1542819988853799e-05, "loss": 0.6739, "step": 19090 }, { "epoch": 0.4346539835696243, "grad_norm": 101.0, "learning_rate": 1.1538175738435818e-05, "loss": 0.4455, "step": 19100 }, { "epoch": 0.43488155110028903, "grad_norm": 79.5, "learning_rate": 1.1533531488017835e-05, "loss": 0.7655, "step": 19110 }, { "epoch": 0.43510911863095375, "grad_norm": 294.0, "learning_rate": 1.1528887237599854e-05, "loss": 1.2181, "step": 19120 }, { "epoch": 0.43533668616161847, "grad_norm": 0.0010986328125, "learning_rate": 1.152424298718187e-05, "loss": 0.8817, "step": 19130 }, { "epoch": 0.4355642536922832, "grad_norm": 52.25, "learning_rate": 1.1519598736763886e-05, "loss": 0.5193, "step": 19140 }, { "epoch": 0.4357918212229479, "grad_norm": 55.0, "learning_rate": 1.1514954486345905e-05, "loss": 1.073, "step": 19150 }, { "epoch": 0.4360193887536126, "grad_norm": 980.0, "learning_rate": 1.1510310235927922e-05, "loss": 1.5026, "step": 19160 }, { "epoch": 0.43624695628427734, "grad_norm": 34.5, "learning_rate": 1.1505665985509939e-05, "loss": 0.2761, "step": 19170 }, { "epoch": 0.43647452381494206, "grad_norm": 1.8359375, "learning_rate": 1.1501021735091958e-05, "loss": 0.431, "step": 19180 }, { "epoch": 0.43670209134560684, "grad_norm": 0.58203125, "learning_rate": 1.1496377484673975e-05, "loss": 0.4126, "step": 19190 }, { "epoch": 0.43692965887627155, "grad_norm": 127.0, "learning_rate": 1.1491733234255993e-05, "loss": 0.2884, "step": 19200 }, { "epoch": 0.4371572264069363, "grad_norm": 150.0, "learning_rate": 1.1487088983838009e-05, "loss": 1.3961, "step": 19210 }, { "epoch": 0.437384793937601, "grad_norm": 246.0, "learning_rate": 1.1482444733420026e-05, "loss": 1.74, "step": 19220 }, { "epoch": 0.4376123614682657, "grad_norm": 0.00147247314453125, "learning_rate": 1.1477800483002045e-05, "loss": 0.3923, "step": 19230 }, { "epoch": 0.4378399289989304, "grad_norm": 64.5, "learning_rate": 1.1473156232584062e-05, "loss": 0.6952, "step": 19240 }, { "epoch": 0.43806749652959515, "grad_norm": 127.0, "learning_rate": 1.146851198216608e-05, "loss": 1.0268, "step": 19250 }, { "epoch": 0.43829506406025986, "grad_norm": 104.0, "learning_rate": 1.1463867731748097e-05, "loss": 0.3342, "step": 19260 }, { "epoch": 0.4385226315909246, "grad_norm": 149.0, "learning_rate": 1.1459223481330114e-05, "loss": 1.2058, "step": 19270 }, { "epoch": 0.43875019912158936, "grad_norm": 0.11962890625, "learning_rate": 1.1454579230912131e-05, "loss": 0.5729, "step": 19280 }, { "epoch": 0.4389777666522541, "grad_norm": 184.0, "learning_rate": 1.1449934980494149e-05, "loss": 0.6421, "step": 19290 }, { "epoch": 0.4392053341829188, "grad_norm": 358.0, "learning_rate": 1.1445290730076166e-05, "loss": 0.8291, "step": 19300 }, { "epoch": 0.4394329017135835, "grad_norm": 96.5, "learning_rate": 1.1440646479658184e-05, "loss": 0.6095, "step": 19310 }, { "epoch": 0.43966046924424823, "grad_norm": 89.0, "learning_rate": 1.1436002229240201e-05, "loss": 0.8361, "step": 19320 }, { "epoch": 0.43988803677491295, "grad_norm": 386.0, "learning_rate": 1.143135797882222e-05, "loss": 0.7607, "step": 19330 }, { "epoch": 0.44011560430557767, "grad_norm": 119.0, "learning_rate": 1.1426713728404237e-05, "loss": 0.7973, "step": 19340 }, { "epoch": 0.4403431718362424, "grad_norm": 239.0, "learning_rate": 1.1422069477986253e-05, "loss": 1.4038, "step": 19350 }, { "epoch": 0.4405707393669071, "grad_norm": 146.0, "learning_rate": 1.1417425227568271e-05, "loss": 0.5638, "step": 19360 }, { "epoch": 0.4407983068975719, "grad_norm": 0.00201416015625, "learning_rate": 1.1412780977150288e-05, "loss": 0.829, "step": 19370 }, { "epoch": 0.4410258744282366, "grad_norm": 42.25, "learning_rate": 1.1408136726732305e-05, "loss": 0.5206, "step": 19380 }, { "epoch": 0.4412534419589013, "grad_norm": 166.0, "learning_rate": 1.1403492476314324e-05, "loss": 1.0747, "step": 19390 }, { "epoch": 0.44148100948956603, "grad_norm": 0.21875, "learning_rate": 1.1398848225896341e-05, "loss": 0.6069, "step": 19400 }, { "epoch": 0.44170857702023075, "grad_norm": 94.5, "learning_rate": 1.139420397547836e-05, "loss": 0.3724, "step": 19410 }, { "epoch": 0.44193614455089547, "grad_norm": 85.0, "learning_rate": 1.1389559725060375e-05, "loss": 0.8044, "step": 19420 }, { "epoch": 0.4421637120815602, "grad_norm": 428.0, "learning_rate": 1.1384915474642392e-05, "loss": 0.7198, "step": 19430 }, { "epoch": 0.4423912796122249, "grad_norm": 0.031494140625, "learning_rate": 1.1380271224224411e-05, "loss": 1.1538, "step": 19440 }, { "epoch": 0.4426188471428896, "grad_norm": 84.5, "learning_rate": 1.1375626973806428e-05, "loss": 0.3187, "step": 19450 }, { "epoch": 0.4428464146735544, "grad_norm": 0.0115966796875, "learning_rate": 1.1370982723388447e-05, "loss": 1.1246, "step": 19460 }, { "epoch": 0.4430739822042191, "grad_norm": 109.5, "learning_rate": 1.1366338472970464e-05, "loss": 0.7567, "step": 19470 }, { "epoch": 0.44330154973488384, "grad_norm": 31.875, "learning_rate": 1.1361694222552481e-05, "loss": 0.4627, "step": 19480 }, { "epoch": 0.44352911726554856, "grad_norm": 2.578125, "learning_rate": 1.1357049972134498e-05, "loss": 0.6854, "step": 19490 }, { "epoch": 0.4437566847962133, "grad_norm": 73.5, "learning_rate": 1.1352405721716515e-05, "loss": 0.5787, "step": 19500 }, { "epoch": 0.443984252326878, "grad_norm": 107.5, "learning_rate": 1.1347761471298532e-05, "loss": 0.7934, "step": 19510 }, { "epoch": 0.4442118198575427, "grad_norm": 272.0, "learning_rate": 1.1343117220880551e-05, "loss": 0.7152, "step": 19520 }, { "epoch": 0.44443938738820743, "grad_norm": 145.0, "learning_rate": 1.1338472970462568e-05, "loss": 0.8625, "step": 19530 }, { "epoch": 0.44466695491887215, "grad_norm": 97.0, "learning_rate": 1.1333828720044587e-05, "loss": 0.4079, "step": 19540 }, { "epoch": 0.4448945224495369, "grad_norm": 1048.0, "learning_rate": 1.1329184469626604e-05, "loss": 1.1945, "step": 19550 }, { "epoch": 0.44512208998020164, "grad_norm": 146.0, "learning_rate": 1.132454021920862e-05, "loss": 0.9906, "step": 19560 }, { "epoch": 0.44534965751086636, "grad_norm": 76.5, "learning_rate": 1.1319895968790638e-05, "loss": 0.7638, "step": 19570 }, { "epoch": 0.4455772250415311, "grad_norm": 76.5, "learning_rate": 1.1315251718372655e-05, "loss": 1.132, "step": 19580 }, { "epoch": 0.4458047925721958, "grad_norm": 131.0, "learning_rate": 1.1310607467954674e-05, "loss": 0.5304, "step": 19590 }, { "epoch": 0.4460323601028605, "grad_norm": 99.5, "learning_rate": 1.130596321753669e-05, "loss": 1.0953, "step": 19600 }, { "epoch": 0.44625992763352523, "grad_norm": 184.0, "learning_rate": 1.1301318967118708e-05, "loss": 0.5375, "step": 19610 }, { "epoch": 0.44648749516418995, "grad_norm": 203.0, "learning_rate": 1.1296674716700727e-05, "loss": 0.4984, "step": 19620 }, { "epoch": 0.4467150626948547, "grad_norm": 0.00066375732421875, "learning_rate": 1.1292030466282742e-05, "loss": 1.0009, "step": 19630 }, { "epoch": 0.44694263022551944, "grad_norm": 139.0, "learning_rate": 1.1287386215864759e-05, "loss": 1.4666, "step": 19640 }, { "epoch": 0.44717019775618416, "grad_norm": 34.0, "learning_rate": 1.1282741965446778e-05, "loss": 0.1788, "step": 19650 }, { "epoch": 0.4473977652868489, "grad_norm": 0.0123291015625, "learning_rate": 1.1278097715028795e-05, "loss": 0.1462, "step": 19660 }, { "epoch": 0.4476253328175136, "grad_norm": 0.169921875, "learning_rate": 1.1273453464610814e-05, "loss": 0.8998, "step": 19670 }, { "epoch": 0.4478529003481783, "grad_norm": 232.0, "learning_rate": 1.126880921419283e-05, "loss": 0.9663, "step": 19680 }, { "epoch": 0.44808046787884304, "grad_norm": 948.0, "learning_rate": 1.1264164963774848e-05, "loss": 0.8919, "step": 19690 }, { "epoch": 0.44830803540950775, "grad_norm": 0.08642578125, "learning_rate": 1.1259520713356865e-05, "loss": 0.7653, "step": 19700 }, { "epoch": 0.4485356029401725, "grad_norm": 1.46875, "learning_rate": 1.1254876462938882e-05, "loss": 0.6203, "step": 19710 }, { "epoch": 0.44876317047083725, "grad_norm": 103.5, "learning_rate": 1.1250232212520899e-05, "loss": 0.8217, "step": 19720 }, { "epoch": 0.44899073800150197, "grad_norm": 0.00927734375, "learning_rate": 1.1245587962102918e-05, "loss": 0.8687, "step": 19730 }, { "epoch": 0.4492183055321667, "grad_norm": 213.0, "learning_rate": 1.1240943711684935e-05, "loss": 0.5222, "step": 19740 }, { "epoch": 0.4494458730628314, "grad_norm": 75.0, "learning_rate": 1.1236299461266953e-05, "loss": 0.7772, "step": 19750 }, { "epoch": 0.4496734405934961, "grad_norm": 0.0032958984375, "learning_rate": 1.123165521084897e-05, "loss": 1.6588, "step": 19760 }, { "epoch": 0.44990100812416084, "grad_norm": 31.75, "learning_rate": 1.1227010960430986e-05, "loss": 0.8505, "step": 19770 }, { "epoch": 0.45012857565482556, "grad_norm": 548.0, "learning_rate": 1.1222366710013005e-05, "loss": 1.2073, "step": 19780 }, { "epoch": 0.4503561431854903, "grad_norm": 180.0, "learning_rate": 1.1217722459595022e-05, "loss": 1.5482, "step": 19790 }, { "epoch": 0.450583710716155, "grad_norm": 0.00592041015625, "learning_rate": 1.121307820917704e-05, "loss": 1.486, "step": 19800 }, { "epoch": 0.45081127824681977, "grad_norm": 217.0, "learning_rate": 1.1208433958759057e-05, "loss": 0.7636, "step": 19810 }, { "epoch": 0.4510388457774845, "grad_norm": 26.75, "learning_rate": 1.1203789708341074e-05, "loss": 0.3005, "step": 19820 }, { "epoch": 0.4512664133081492, "grad_norm": 172.0, "learning_rate": 1.1199145457923093e-05, "loss": 1.0971, "step": 19830 }, { "epoch": 0.4514939808388139, "grad_norm": 143.0, "learning_rate": 1.1194501207505109e-05, "loss": 1.3792, "step": 19840 }, { "epoch": 0.45172154836947864, "grad_norm": 23.0, "learning_rate": 1.1189856957087126e-05, "loss": 0.8751, "step": 19850 }, { "epoch": 0.45194911590014336, "grad_norm": 240.0, "learning_rate": 1.1185212706669144e-05, "loss": 0.6203, "step": 19860 }, { "epoch": 0.4521766834308081, "grad_norm": 0.00921630859375, "learning_rate": 1.1180568456251161e-05, "loss": 0.9038, "step": 19870 }, { "epoch": 0.4524042509614728, "grad_norm": 18.5, "learning_rate": 1.117592420583318e-05, "loss": 0.559, "step": 19880 }, { "epoch": 0.4526318184921375, "grad_norm": 340.0, "learning_rate": 1.1171279955415197e-05, "loss": 1.44, "step": 19890 }, { "epoch": 0.4528593860228023, "grad_norm": 0.0025177001953125, "learning_rate": 1.1166635704997216e-05, "loss": 0.3392, "step": 19900 }, { "epoch": 0.453086953553467, "grad_norm": 144.0, "learning_rate": 1.1161991454579231e-05, "loss": 0.5448, "step": 19910 }, { "epoch": 0.4533145210841317, "grad_norm": 174.0, "learning_rate": 1.1157347204161248e-05, "loss": 0.3945, "step": 19920 }, { "epoch": 0.45354208861479645, "grad_norm": 103.5, "learning_rate": 1.1152702953743265e-05, "loss": 0.8188, "step": 19930 }, { "epoch": 0.45376965614546116, "grad_norm": 161.0, "learning_rate": 1.1148058703325284e-05, "loss": 0.8045, "step": 19940 }, { "epoch": 0.4539972236761259, "grad_norm": 117.0, "learning_rate": 1.1143414452907301e-05, "loss": 0.4256, "step": 19950 }, { "epoch": 0.4542247912067906, "grad_norm": 114.5, "learning_rate": 1.113877020248932e-05, "loss": 0.3405, "step": 19960 }, { "epoch": 0.4544523587374553, "grad_norm": 180.0, "learning_rate": 1.1134125952071337e-05, "loss": 0.8737, "step": 19970 }, { "epoch": 0.45467992626812004, "grad_norm": 1.0859375, "learning_rate": 1.1129481701653352e-05, "loss": 0.7511, "step": 19980 }, { "epoch": 0.4549074937987848, "grad_norm": 0.0074462890625, "learning_rate": 1.1124837451235371e-05, "loss": 0.4989, "step": 19990 }, { "epoch": 0.45513506132944953, "grad_norm": 159.0, "learning_rate": 1.1120193200817388e-05, "loss": 0.7811, "step": 20000 }, { "epoch": 0.45536262886011425, "grad_norm": 136.0, "learning_rate": 1.1115548950399407e-05, "loss": 0.9338, "step": 20010 }, { "epoch": 0.45559019639077897, "grad_norm": 0.039794921875, "learning_rate": 1.1110904699981424e-05, "loss": 0.6635, "step": 20020 }, { "epoch": 0.4558177639214437, "grad_norm": 100.0, "learning_rate": 1.1106260449563441e-05, "loss": 1.1098, "step": 20030 }, { "epoch": 0.4560453314521084, "grad_norm": 276.0, "learning_rate": 1.110161619914546e-05, "loss": 1.3526, "step": 20040 }, { "epoch": 0.4562728989827731, "grad_norm": 59.25, "learning_rate": 1.1096971948727475e-05, "loss": 0.9512, "step": 20050 }, { "epoch": 0.45650046651343784, "grad_norm": 10.375, "learning_rate": 1.1092327698309492e-05, "loss": 0.1804, "step": 20060 }, { "epoch": 0.45672803404410256, "grad_norm": 182.0, "learning_rate": 1.1087683447891511e-05, "loss": 0.8179, "step": 20070 }, { "epoch": 0.45695560157476733, "grad_norm": 158.0, "learning_rate": 1.1083039197473528e-05, "loss": 0.9248, "step": 20080 }, { "epoch": 0.45718316910543205, "grad_norm": 39.5, "learning_rate": 1.1078394947055547e-05, "loss": 0.8427, "step": 20090 }, { "epoch": 0.45741073663609677, "grad_norm": 0.1875, "learning_rate": 1.1073750696637564e-05, "loss": 0.7494, "step": 20100 }, { "epoch": 0.4576383041667615, "grad_norm": 0.041015625, "learning_rate": 1.1069106446219583e-05, "loss": 0.3357, "step": 20110 }, { "epoch": 0.4578658716974262, "grad_norm": 85.0, "learning_rate": 1.10644621958016e-05, "loss": 0.7039, "step": 20120 }, { "epoch": 0.4580934392280909, "grad_norm": 102.5, "learning_rate": 1.1059817945383615e-05, "loss": 0.3867, "step": 20130 }, { "epoch": 0.45832100675875564, "grad_norm": 130.0, "learning_rate": 1.1055173694965634e-05, "loss": 0.3962, "step": 20140 }, { "epoch": 0.45854857428942036, "grad_norm": 340.0, "learning_rate": 1.105052944454765e-05, "loss": 0.782, "step": 20150 }, { "epoch": 0.45877614182008514, "grad_norm": 11.9375, "learning_rate": 1.1045885194129668e-05, "loss": 0.9465, "step": 20160 }, { "epoch": 0.45900370935074986, "grad_norm": 0.095703125, "learning_rate": 1.1041240943711687e-05, "loss": 0.9731, "step": 20170 }, { "epoch": 0.4592312768814146, "grad_norm": 136.0, "learning_rate": 1.1036596693293704e-05, "loss": 0.7633, "step": 20180 }, { "epoch": 0.4594588444120793, "grad_norm": 268.0, "learning_rate": 1.1031952442875722e-05, "loss": 1.1034, "step": 20190 }, { "epoch": 0.459686411942744, "grad_norm": 8.75, "learning_rate": 1.1027308192457738e-05, "loss": 0.3671, "step": 20200 }, { "epoch": 0.45991397947340873, "grad_norm": 142.0, "learning_rate": 1.1022663942039755e-05, "loss": 1.1564, "step": 20210 }, { "epoch": 0.46014154700407345, "grad_norm": 43.25, "learning_rate": 1.1018019691621774e-05, "loss": 1.7516, "step": 20220 }, { "epoch": 0.46036911453473817, "grad_norm": 0.06103515625, "learning_rate": 1.101337544120379e-05, "loss": 1.276, "step": 20230 }, { "epoch": 0.4605966820654029, "grad_norm": 0.345703125, "learning_rate": 1.1008731190785808e-05, "loss": 0.8124, "step": 20240 }, { "epoch": 0.46082424959606766, "grad_norm": 164.0, "learning_rate": 1.1004086940367826e-05, "loss": 0.6578, "step": 20250 }, { "epoch": 0.4610518171267324, "grad_norm": 57.0, "learning_rate": 1.0999442689949844e-05, "loss": 1.6422, "step": 20260 }, { "epoch": 0.4612793846573971, "grad_norm": 0.002044677734375, "learning_rate": 1.0994798439531859e-05, "loss": 0.2822, "step": 20270 }, { "epoch": 0.4615069521880618, "grad_norm": 0.0034027099609375, "learning_rate": 1.0990154189113878e-05, "loss": 0.1614, "step": 20280 }, { "epoch": 0.46173451971872653, "grad_norm": 21.625, "learning_rate": 1.0985509938695895e-05, "loss": 0.6907, "step": 20290 }, { "epoch": 0.46196208724939125, "grad_norm": 199.0, "learning_rate": 1.0980865688277913e-05, "loss": 0.4411, "step": 20300 }, { "epoch": 0.46218965478005597, "grad_norm": 146.0, "learning_rate": 1.097622143785993e-05, "loss": 0.9095, "step": 20310 }, { "epoch": 0.4624172223107207, "grad_norm": 760.0, "learning_rate": 1.097157718744195e-05, "loss": 0.4325, "step": 20320 }, { "epoch": 0.4626447898413854, "grad_norm": 31.375, "learning_rate": 1.0966932937023966e-05, "loss": 0.4029, "step": 20330 }, { "epoch": 0.4628723573720502, "grad_norm": 9.0, "learning_rate": 1.0962288686605982e-05, "loss": 0.9176, "step": 20340 }, { "epoch": 0.4630999249027149, "grad_norm": 121.0, "learning_rate": 1.0957644436188e-05, "loss": 1.0816, "step": 20350 }, { "epoch": 0.4633274924333796, "grad_norm": 136.0, "learning_rate": 1.0953000185770017e-05, "loss": 0.8189, "step": 20360 }, { "epoch": 0.46355505996404434, "grad_norm": 0.0023651123046875, "learning_rate": 1.0948355935352035e-05, "loss": 0.6467, "step": 20370 }, { "epoch": 0.46378262749470905, "grad_norm": 224.0, "learning_rate": 1.0943711684934053e-05, "loss": 1.314, "step": 20380 }, { "epoch": 0.4640101950253738, "grad_norm": 17.125, "learning_rate": 1.093906743451607e-05, "loss": 0.368, "step": 20390 }, { "epoch": 0.4642377625560385, "grad_norm": 0.0255126953125, "learning_rate": 1.0934423184098089e-05, "loss": 0.7684, "step": 20400 }, { "epoch": 0.4644653300867032, "grad_norm": 0.000606536865234375, "learning_rate": 1.0929778933680104e-05, "loss": 0.7655, "step": 20410 }, { "epoch": 0.46469289761736793, "grad_norm": 0.009521484375, "learning_rate": 1.0925134683262121e-05, "loss": 0.5796, "step": 20420 }, { "epoch": 0.4649204651480327, "grad_norm": 93.5, "learning_rate": 1.092049043284414e-05, "loss": 1.0314, "step": 20430 }, { "epoch": 0.4651480326786974, "grad_norm": 67.0, "learning_rate": 1.0915846182426157e-05, "loss": 0.9501, "step": 20440 }, { "epoch": 0.46537560020936214, "grad_norm": 65.5, "learning_rate": 1.0911201932008176e-05, "loss": 0.9243, "step": 20450 }, { "epoch": 0.46560316774002686, "grad_norm": 189.0, "learning_rate": 1.0906557681590193e-05, "loss": 0.9611, "step": 20460 }, { "epoch": 0.4658307352706916, "grad_norm": 8.1875, "learning_rate": 1.090191343117221e-05, "loss": 0.9024, "step": 20470 }, { "epoch": 0.4660583028013563, "grad_norm": 0.0003566741943359375, "learning_rate": 1.0897269180754226e-05, "loss": 0.132, "step": 20480 }, { "epoch": 0.466285870332021, "grad_norm": 0.36328125, "learning_rate": 1.0892624930336244e-05, "loss": 0.8165, "step": 20490 }, { "epoch": 0.46651343786268573, "grad_norm": 0.002716064453125, "learning_rate": 1.0887980679918261e-05, "loss": 0.9115, "step": 20500 }, { "epoch": 0.46674100539335045, "grad_norm": 264.0, "learning_rate": 1.088333642950028e-05, "loss": 1.2762, "step": 20510 }, { "epoch": 0.4669685729240152, "grad_norm": 448.0, "learning_rate": 1.0878692179082297e-05, "loss": 0.8869, "step": 20520 }, { "epoch": 0.46719614045467994, "grad_norm": 91.0, "learning_rate": 1.0874047928664316e-05, "loss": 0.3867, "step": 20530 }, { "epoch": 0.46742370798534466, "grad_norm": 11.75, "learning_rate": 1.0869403678246333e-05, "loss": 0.4353, "step": 20540 }, { "epoch": 0.4676512755160094, "grad_norm": 0.0022430419921875, "learning_rate": 1.0864759427828348e-05, "loss": 0.5758, "step": 20550 }, { "epoch": 0.4678788430466741, "grad_norm": 147.0, "learning_rate": 1.0860115177410367e-05, "loss": 1.1936, "step": 20560 }, { "epoch": 0.4681064105773388, "grad_norm": 0.00116729736328125, "learning_rate": 1.0855470926992384e-05, "loss": 1.103, "step": 20570 }, { "epoch": 0.46833397810800353, "grad_norm": 98.0, "learning_rate": 1.0850826676574401e-05, "loss": 0.6932, "step": 20580 }, { "epoch": 0.46856154563866825, "grad_norm": 0.004180908203125, "learning_rate": 1.084618242615642e-05, "loss": 1.0805, "step": 20590 }, { "epoch": 0.468789113169333, "grad_norm": 8.0, "learning_rate": 1.0841538175738437e-05, "loss": 0.0198, "step": 20600 }, { "epoch": 0.46901668069999775, "grad_norm": 0.2734375, "learning_rate": 1.0836893925320456e-05, "loss": 0.8611, "step": 20610 }, { "epoch": 0.46924424823066246, "grad_norm": 0.05517578125, "learning_rate": 1.0832249674902471e-05, "loss": 0.9615, "step": 20620 }, { "epoch": 0.4694718157613272, "grad_norm": 0.9375, "learning_rate": 1.0827605424484488e-05, "loss": 0.6407, "step": 20630 }, { "epoch": 0.4696993832919919, "grad_norm": 13.125, "learning_rate": 1.0822961174066507e-05, "loss": 0.7758, "step": 20640 }, { "epoch": 0.4699269508226566, "grad_norm": 75.5, "learning_rate": 1.0818316923648524e-05, "loss": 1.1411, "step": 20650 }, { "epoch": 0.47015451835332134, "grad_norm": 0.140625, "learning_rate": 1.0813672673230543e-05, "loss": 0.6177, "step": 20660 }, { "epoch": 0.47038208588398606, "grad_norm": 59.25, "learning_rate": 1.080902842281256e-05, "loss": 0.4813, "step": 20670 }, { "epoch": 0.4706096534146508, "grad_norm": 233.0, "learning_rate": 1.0804384172394577e-05, "loss": 0.7685, "step": 20680 }, { "epoch": 0.47083722094531555, "grad_norm": 80.5, "learning_rate": 1.0799739921976594e-05, "loss": 2.3902, "step": 20690 }, { "epoch": 0.47106478847598027, "grad_norm": 195.0, "learning_rate": 1.0795095671558611e-05, "loss": 2.2526, "step": 20700 }, { "epoch": 0.471292356006645, "grad_norm": 103.5, "learning_rate": 1.0790451421140628e-05, "loss": 0.8075, "step": 20710 }, { "epoch": 0.4715199235373097, "grad_norm": 159.0, "learning_rate": 1.0785807170722647e-05, "loss": 0.2975, "step": 20720 }, { "epoch": 0.4717474910679744, "grad_norm": 0.0191650390625, "learning_rate": 1.0781162920304664e-05, "loss": 0.3904, "step": 20730 }, { "epoch": 0.47197505859863914, "grad_norm": 94.0, "learning_rate": 1.0776518669886682e-05, "loss": 0.9472, "step": 20740 }, { "epoch": 0.47220262612930386, "grad_norm": 147.0, "learning_rate": 1.07718744194687e-05, "loss": 1.5331, "step": 20750 }, { "epoch": 0.4724301936599686, "grad_norm": 23.0, "learning_rate": 1.0767230169050715e-05, "loss": 0.7488, "step": 20760 }, { "epoch": 0.4726577611906333, "grad_norm": 56.0, "learning_rate": 1.0762585918632734e-05, "loss": 0.7023, "step": 20770 }, { "epoch": 0.47288532872129807, "grad_norm": 428.0, "learning_rate": 1.075794166821475e-05, "loss": 1.1723, "step": 20780 }, { "epoch": 0.4731128962519628, "grad_norm": 5.75, "learning_rate": 1.075329741779677e-05, "loss": 1.0845, "step": 20790 }, { "epoch": 0.4733404637826275, "grad_norm": 380.0, "learning_rate": 1.0748653167378787e-05, "loss": 0.8337, "step": 20800 }, { "epoch": 0.4735680313132922, "grad_norm": 136.0, "learning_rate": 1.0744008916960804e-05, "loss": 1.8259, "step": 20810 }, { "epoch": 0.47379559884395694, "grad_norm": 43.0, "learning_rate": 1.0739364666542822e-05, "loss": 0.8661, "step": 20820 }, { "epoch": 0.47402316637462166, "grad_norm": 138.0, "learning_rate": 1.0734720416124838e-05, "loss": 0.3491, "step": 20830 }, { "epoch": 0.4742507339052864, "grad_norm": 93.5, "learning_rate": 1.0730076165706855e-05, "loss": 0.1836, "step": 20840 }, { "epoch": 0.4744783014359511, "grad_norm": 0.2119140625, "learning_rate": 1.0725431915288873e-05, "loss": 0.5908, "step": 20850 }, { "epoch": 0.4747058689666158, "grad_norm": 54.0, "learning_rate": 1.072078766487089e-05, "loss": 0.2546, "step": 20860 }, { "epoch": 0.4749334364972806, "grad_norm": 0.003814697265625, "learning_rate": 1.071614341445291e-05, "loss": 0.2712, "step": 20870 }, { "epoch": 0.4751610040279453, "grad_norm": 0.5, "learning_rate": 1.0711499164034926e-05, "loss": 0.6389, "step": 20880 }, { "epoch": 0.47538857155861003, "grad_norm": 0.259765625, "learning_rate": 1.0706854913616943e-05, "loss": 0.2218, "step": 20890 }, { "epoch": 0.47561613908927475, "grad_norm": 0.01080322265625, "learning_rate": 1.070221066319896e-05, "loss": 1.0831, "step": 20900 }, { "epoch": 0.47584370661993947, "grad_norm": 104.0, "learning_rate": 1.0697566412780978e-05, "loss": 0.3012, "step": 20910 }, { "epoch": 0.4760712741506042, "grad_norm": 0.0703125, "learning_rate": 1.0692922162362995e-05, "loss": 0.4566, "step": 20920 }, { "epoch": 0.4762988416812689, "grad_norm": 452.0, "learning_rate": 1.0688277911945013e-05, "loss": 0.811, "step": 20930 }, { "epoch": 0.4765264092119336, "grad_norm": 206.0, "learning_rate": 1.068363366152703e-05, "loss": 0.4731, "step": 20940 }, { "epoch": 0.47675397674259834, "grad_norm": 418.0, "learning_rate": 1.0678989411109049e-05, "loss": 1.0727, "step": 20950 }, { "epoch": 0.4769815442732631, "grad_norm": 0.1103515625, "learning_rate": 1.0674345160691066e-05, "loss": 0.9641, "step": 20960 }, { "epoch": 0.47720911180392783, "grad_norm": 0.03564453125, "learning_rate": 1.0669700910273082e-05, "loss": 0.7316, "step": 20970 }, { "epoch": 0.47743667933459255, "grad_norm": 106.5, "learning_rate": 1.06650566598551e-05, "loss": 0.7342, "step": 20980 }, { "epoch": 0.47766424686525727, "grad_norm": 55.25, "learning_rate": 1.0660412409437117e-05, "loss": 0.4166, "step": 20990 }, { "epoch": 0.477891814395922, "grad_norm": 5.4375, "learning_rate": 1.0655768159019136e-05, "loss": 0.6077, "step": 21000 }, { "epoch": 0.4781193819265867, "grad_norm": 62.5, "learning_rate": 1.0651123908601153e-05, "loss": 0.9724, "step": 21010 }, { "epoch": 0.4783469494572514, "grad_norm": 9.5625, "learning_rate": 1.064647965818317e-05, "loss": 0.6352, "step": 21020 }, { "epoch": 0.47857451698791614, "grad_norm": 32.25, "learning_rate": 1.0641835407765189e-05, "loss": 0.9509, "step": 21030 }, { "epoch": 0.47880208451858086, "grad_norm": 123.0, "learning_rate": 1.0637191157347204e-05, "loss": 0.8612, "step": 21040 }, { "epoch": 0.47902965204924564, "grad_norm": 0.004669189453125, "learning_rate": 1.0632546906929221e-05, "loss": 1.062, "step": 21050 }, { "epoch": 0.47925721957991035, "grad_norm": 0.205078125, "learning_rate": 1.062790265651124e-05, "loss": 0.4264, "step": 21060 }, { "epoch": 0.4794847871105751, "grad_norm": 124.0, "learning_rate": 1.0623258406093257e-05, "loss": 1.4206, "step": 21070 }, { "epoch": 0.4797123546412398, "grad_norm": 131.0, "learning_rate": 1.0618614155675276e-05, "loss": 1.0404, "step": 21080 }, { "epoch": 0.4799399221719045, "grad_norm": 0.89453125, "learning_rate": 1.0613969905257293e-05, "loss": 0.5529, "step": 21090 }, { "epoch": 0.48016748970256923, "grad_norm": 62.25, "learning_rate": 1.0609325654839312e-05, "loss": 0.6335, "step": 21100 }, { "epoch": 0.48039505723323395, "grad_norm": 226.0, "learning_rate": 1.0604681404421327e-05, "loss": 1.5953, "step": 21110 }, { "epoch": 0.48062262476389866, "grad_norm": 84.0, "learning_rate": 1.0600037154003344e-05, "loss": 0.4039, "step": 21120 }, { "epoch": 0.48085019229456344, "grad_norm": 0.01226806640625, "learning_rate": 1.0595392903585361e-05, "loss": 0.494, "step": 21130 }, { "epoch": 0.48107775982522816, "grad_norm": 4.28125, "learning_rate": 1.059074865316738e-05, "loss": 1.2988, "step": 21140 }, { "epoch": 0.4813053273558929, "grad_norm": 105.5, "learning_rate": 1.0586104402749397e-05, "loss": 0.6353, "step": 21150 }, { "epoch": 0.4815328948865576, "grad_norm": 193.0, "learning_rate": 1.0581460152331416e-05, "loss": 0.756, "step": 21160 }, { "epoch": 0.4817604624172223, "grad_norm": 114.5, "learning_rate": 1.0576815901913433e-05, "loss": 0.8195, "step": 21170 }, { "epoch": 0.48198802994788703, "grad_norm": 0.0024261474609375, "learning_rate": 1.0572171651495448e-05, "loss": 0.6462, "step": 21180 }, { "epoch": 0.48221559747855175, "grad_norm": 1.5390625, "learning_rate": 1.0567527401077467e-05, "loss": 0.3883, "step": 21190 }, { "epoch": 0.48244316500921647, "grad_norm": 239.0, "learning_rate": 1.0562883150659484e-05, "loss": 0.9778, "step": 21200 }, { "epoch": 0.4826707325398812, "grad_norm": 35.5, "learning_rate": 1.0558238900241503e-05, "loss": 1.1536, "step": 21210 }, { "epoch": 0.48289830007054596, "grad_norm": 1.0078125, "learning_rate": 1.055359464982352e-05, "loss": 0.6755, "step": 21220 }, { "epoch": 0.4831258676012107, "grad_norm": 109.0, "learning_rate": 1.0548950399405537e-05, "loss": 0.97, "step": 21230 }, { "epoch": 0.4833534351318754, "grad_norm": 0.546875, "learning_rate": 1.0544306148987556e-05, "loss": 0.0725, "step": 21240 }, { "epoch": 0.4835810026625401, "grad_norm": 0.01007080078125, "learning_rate": 1.0539661898569571e-05, "loss": 0.9112, "step": 21250 }, { "epoch": 0.48380857019320483, "grad_norm": 142.0, "learning_rate": 1.0535017648151588e-05, "loss": 0.4495, "step": 21260 }, { "epoch": 0.48403613772386955, "grad_norm": 98.5, "learning_rate": 1.0530373397733607e-05, "loss": 0.84, "step": 21270 }, { "epoch": 0.48426370525453427, "grad_norm": 66.5, "learning_rate": 1.0525729147315624e-05, "loss": 0.9419, "step": 21280 }, { "epoch": 0.484491272785199, "grad_norm": 620.0, "learning_rate": 1.0521084896897643e-05, "loss": 1.346, "step": 21290 }, { "epoch": 0.4847188403158637, "grad_norm": 12.25, "learning_rate": 1.051644064647966e-05, "loss": 0.5872, "step": 21300 }, { "epoch": 0.4849464078465285, "grad_norm": 243.0, "learning_rate": 1.0511796396061678e-05, "loss": 1.2269, "step": 21310 }, { "epoch": 0.4851739753771932, "grad_norm": 286.0, "learning_rate": 1.0507152145643694e-05, "loss": 1.1672, "step": 21320 }, { "epoch": 0.4854015429078579, "grad_norm": 408.0, "learning_rate": 1.050250789522571e-05, "loss": 0.8389, "step": 21330 }, { "epoch": 0.48562911043852264, "grad_norm": 392.0, "learning_rate": 1.049786364480773e-05, "loss": 0.988, "step": 21340 }, { "epoch": 0.48585667796918736, "grad_norm": 60.0, "learning_rate": 1.0493219394389747e-05, "loss": 1.0203, "step": 21350 }, { "epoch": 0.4860842454998521, "grad_norm": 0.02197265625, "learning_rate": 1.0488575143971764e-05, "loss": 0.4647, "step": 21360 }, { "epoch": 0.4863118130305168, "grad_norm": 11.0625, "learning_rate": 1.0483930893553782e-05, "loss": 0.3202, "step": 21370 }, { "epoch": 0.4865393805611815, "grad_norm": 199.0, "learning_rate": 1.04792866431358e-05, "loss": 1.0421, "step": 21380 }, { "epoch": 0.48676694809184623, "grad_norm": 115.5, "learning_rate": 1.0474642392717815e-05, "loss": 0.2598, "step": 21390 }, { "epoch": 0.486994515622511, "grad_norm": 0.23828125, "learning_rate": 1.0469998142299834e-05, "loss": 0.2841, "step": 21400 }, { "epoch": 0.4872220831531757, "grad_norm": 118.0, "learning_rate": 1.046535389188185e-05, "loss": 0.2243, "step": 21410 }, { "epoch": 0.48744965068384044, "grad_norm": 240.0, "learning_rate": 1.046070964146387e-05, "loss": 1.147, "step": 21420 }, { "epoch": 0.48767721821450516, "grad_norm": 88.0, "learning_rate": 1.0456065391045886e-05, "loss": 0.5047, "step": 21430 }, { "epoch": 0.4879047857451699, "grad_norm": 193.0, "learning_rate": 1.0451421140627903e-05, "loss": 1.4793, "step": 21440 }, { "epoch": 0.4881323532758346, "grad_norm": 0.236328125, "learning_rate": 1.0446776890209922e-05, "loss": 0.3805, "step": 21450 }, { "epoch": 0.4883599208064993, "grad_norm": 0.002716064453125, "learning_rate": 1.0442132639791938e-05, "loss": 0.4939, "step": 21460 }, { "epoch": 0.48858748833716403, "grad_norm": 0.02001953125, "learning_rate": 1.0437488389373955e-05, "loss": 0.6053, "step": 21470 }, { "epoch": 0.48881505586782875, "grad_norm": 0.84375, "learning_rate": 1.0432844138955973e-05, "loss": 0.8262, "step": 21480 }, { "epoch": 0.4890426233984935, "grad_norm": 91.0, "learning_rate": 1.042819988853799e-05, "loss": 0.8544, "step": 21490 }, { "epoch": 0.48927019092915824, "grad_norm": 132.0, "learning_rate": 1.042355563812001e-05, "loss": 0.6098, "step": 21500 }, { "epoch": 0.48949775845982296, "grad_norm": 308.0, "learning_rate": 1.0418911387702026e-05, "loss": 1.414, "step": 21510 }, { "epoch": 0.4897253259904877, "grad_norm": 216.0, "learning_rate": 1.0414267137284045e-05, "loss": 0.759, "step": 21520 }, { "epoch": 0.4899528935211524, "grad_norm": 370.0, "learning_rate": 1.040962288686606e-05, "loss": 0.8881, "step": 21530 }, { "epoch": 0.4901804610518171, "grad_norm": 187.0, "learning_rate": 1.0404978636448077e-05, "loss": 0.6893, "step": 21540 }, { "epoch": 0.49040802858248184, "grad_norm": 0.6875, "learning_rate": 1.0400334386030096e-05, "loss": 0.6515, "step": 21550 }, { "epoch": 0.49063559611314655, "grad_norm": 51.0, "learning_rate": 1.0395690135612113e-05, "loss": 1.3642, "step": 21560 }, { "epoch": 0.4908631636438113, "grad_norm": 0.0009002685546875, "learning_rate": 1.039104588519413e-05, "loss": 0.1521, "step": 21570 }, { "epoch": 0.49109073117447605, "grad_norm": 244.0, "learning_rate": 1.0386401634776149e-05, "loss": 1.5877, "step": 21580 }, { "epoch": 0.49131829870514077, "grad_norm": 184.0, "learning_rate": 1.0381757384358166e-05, "loss": 0.7904, "step": 21590 }, { "epoch": 0.4915458662358055, "grad_norm": 0.1923828125, "learning_rate": 1.0377113133940181e-05, "loss": 1.0193, "step": 21600 }, { "epoch": 0.4917734337664702, "grad_norm": 155.0, "learning_rate": 1.03724688835222e-05, "loss": 1.4354, "step": 21610 }, { "epoch": 0.4920010012971349, "grad_norm": 180.0, "learning_rate": 1.0367824633104217e-05, "loss": 1.8819, "step": 21620 }, { "epoch": 0.49222856882779964, "grad_norm": 47.5, "learning_rate": 1.0363180382686236e-05, "loss": 0.6474, "step": 21630 }, { "epoch": 0.49245613635846436, "grad_norm": 0.0380859375, "learning_rate": 1.0358536132268253e-05, "loss": 0.4997, "step": 21640 }, { "epoch": 0.4926837038891291, "grad_norm": 75.0, "learning_rate": 1.0353891881850272e-05, "loss": 0.9377, "step": 21650 }, { "epoch": 0.49291127141979385, "grad_norm": 286.0, "learning_rate": 1.0349247631432289e-05, "loss": 0.8414, "step": 21660 }, { "epoch": 0.49313883895045857, "grad_norm": 23.25, "learning_rate": 1.0344603381014304e-05, "loss": 0.5696, "step": 21670 }, { "epoch": 0.4933664064811233, "grad_norm": 0.29296875, "learning_rate": 1.0339959130596321e-05, "loss": 0.6264, "step": 21680 }, { "epoch": 0.493593974011788, "grad_norm": 0.01458740234375, "learning_rate": 1.033531488017834e-05, "loss": 1.57, "step": 21690 }, { "epoch": 0.4938215415424527, "grad_norm": 0.796875, "learning_rate": 1.0330670629760357e-05, "loss": 1.0515, "step": 21700 }, { "epoch": 0.49404910907311744, "grad_norm": 0.00439453125, "learning_rate": 1.0326026379342376e-05, "loss": 0.8273, "step": 21710 }, { "epoch": 0.49427667660378216, "grad_norm": 251.0, "learning_rate": 1.0321382128924393e-05, "loss": 1.6267, "step": 21720 }, { "epoch": 0.4945042441344469, "grad_norm": 6.03125, "learning_rate": 1.0316737878506412e-05, "loss": 0.6707, "step": 21730 }, { "epoch": 0.4947318116651116, "grad_norm": 10.1875, "learning_rate": 1.0312093628088427e-05, "loss": 0.8452, "step": 21740 }, { "epoch": 0.49495937919577637, "grad_norm": 228.0, "learning_rate": 1.0307449377670444e-05, "loss": 0.5863, "step": 21750 }, { "epoch": 0.4951869467264411, "grad_norm": 7.28125, "learning_rate": 1.0302805127252463e-05, "loss": 1.1342, "step": 21760 }, { "epoch": 0.4954145142571058, "grad_norm": 0.0014801025390625, "learning_rate": 1.029816087683448e-05, "loss": 0.5463, "step": 21770 }, { "epoch": 0.4956420817877705, "grad_norm": 0.5625, "learning_rate": 1.0293516626416497e-05, "loss": 1.4651, "step": 21780 }, { "epoch": 0.49586964931843525, "grad_norm": 17.875, "learning_rate": 1.0288872375998516e-05, "loss": 0.5195, "step": 21790 }, { "epoch": 0.49609721684909996, "grad_norm": 5.46875, "learning_rate": 1.0284228125580533e-05, "loss": 0.5849, "step": 21800 }, { "epoch": 0.4963247843797647, "grad_norm": 202.0, "learning_rate": 1.0279583875162548e-05, "loss": 0.4254, "step": 21810 }, { "epoch": 0.4965523519104294, "grad_norm": 0.000896453857421875, "learning_rate": 1.0274939624744567e-05, "loss": 1.0013, "step": 21820 }, { "epoch": 0.4967799194410941, "grad_norm": 74.5, "learning_rate": 1.0270295374326584e-05, "loss": 0.94, "step": 21830 }, { "epoch": 0.4970074869717589, "grad_norm": 0.00089263916015625, "learning_rate": 1.0265651123908603e-05, "loss": 0.7385, "step": 21840 }, { "epoch": 0.4972350545024236, "grad_norm": 100.0, "learning_rate": 1.026100687349062e-05, "loss": 0.3209, "step": 21850 }, { "epoch": 0.49746262203308833, "grad_norm": 51.25, "learning_rate": 1.0256362623072638e-05, "loss": 0.7604, "step": 21860 }, { "epoch": 0.49769018956375305, "grad_norm": 142.0, "learning_rate": 1.0251718372654655e-05, "loss": 0.6284, "step": 21870 }, { "epoch": 0.49791775709441777, "grad_norm": 195.0, "learning_rate": 1.024707412223667e-05, "loss": 0.6945, "step": 21880 }, { "epoch": 0.4981453246250825, "grad_norm": 172.0, "learning_rate": 1.024242987181869e-05, "loss": 1.3041, "step": 21890 }, { "epoch": 0.4983728921557472, "grad_norm": 132.0, "learning_rate": 1.0237785621400707e-05, "loss": 0.7655, "step": 21900 }, { "epoch": 0.4986004596864119, "grad_norm": 194.0, "learning_rate": 1.0233141370982724e-05, "loss": 0.5915, "step": 21910 }, { "epoch": 0.49882802721707664, "grad_norm": 80.0, "learning_rate": 1.0228497120564742e-05, "loss": 0.4883, "step": 21920 }, { "epoch": 0.4990555947477414, "grad_norm": 134.0, "learning_rate": 1.022385287014676e-05, "loss": 0.647, "step": 21930 }, { "epoch": 0.49928316227840613, "grad_norm": 194.0, "learning_rate": 1.0219208619728778e-05, "loss": 1.392, "step": 21940 }, { "epoch": 0.49951072980907085, "grad_norm": 72.5, "learning_rate": 1.0214564369310794e-05, "loss": 0.6214, "step": 21950 }, { "epoch": 0.49973829733973557, "grad_norm": 221.0, "learning_rate": 1.020992011889281e-05, "loss": 1.1992, "step": 21960 }, { "epoch": 0.4999658648704003, "grad_norm": 214.0, "learning_rate": 1.020527586847483e-05, "loss": 0.2643, "step": 21970 }, { "epoch": 0.5001934324010651, "grad_norm": 454.0, "learning_rate": 1.0200631618056846e-05, "loss": 0.5856, "step": 21980 }, { "epoch": 0.5004209999317297, "grad_norm": 0.84765625, "learning_rate": 1.0195987367638863e-05, "loss": 0.2545, "step": 21990 }, { "epoch": 0.5006485674623945, "grad_norm": 0.0257568359375, "learning_rate": 1.0191343117220882e-05, "loss": 0.5556, "step": 22000 }, { "epoch": 0.5008761349930592, "grad_norm": 0.0439453125, "learning_rate": 1.01866988668029e-05, "loss": 0.4605, "step": 22010 }, { "epoch": 0.5011037025237239, "grad_norm": 120.0, "learning_rate": 1.0182054616384915e-05, "loss": 1.0527, "step": 22020 }, { "epoch": 0.5013312700543886, "grad_norm": 32.5, "learning_rate": 1.0177410365966933e-05, "loss": 0.8035, "step": 22030 }, { "epoch": 0.5015588375850534, "grad_norm": 0.90234375, "learning_rate": 1.017276611554895e-05, "loss": 0.3515, "step": 22040 }, { "epoch": 0.501786405115718, "grad_norm": 164.0, "learning_rate": 1.016812186513097e-05, "loss": 0.7802, "step": 22050 }, { "epoch": 0.5020139726463828, "grad_norm": 684.0, "learning_rate": 1.0163477614712986e-05, "loss": 0.5205, "step": 22060 }, { "epoch": 0.5022415401770476, "grad_norm": 0.66015625, "learning_rate": 1.0158833364295005e-05, "loss": 0.99, "step": 22070 }, { "epoch": 0.5024691077077122, "grad_norm": 79.5, "learning_rate": 1.0154189113877022e-05, "loss": 1.0367, "step": 22080 }, { "epoch": 0.502696675238377, "grad_norm": 155.0, "learning_rate": 1.0149544863459037e-05, "loss": 0.6468, "step": 22090 }, { "epoch": 0.5029242427690417, "grad_norm": 51.25, "learning_rate": 1.0144900613041056e-05, "loss": 0.6571, "step": 22100 }, { "epoch": 0.5031518102997065, "grad_norm": 0.06201171875, "learning_rate": 1.0140256362623073e-05, "loss": 0.668, "step": 22110 }, { "epoch": 0.5033793778303711, "grad_norm": 155.0, "learning_rate": 1.013561211220509e-05, "loss": 1.2999, "step": 22120 }, { "epoch": 0.5036069453610359, "grad_norm": 151.0, "learning_rate": 1.0130967861787109e-05, "loss": 0.6087, "step": 22130 }, { "epoch": 0.5038345128917006, "grad_norm": 216.0, "learning_rate": 1.0126323611369126e-05, "loss": 0.5312, "step": 22140 }, { "epoch": 0.5040620804223653, "grad_norm": 752.0, "learning_rate": 1.0121679360951145e-05, "loss": 1.02, "step": 22150 }, { "epoch": 0.5042896479530301, "grad_norm": 0.00160980224609375, "learning_rate": 1.011703511053316e-05, "loss": 0.5102, "step": 22160 }, { "epoch": 0.5045172154836948, "grad_norm": 1.3515625, "learning_rate": 1.0112390860115177e-05, "loss": 1.1732, "step": 22170 }, { "epoch": 0.5047447830143595, "grad_norm": 0.07177734375, "learning_rate": 1.0107746609697196e-05, "loss": 0.329, "step": 22180 }, { "epoch": 0.5049723505450242, "grad_norm": 148.0, "learning_rate": 1.0103102359279213e-05, "loss": 0.4687, "step": 22190 }, { "epoch": 0.505199918075689, "grad_norm": 228.0, "learning_rate": 1.0098458108861232e-05, "loss": 0.5934, "step": 22200 }, { "epoch": 0.5054274856063536, "grad_norm": 168.0, "learning_rate": 1.0093813858443249e-05, "loss": 0.6435, "step": 22210 }, { "epoch": 0.5056550531370184, "grad_norm": 396.0, "learning_rate": 1.0089169608025266e-05, "loss": 0.8552, "step": 22220 }, { "epoch": 0.5058826206676831, "grad_norm": 0.058349609375, "learning_rate": 1.0084525357607283e-05, "loss": 0.6292, "step": 22230 }, { "epoch": 0.5061101881983479, "grad_norm": 50.5, "learning_rate": 1.00798811071893e-05, "loss": 0.5905, "step": 22240 }, { "epoch": 0.5063377557290126, "grad_norm": 6.65625, "learning_rate": 1.0075236856771317e-05, "loss": 0.5234, "step": 22250 }, { "epoch": 0.5065653232596773, "grad_norm": 1.2109375, "learning_rate": 1.0070592606353336e-05, "loss": 0.8258, "step": 22260 }, { "epoch": 0.5067928907903421, "grad_norm": 162.0, "learning_rate": 1.0065948355935353e-05, "loss": 0.2971, "step": 22270 }, { "epoch": 0.5070204583210067, "grad_norm": 143.0, "learning_rate": 1.0061304105517372e-05, "loss": 1.2603, "step": 22280 }, { "epoch": 0.5072480258516715, "grad_norm": 6.09375, "learning_rate": 1.0056659855099389e-05, "loss": 0.897, "step": 22290 }, { "epoch": 0.5074755933823362, "grad_norm": 212.0, "learning_rate": 1.0052015604681404e-05, "loss": 0.9263, "step": 22300 }, { "epoch": 0.5077031609130009, "grad_norm": 0.000972747802734375, "learning_rate": 1.0047371354263423e-05, "loss": 0.3934, "step": 22310 }, { "epoch": 0.5079307284436657, "grad_norm": 2.828125, "learning_rate": 1.004272710384544e-05, "loss": 0.2122, "step": 22320 }, { "epoch": 0.5081582959743304, "grad_norm": 0.0010986328125, "learning_rate": 1.0038082853427457e-05, "loss": 1.3057, "step": 22330 }, { "epoch": 0.5083858635049952, "grad_norm": 18.75, "learning_rate": 1.0033438603009476e-05, "loss": 1.0187, "step": 22340 }, { "epoch": 0.5086134310356598, "grad_norm": 213.0, "learning_rate": 1.0028794352591493e-05, "loss": 1.6953, "step": 22350 }, { "epoch": 0.5088409985663246, "grad_norm": 6.5625, "learning_rate": 1.0024150102173511e-05, "loss": 1.0856, "step": 22360 }, { "epoch": 0.5090685660969893, "grad_norm": 79.5, "learning_rate": 1.0019505851755527e-05, "loss": 1.5442, "step": 22370 }, { "epoch": 0.509296133627654, "grad_norm": 207.0, "learning_rate": 1.0014861601337544e-05, "loss": 0.6377, "step": 22380 }, { "epoch": 0.5095237011583187, "grad_norm": 0.000926971435546875, "learning_rate": 1.0010217350919563e-05, "loss": 0.8089, "step": 22390 }, { "epoch": 0.5097512686889835, "grad_norm": 0.0169677734375, "learning_rate": 1.000557310050158e-05, "loss": 1.1553, "step": 22400 }, { "epoch": 0.5099788362196482, "grad_norm": 164.0, "learning_rate": 1.0000928850083598e-05, "loss": 1.5977, "step": 22410 }, { "epoch": 0.5102064037503129, "grad_norm": 66.0, "learning_rate": 9.996284599665614e-06, "loss": 0.9482, "step": 22420 }, { "epoch": 0.5104339712809777, "grad_norm": 247.0, "learning_rate": 9.991640349247633e-06, "loss": 0.9757, "step": 22430 }, { "epoch": 0.5106615388116423, "grad_norm": 78.0, "learning_rate": 9.98699609882965e-06, "loss": 0.4563, "step": 22440 }, { "epoch": 0.5108891063423071, "grad_norm": 185.0, "learning_rate": 9.982351848411668e-06, "loss": 1.4184, "step": 22450 }, { "epoch": 0.5111166738729718, "grad_norm": 129.0, "learning_rate": 9.977707597993684e-06, "loss": 0.8275, "step": 22460 }, { "epoch": 0.5113442414036365, "grad_norm": 0.0625, "learning_rate": 9.973063347575702e-06, "loss": 0.592, "step": 22470 }, { "epoch": 0.5115718089343012, "grad_norm": 218.0, "learning_rate": 9.96841909715772e-06, "loss": 0.7545, "step": 22480 }, { "epoch": 0.511799376464966, "grad_norm": 92.5, "learning_rate": 9.963774846739737e-06, "loss": 0.3991, "step": 22490 }, { "epoch": 0.5120269439956308, "grad_norm": 81.5, "learning_rate": 9.959130596321754e-06, "loss": 0.9894, "step": 22500 }, { "epoch": 0.5122545115262954, "grad_norm": 394.0, "learning_rate": 9.954486345903772e-06, "loss": 1.798, "step": 22510 }, { "epoch": 0.5124820790569602, "grad_norm": 262.0, "learning_rate": 9.94984209548579e-06, "loss": 0.5383, "step": 22520 }, { "epoch": 0.5127096465876249, "grad_norm": 57.75, "learning_rate": 9.945197845067806e-06, "loss": 1.9652, "step": 22530 }, { "epoch": 0.5129372141182896, "grad_norm": 0.04541015625, "learning_rate": 9.940553594649825e-06, "loss": 0.8438, "step": 22540 }, { "epoch": 0.5131647816489543, "grad_norm": 14.4375, "learning_rate": 9.935909344231842e-06, "loss": 0.7467, "step": 22550 }, { "epoch": 0.5133923491796191, "grad_norm": 0.10791015625, "learning_rate": 9.93126509381386e-06, "loss": 0.8841, "step": 22560 }, { "epoch": 0.5136199167102837, "grad_norm": 0.392578125, "learning_rate": 9.926620843395876e-06, "loss": 0.8254, "step": 22570 }, { "epoch": 0.5138474842409485, "grad_norm": 72.0, "learning_rate": 9.921976592977895e-06, "loss": 0.3011, "step": 22580 }, { "epoch": 0.5140750517716133, "grad_norm": 20.0, "learning_rate": 9.917332342559912e-06, "loss": 0.7301, "step": 22590 }, { "epoch": 0.5143026193022779, "grad_norm": 71.0, "learning_rate": 9.91268809214193e-06, "loss": 0.5231, "step": 22600 }, { "epoch": 0.5145301868329427, "grad_norm": 183.0, "learning_rate": 9.908043841723946e-06, "loss": 0.7222, "step": 22610 }, { "epoch": 0.5147577543636074, "grad_norm": 190.0, "learning_rate": 9.903399591305965e-06, "loss": 0.7516, "step": 22620 }, { "epoch": 0.5149853218942722, "grad_norm": 6.84375, "learning_rate": 9.89875534088798e-06, "loss": 0.6681, "step": 22630 }, { "epoch": 0.5152128894249368, "grad_norm": 181.0, "learning_rate": 9.89411109047e-06, "loss": 0.718, "step": 22640 }, { "epoch": 0.5154404569556016, "grad_norm": 54.25, "learning_rate": 9.889466840052016e-06, "loss": 0.9276, "step": 22650 }, { "epoch": 0.5156680244862663, "grad_norm": 0.0091552734375, "learning_rate": 9.884822589634035e-06, "loss": 1.1055, "step": 22660 }, { "epoch": 0.515895592016931, "grad_norm": 147.0, "learning_rate": 9.88017833921605e-06, "loss": 1.2565, "step": 22670 }, { "epoch": 0.5161231595475958, "grad_norm": 0.004180908203125, "learning_rate": 9.875534088798069e-06, "loss": 0.4571, "step": 22680 }, { "epoch": 0.5163507270782605, "grad_norm": 156.0, "learning_rate": 9.870889838380086e-06, "loss": 0.8516, "step": 22690 }, { "epoch": 0.5165782946089252, "grad_norm": 41.5, "learning_rate": 9.866245587962103e-06, "loss": 0.6014, "step": 22700 }, { "epoch": 0.5168058621395899, "grad_norm": 1.484375, "learning_rate": 9.86160133754412e-06, "loss": 0.496, "step": 22710 }, { "epoch": 0.5170334296702547, "grad_norm": 0.00860595703125, "learning_rate": 9.856957087126139e-06, "loss": 0.739, "step": 22720 }, { "epoch": 0.5172609972009193, "grad_norm": 0.00127410888671875, "learning_rate": 9.852312836708156e-06, "loss": 0.8418, "step": 22730 }, { "epoch": 0.5174885647315841, "grad_norm": 147.0, "learning_rate": 9.847668586290173e-06, "loss": 0.8962, "step": 22740 }, { "epoch": 0.5177161322622488, "grad_norm": 1.0546875, "learning_rate": 9.843024335872192e-06, "loss": 0.1405, "step": 22750 }, { "epoch": 0.5179436997929135, "grad_norm": 426.0, "learning_rate": 9.838380085454209e-06, "loss": 1.208, "step": 22760 }, { "epoch": 0.5181712673235783, "grad_norm": 45.0, "learning_rate": 9.833735835036226e-06, "loss": 0.9229, "step": 22770 }, { "epoch": 0.518398834854243, "grad_norm": 0.001007080078125, "learning_rate": 9.829091584618243e-06, "loss": 1.0013, "step": 22780 }, { "epoch": 0.5186264023849078, "grad_norm": 628.0, "learning_rate": 9.824447334200262e-06, "loss": 0.3145, "step": 22790 }, { "epoch": 0.5188539699155724, "grad_norm": 12.375, "learning_rate": 9.819803083782279e-06, "loss": 0.3686, "step": 22800 }, { "epoch": 0.5190815374462372, "grad_norm": 0.01300048828125, "learning_rate": 9.815158833364296e-06, "loss": 0.7322, "step": 22810 }, { "epoch": 0.5193091049769019, "grad_norm": 260.0, "learning_rate": 9.810514582946313e-06, "loss": 0.8229, "step": 22820 }, { "epoch": 0.5195366725075666, "grad_norm": 378.0, "learning_rate": 9.805870332528332e-06, "loss": 0.7349, "step": 22830 }, { "epoch": 0.5197642400382313, "grad_norm": 0.05224609375, "learning_rate": 9.801226082110347e-06, "loss": 1.2588, "step": 22840 }, { "epoch": 0.5199918075688961, "grad_norm": 146.0, "learning_rate": 9.796581831692366e-06, "loss": 0.4962, "step": 22850 }, { "epoch": 0.5202193750995608, "grad_norm": 10.3125, "learning_rate": 9.791937581274383e-06, "loss": 0.7033, "step": 22860 }, { "epoch": 0.5204469426302255, "grad_norm": 64.5, "learning_rate": 9.787293330856402e-06, "loss": 1.0251, "step": 22870 }, { "epoch": 0.5206745101608903, "grad_norm": 80.0, "learning_rate": 9.782649080438417e-06, "loss": 0.5777, "step": 22880 }, { "epoch": 0.5209020776915549, "grad_norm": 74.5, "learning_rate": 9.778004830020436e-06, "loss": 0.6878, "step": 22890 }, { "epoch": 0.5211296452222197, "grad_norm": 0.0014190673828125, "learning_rate": 9.773360579602453e-06, "loss": 0.7369, "step": 22900 }, { "epoch": 0.5213572127528844, "grad_norm": 0.1611328125, "learning_rate": 9.76871632918447e-06, "loss": 1.2096, "step": 22910 }, { "epoch": 0.5215847802835492, "grad_norm": 0.0302734375, "learning_rate": 9.764072078766489e-06, "loss": 0.7396, "step": 22920 }, { "epoch": 0.5218123478142138, "grad_norm": 56.0, "learning_rate": 9.759427828348506e-06, "loss": 0.5816, "step": 22930 }, { "epoch": 0.5220399153448786, "grad_norm": 0.000885009765625, "learning_rate": 9.754783577930523e-06, "loss": 0.9288, "step": 22940 }, { "epoch": 0.5222674828755434, "grad_norm": 136.0, "learning_rate": 9.75013932751254e-06, "loss": 0.9478, "step": 22950 }, { "epoch": 0.522495050406208, "grad_norm": 0.010986328125, "learning_rate": 9.745495077094558e-06, "loss": 0.9584, "step": 22960 }, { "epoch": 0.5227226179368728, "grad_norm": 133.0, "learning_rate": 9.740850826676576e-06, "loss": 0.3149, "step": 22970 }, { "epoch": 0.5229501854675375, "grad_norm": 154.0, "learning_rate": 9.736206576258593e-06, "loss": 1.3308, "step": 22980 }, { "epoch": 0.5231777529982022, "grad_norm": 4.34375, "learning_rate": 9.73156232584061e-06, "loss": 0.261, "step": 22990 }, { "epoch": 0.5234053205288669, "grad_norm": 68.0, "learning_rate": 9.726918075422628e-06, "loss": 0.7063, "step": 23000 }, { "epoch": 0.5236328880595317, "grad_norm": 0.2431640625, "learning_rate": 9.722273825004645e-06, "loss": 1.5443, "step": 23010 }, { "epoch": 0.5238604555901963, "grad_norm": 520.0, "learning_rate": 9.717629574586662e-06, "loss": 1.2454, "step": 23020 }, { "epoch": 0.5240880231208611, "grad_norm": 34.5, "learning_rate": 9.71298532416868e-06, "loss": 0.4762, "step": 23030 }, { "epoch": 0.5243155906515259, "grad_norm": 80.0, "learning_rate": 9.708341073750698e-06, "loss": 1.1557, "step": 23040 }, { "epoch": 0.5245431581821905, "grad_norm": 652.0, "learning_rate": 9.703696823332714e-06, "loss": 0.8935, "step": 23050 }, { "epoch": 0.5247707257128553, "grad_norm": 109.0, "learning_rate": 9.699052572914732e-06, "loss": 1.2117, "step": 23060 }, { "epoch": 0.52499829324352, "grad_norm": 0.006683349609375, "learning_rate": 9.69440832249675e-06, "loss": 1.2226, "step": 23070 }, { "epoch": 0.5252258607741848, "grad_norm": 100.0, "learning_rate": 9.689764072078768e-06, "loss": 0.8437, "step": 23080 }, { "epoch": 0.5254534283048494, "grad_norm": 1.6875, "learning_rate": 9.685119821660785e-06, "loss": 0.6022, "step": 23090 }, { "epoch": 0.5256809958355142, "grad_norm": 0.00121307373046875, "learning_rate": 9.680475571242802e-06, "loss": 0.5025, "step": 23100 }, { "epoch": 0.5259085633661789, "grad_norm": 43.5, "learning_rate": 9.67583132082482e-06, "loss": 0.7332, "step": 23110 }, { "epoch": 0.5261361308968436, "grad_norm": 0.7421875, "learning_rate": 9.671187070406836e-06, "loss": 0.5716, "step": 23120 }, { "epoch": 0.5263636984275084, "grad_norm": 0.0008697509765625, "learning_rate": 9.666542819988855e-06, "loss": 0.5232, "step": 23130 }, { "epoch": 0.5265912659581731, "grad_norm": 152.0, "learning_rate": 9.661898569570872e-06, "loss": 1.044, "step": 23140 }, { "epoch": 0.5268188334888378, "grad_norm": 81.0, "learning_rate": 9.65725431915289e-06, "loss": 0.2003, "step": 23150 }, { "epoch": 0.5270464010195025, "grad_norm": 194.0, "learning_rate": 9.652610068734906e-06, "loss": 0.9206, "step": 23160 }, { "epoch": 0.5272739685501673, "grad_norm": 0.0027008056640625, "learning_rate": 9.647965818316925e-06, "loss": 0.9257, "step": 23170 }, { "epoch": 0.527501536080832, "grad_norm": 226.0, "learning_rate": 9.643321567898942e-06, "loss": 0.767, "step": 23180 }, { "epoch": 0.5277291036114967, "grad_norm": 151.0, "learning_rate": 9.63867731748096e-06, "loss": 0.8413, "step": 23190 }, { "epoch": 0.5279566711421614, "grad_norm": 266.0, "learning_rate": 9.634033067062976e-06, "loss": 0.6634, "step": 23200 }, { "epoch": 0.5281842386728262, "grad_norm": 17.375, "learning_rate": 9.629388816644995e-06, "loss": 0.6534, "step": 23210 }, { "epoch": 0.5284118062034909, "grad_norm": 112.0, "learning_rate": 9.624744566227012e-06, "loss": 0.6706, "step": 23220 }, { "epoch": 0.5286393737341556, "grad_norm": 744.0, "learning_rate": 9.620100315809029e-06, "loss": 0.856, "step": 23230 }, { "epoch": 0.5288669412648204, "grad_norm": 229.0, "learning_rate": 9.615456065391046e-06, "loss": 1.1824, "step": 23240 }, { "epoch": 0.529094508795485, "grad_norm": 238.0, "learning_rate": 9.610811814973065e-06, "loss": 0.8667, "step": 23250 }, { "epoch": 0.5293220763261498, "grad_norm": 278.0, "learning_rate": 9.606167564555082e-06, "loss": 0.7862, "step": 23260 }, { "epoch": 0.5295496438568145, "grad_norm": 0.00080108642578125, "learning_rate": 9.601523314137099e-06, "loss": 0.2652, "step": 23270 }, { "epoch": 0.5297772113874792, "grad_norm": 0.000629425048828125, "learning_rate": 9.596879063719116e-06, "loss": 1.2018, "step": 23280 }, { "epoch": 0.5300047789181439, "grad_norm": 180.0, "learning_rate": 9.592234813301135e-06, "loss": 2.109, "step": 23290 }, { "epoch": 0.5302323464488087, "grad_norm": 147.0, "learning_rate": 9.587590562883152e-06, "loss": 0.4448, "step": 23300 }, { "epoch": 0.5304599139794735, "grad_norm": 0.01239013671875, "learning_rate": 9.582946312465169e-06, "loss": 0.5257, "step": 23310 }, { "epoch": 0.5306874815101381, "grad_norm": 362.0, "learning_rate": 9.578302062047186e-06, "loss": 0.7007, "step": 23320 }, { "epoch": 0.5309150490408029, "grad_norm": 201.0, "learning_rate": 9.573657811629203e-06, "loss": 0.9443, "step": 23330 }, { "epoch": 0.5311426165714676, "grad_norm": 0.007476806640625, "learning_rate": 9.569013561211222e-06, "loss": 0.6834, "step": 23340 }, { "epoch": 0.5313701841021323, "grad_norm": 98.5, "learning_rate": 9.564369310793239e-06, "loss": 0.6474, "step": 23350 }, { "epoch": 0.531597751632797, "grad_norm": 3.546875, "learning_rate": 9.559725060375256e-06, "loss": 0.2305, "step": 23360 }, { "epoch": 0.5318253191634618, "grad_norm": 103.5, "learning_rate": 9.555080809957273e-06, "loss": 0.8612, "step": 23370 }, { "epoch": 0.5320528866941265, "grad_norm": 0.000835418701171875, "learning_rate": 9.550436559539292e-06, "loss": 0.4043, "step": 23380 }, { "epoch": 0.5322804542247912, "grad_norm": 0.00091552734375, "learning_rate": 9.545792309121309e-06, "loss": 0.8981, "step": 23390 }, { "epoch": 0.532508021755456, "grad_norm": 181.0, "learning_rate": 9.541148058703326e-06, "loss": 0.7449, "step": 23400 }, { "epoch": 0.5327355892861206, "grad_norm": 0.037841796875, "learning_rate": 9.536503808285343e-06, "loss": 0.8689, "step": 23410 }, { "epoch": 0.5329631568167854, "grad_norm": 128.0, "learning_rate": 9.531859557867362e-06, "loss": 0.5592, "step": 23420 }, { "epoch": 0.5331907243474501, "grad_norm": 86.5, "learning_rate": 9.527215307449379e-06, "loss": 0.5846, "step": 23430 }, { "epoch": 0.5334182918781148, "grad_norm": 0.0003910064697265625, "learning_rate": 9.522571057031396e-06, "loss": 0.344, "step": 23440 }, { "epoch": 0.5336458594087795, "grad_norm": 31.875, "learning_rate": 9.517926806613413e-06, "loss": 0.7053, "step": 23450 }, { "epoch": 0.5338734269394443, "grad_norm": 130.0, "learning_rate": 9.513282556195432e-06, "loss": 0.9396, "step": 23460 }, { "epoch": 0.5341009944701091, "grad_norm": 0.0380859375, "learning_rate": 9.508638305777449e-06, "loss": 0.8451, "step": 23470 }, { "epoch": 0.5343285620007737, "grad_norm": 362.0, "learning_rate": 9.503994055359466e-06, "loss": 0.5204, "step": 23480 }, { "epoch": 0.5345561295314385, "grad_norm": 352.0, "learning_rate": 9.499349804941483e-06, "loss": 1.3376, "step": 23490 }, { "epoch": 0.5347836970621032, "grad_norm": 65.0, "learning_rate": 9.494705554523501e-06, "loss": 0.7356, "step": 23500 }, { "epoch": 0.5350112645927679, "grad_norm": 166.0, "learning_rate": 9.490061304105519e-06, "loss": 0.5742, "step": 23510 }, { "epoch": 0.5352388321234326, "grad_norm": 82.5, "learning_rate": 9.485417053687536e-06, "loss": 0.5862, "step": 23520 }, { "epoch": 0.5354663996540974, "grad_norm": 0.01348876953125, "learning_rate": 9.480772803269553e-06, "loss": 0.9552, "step": 23530 }, { "epoch": 0.535693967184762, "grad_norm": 98.5, "learning_rate": 9.47612855285157e-06, "loss": 0.7022, "step": 23540 }, { "epoch": 0.5359215347154268, "grad_norm": 27.625, "learning_rate": 9.471484302433588e-06, "loss": 0.6364, "step": 23550 }, { "epoch": 0.5361491022460916, "grad_norm": 200.0, "learning_rate": 9.466840052015605e-06, "loss": 1.2397, "step": 23560 }, { "epoch": 0.5363766697767562, "grad_norm": 0.0013427734375, "learning_rate": 9.462195801597624e-06, "loss": 0.5858, "step": 23570 }, { "epoch": 0.536604237307421, "grad_norm": 96.0, "learning_rate": 9.45755155117964e-06, "loss": 1.6245, "step": 23580 }, { "epoch": 0.5368318048380857, "grad_norm": 0.006011962890625, "learning_rate": 9.452907300761658e-06, "loss": 1.0109, "step": 23590 }, { "epoch": 0.5370593723687505, "grad_norm": 38.25, "learning_rate": 9.448263050343675e-06, "loss": 0.8656, "step": 23600 }, { "epoch": 0.5372869398994151, "grad_norm": 0.01031494140625, "learning_rate": 9.443618799925692e-06, "loss": 0.311, "step": 23610 }, { "epoch": 0.5375145074300799, "grad_norm": 494.0, "learning_rate": 9.43897454950771e-06, "loss": 1.2741, "step": 23620 }, { "epoch": 0.5377420749607446, "grad_norm": 0.00714111328125, "learning_rate": 9.434330299089728e-06, "loss": 0.6309, "step": 23630 }, { "epoch": 0.5379696424914093, "grad_norm": 0.1162109375, "learning_rate": 9.429686048671745e-06, "loss": 0.7897, "step": 23640 }, { "epoch": 0.5381972100220741, "grad_norm": 0.056396484375, "learning_rate": 9.425041798253762e-06, "loss": 1.1082, "step": 23650 }, { "epoch": 0.5384247775527388, "grad_norm": 0.083984375, "learning_rate": 9.42039754783578e-06, "loss": 1.3362, "step": 23660 }, { "epoch": 0.5386523450834035, "grad_norm": 206.0, "learning_rate": 9.415753297417798e-06, "loss": 0.394, "step": 23670 }, { "epoch": 0.5388799126140682, "grad_norm": 109.5, "learning_rate": 9.411109046999815e-06, "loss": 0.9042, "step": 23680 }, { "epoch": 0.539107480144733, "grad_norm": 202.0, "learning_rate": 9.406464796581832e-06, "loss": 0.6342, "step": 23690 }, { "epoch": 0.5393350476753976, "grad_norm": 110.0, "learning_rate": 9.40182054616385e-06, "loss": 0.4418, "step": 23700 }, { "epoch": 0.5395626152060624, "grad_norm": 0.03076171875, "learning_rate": 9.397176295745868e-06, "loss": 0.6573, "step": 23710 }, { "epoch": 0.5397901827367271, "grad_norm": 0.00274658203125, "learning_rate": 9.392532045327885e-06, "loss": 0.6993, "step": 23720 }, { "epoch": 0.5400177502673918, "grad_norm": 0.00122833251953125, "learning_rate": 9.387887794909902e-06, "loss": 0.6165, "step": 23730 }, { "epoch": 0.5402453177980566, "grad_norm": 0.005035400390625, "learning_rate": 9.383243544491921e-06, "loss": 1.447, "step": 23740 }, { "epoch": 0.5404728853287213, "grad_norm": 204.0, "learning_rate": 9.378599294073936e-06, "loss": 0.8468, "step": 23750 }, { "epoch": 0.5407004528593861, "grad_norm": 2.15625, "learning_rate": 9.373955043655955e-06, "loss": 1.2733, "step": 23760 }, { "epoch": 0.5409280203900507, "grad_norm": 0.0004673004150390625, "learning_rate": 9.369310793237972e-06, "loss": 0.5335, "step": 23770 }, { "epoch": 0.5411555879207155, "grad_norm": 99.0, "learning_rate": 9.364666542819991e-06, "loss": 0.7429, "step": 23780 }, { "epoch": 0.5413831554513802, "grad_norm": 0.0230712890625, "learning_rate": 9.360022292402006e-06, "loss": 0.453, "step": 23790 }, { "epoch": 0.5416107229820449, "grad_norm": 85.5, "learning_rate": 9.355378041984025e-06, "loss": 1.5686, "step": 23800 }, { "epoch": 0.5418382905127096, "grad_norm": 124.5, "learning_rate": 9.350733791566042e-06, "loss": 0.4528, "step": 23810 }, { "epoch": 0.5420658580433744, "grad_norm": 0.010986328125, "learning_rate": 9.346089541148059e-06, "loss": 0.7081, "step": 23820 }, { "epoch": 0.5422934255740391, "grad_norm": 384.0, "learning_rate": 9.341445290730076e-06, "loss": 0.9133, "step": 23830 }, { "epoch": 0.5425209931047038, "grad_norm": 0.00103759765625, "learning_rate": 9.336801040312095e-06, "loss": 0.5391, "step": 23840 }, { "epoch": 0.5427485606353686, "grad_norm": 195.0, "learning_rate": 9.332156789894112e-06, "loss": 0.6821, "step": 23850 }, { "epoch": 0.5429761281660332, "grad_norm": 120.0, "learning_rate": 9.327512539476129e-06, "loss": 0.2072, "step": 23860 }, { "epoch": 0.543203695696698, "grad_norm": 0.0029754638671875, "learning_rate": 9.322868289058146e-06, "loss": 1.5059, "step": 23870 }, { "epoch": 0.5434312632273627, "grad_norm": 0.546875, "learning_rate": 9.318224038640165e-06, "loss": 1.3615, "step": 23880 }, { "epoch": 0.5436588307580275, "grad_norm": 146.0, "learning_rate": 9.313579788222182e-06, "loss": 1.215, "step": 23890 }, { "epoch": 0.5438863982886921, "grad_norm": 59.75, "learning_rate": 9.308935537804199e-06, "loss": 0.5825, "step": 23900 }, { "epoch": 0.5441139658193569, "grad_norm": 46.25, "learning_rate": 9.304291287386216e-06, "loss": 0.9045, "step": 23910 }, { "epoch": 0.5443415333500217, "grad_norm": 204.0, "learning_rate": 9.299647036968235e-06, "loss": 0.4385, "step": 23920 }, { "epoch": 0.5445691008806863, "grad_norm": 167.0, "learning_rate": 9.295002786550252e-06, "loss": 0.6527, "step": 23930 }, { "epoch": 0.5447966684113511, "grad_norm": 7.03125, "learning_rate": 9.290358536132269e-06, "loss": 0.3641, "step": 23940 }, { "epoch": 0.5450242359420158, "grad_norm": 104.5, "learning_rate": 9.285714285714288e-06, "loss": 0.8834, "step": 23950 }, { "epoch": 0.5452518034726805, "grad_norm": 161.0, "learning_rate": 9.281070035296303e-06, "loss": 1.2778, "step": 23960 }, { "epoch": 0.5454793710033452, "grad_norm": 116.5, "learning_rate": 9.276425784878322e-06, "loss": 0.8246, "step": 23970 }, { "epoch": 0.54570693853401, "grad_norm": 132.0, "learning_rate": 9.271781534460339e-06, "loss": 0.9897, "step": 23980 }, { "epoch": 0.5459345060646746, "grad_norm": 2.671875, "learning_rate": 9.267137284042357e-06, "loss": 0.9666, "step": 23990 }, { "epoch": 0.5461620735953394, "grad_norm": 176.0, "learning_rate": 9.262493033624373e-06, "loss": 1.272, "step": 24000 }, { "epoch": 0.5463896411260042, "grad_norm": 0.06640625, "learning_rate": 9.257848783206392e-06, "loss": 0.5377, "step": 24010 }, { "epoch": 0.5466172086566689, "grad_norm": 122.5, "learning_rate": 9.253204532788409e-06, "loss": 0.5626, "step": 24020 }, { "epoch": 0.5468447761873336, "grad_norm": 0.2021484375, "learning_rate": 9.248560282370426e-06, "loss": 0.5244, "step": 24030 }, { "epoch": 0.5470723437179983, "grad_norm": 13.375, "learning_rate": 9.243916031952443e-06, "loss": 0.4959, "step": 24040 }, { "epoch": 0.5472999112486631, "grad_norm": 125.0, "learning_rate": 9.239271781534462e-06, "loss": 0.9633, "step": 24050 }, { "epoch": 0.5475274787793277, "grad_norm": 0.0024871826171875, "learning_rate": 9.234627531116479e-06, "loss": 0.7292, "step": 24060 }, { "epoch": 0.5477550463099925, "grad_norm": 87.0, "learning_rate": 9.229983280698496e-06, "loss": 0.8352, "step": 24070 }, { "epoch": 0.5479826138406572, "grad_norm": 123.5, "learning_rate": 9.225339030280513e-06, "loss": 0.7272, "step": 24080 }, { "epoch": 0.5482101813713219, "grad_norm": 64.0, "learning_rate": 9.220694779862531e-06, "loss": 0.6276, "step": 24090 }, { "epoch": 0.5484377489019867, "grad_norm": 59.0, "learning_rate": 9.216050529444548e-06, "loss": 1.9153, "step": 24100 }, { "epoch": 0.5486653164326514, "grad_norm": 0.0004825592041015625, "learning_rate": 9.211406279026566e-06, "loss": 0.6773, "step": 24110 }, { "epoch": 0.5488928839633161, "grad_norm": 0.00151824951171875, "learning_rate": 9.206762028608584e-06, "loss": 1.382, "step": 24120 }, { "epoch": 0.5491204514939808, "grad_norm": 0.0101318359375, "learning_rate": 9.202117778190601e-06, "loss": 0.5747, "step": 24130 }, { "epoch": 0.5493480190246456, "grad_norm": 0.11083984375, "learning_rate": 9.197473527772618e-06, "loss": 0.4195, "step": 24140 }, { "epoch": 0.5495755865553102, "grad_norm": 171.0, "learning_rate": 9.192829277354635e-06, "loss": 0.8162, "step": 24150 }, { "epoch": 0.549803154085975, "grad_norm": 0.004058837890625, "learning_rate": 9.188185026936654e-06, "loss": 1.0668, "step": 24160 }, { "epoch": 0.5500307216166397, "grad_norm": 2.203125, "learning_rate": 9.18354077651867e-06, "loss": 0.5932, "step": 24170 }, { "epoch": 0.5502582891473045, "grad_norm": 16.125, "learning_rate": 9.178896526100688e-06, "loss": 1.4161, "step": 24180 }, { "epoch": 0.5504858566779692, "grad_norm": 234.0, "learning_rate": 9.174252275682705e-06, "loss": 0.7773, "step": 24190 }, { "epoch": 0.5507134242086339, "grad_norm": 75.5, "learning_rate": 9.169608025264724e-06, "loss": 0.4092, "step": 24200 }, { "epoch": 0.5509409917392987, "grad_norm": 0.0810546875, "learning_rate": 9.16496377484674e-06, "loss": 0.4852, "step": 24210 }, { "epoch": 0.5511685592699633, "grad_norm": 1.5, "learning_rate": 9.160319524428758e-06, "loss": 0.951, "step": 24220 }, { "epoch": 0.5513961268006281, "grad_norm": 0.00177764892578125, "learning_rate": 9.155675274010775e-06, "loss": 0.3903, "step": 24230 }, { "epoch": 0.5516236943312928, "grad_norm": 124.5, "learning_rate": 9.151031023592792e-06, "loss": 1.7245, "step": 24240 }, { "epoch": 0.5518512618619575, "grad_norm": 764.0, "learning_rate": 9.14638677317481e-06, "loss": 0.8228, "step": 24250 }, { "epoch": 0.5520788293926222, "grad_norm": 84.5, "learning_rate": 9.141742522756828e-06, "loss": 0.1098, "step": 24260 }, { "epoch": 0.552306396923287, "grad_norm": 149.0, "learning_rate": 9.137098272338845e-06, "loss": 1.3838, "step": 24270 }, { "epoch": 0.5525339644539518, "grad_norm": 120.5, "learning_rate": 9.132454021920862e-06, "loss": 1.0168, "step": 24280 }, { "epoch": 0.5527615319846164, "grad_norm": 0.003326416015625, "learning_rate": 9.127809771502881e-06, "loss": 1.1998, "step": 24290 }, { "epoch": 0.5529890995152812, "grad_norm": 98.5, "learning_rate": 9.123165521084898e-06, "loss": 0.7283, "step": 24300 }, { "epoch": 0.5532166670459459, "grad_norm": 89.5, "learning_rate": 9.118521270666915e-06, "loss": 1.6546, "step": 24310 }, { "epoch": 0.5534442345766106, "grad_norm": 235.0, "learning_rate": 9.113877020248932e-06, "loss": 0.5088, "step": 24320 }, { "epoch": 0.5536718021072753, "grad_norm": 2.15625, "learning_rate": 9.109232769830951e-06, "loss": 1.9311, "step": 24330 }, { "epoch": 0.5538993696379401, "grad_norm": 2.015625, "learning_rate": 9.104588519412968e-06, "loss": 0.8085, "step": 24340 }, { "epoch": 0.5541269371686048, "grad_norm": 92.0, "learning_rate": 9.099944268994985e-06, "loss": 1.1299, "step": 24350 }, { "epoch": 0.5543545046992695, "grad_norm": 97.0, "learning_rate": 9.095300018577002e-06, "loss": 0.6479, "step": 24360 }, { "epoch": 0.5545820722299343, "grad_norm": 101.5, "learning_rate": 9.09065576815902e-06, "loss": 1.1174, "step": 24370 }, { "epoch": 0.5548096397605989, "grad_norm": 121.0, "learning_rate": 9.086011517741036e-06, "loss": 0.2282, "step": 24380 }, { "epoch": 0.5550372072912637, "grad_norm": 48.0, "learning_rate": 9.081367267323055e-06, "loss": 0.5477, "step": 24390 }, { "epoch": 0.5552647748219284, "grad_norm": 0.486328125, "learning_rate": 9.076723016905072e-06, "loss": 0.9137, "step": 24400 }, { "epoch": 0.5554923423525931, "grad_norm": 80.0, "learning_rate": 9.07207876648709e-06, "loss": 0.8723, "step": 24410 }, { "epoch": 0.5557199098832578, "grad_norm": 101.0, "learning_rate": 9.067434516069106e-06, "loss": 0.4637, "step": 24420 }, { "epoch": 0.5559474774139226, "grad_norm": 19.625, "learning_rate": 9.062790265651125e-06, "loss": 0.2319, "step": 24430 }, { "epoch": 0.5561750449445874, "grad_norm": 260.0, "learning_rate": 9.058146015233142e-06, "loss": 0.3633, "step": 24440 }, { "epoch": 0.556402612475252, "grad_norm": 0.17578125, "learning_rate": 9.053501764815159e-06, "loss": 0.8178, "step": 24450 }, { "epoch": 0.5566301800059168, "grad_norm": 85.0, "learning_rate": 9.048857514397178e-06, "loss": 0.7126, "step": 24460 }, { "epoch": 0.5568577475365815, "grad_norm": 77.0, "learning_rate": 9.044213263979195e-06, "loss": 0.6135, "step": 24470 }, { "epoch": 0.5570853150672462, "grad_norm": 544.0, "learning_rate": 9.039569013561212e-06, "loss": 0.6845, "step": 24480 }, { "epoch": 0.5573128825979109, "grad_norm": 76.5, "learning_rate": 9.034924763143229e-06, "loss": 0.6439, "step": 24490 }, { "epoch": 0.5575404501285757, "grad_norm": 0.000858306884765625, "learning_rate": 9.030280512725248e-06, "loss": 0.845, "step": 24500 }, { "epoch": 0.5577680176592403, "grad_norm": 109.5, "learning_rate": 9.025636262307265e-06, "loss": 0.4849, "step": 24510 }, { "epoch": 0.5579955851899051, "grad_norm": 56.25, "learning_rate": 9.020992011889282e-06, "loss": 0.6319, "step": 24520 }, { "epoch": 0.5582231527205699, "grad_norm": 120.0, "learning_rate": 9.016347761471299e-06, "loss": 0.8995, "step": 24530 }, { "epoch": 0.5584507202512345, "grad_norm": 249.0, "learning_rate": 9.011703511053318e-06, "loss": 0.5883, "step": 24540 }, { "epoch": 0.5586782877818993, "grad_norm": 103.0, "learning_rate": 9.007059260635335e-06, "loss": 0.6253, "step": 24550 }, { "epoch": 0.558905855312564, "grad_norm": 0.007720947265625, "learning_rate": 9.002415010217352e-06, "loss": 0.557, "step": 24560 }, { "epoch": 0.5591334228432288, "grad_norm": 0.01806640625, "learning_rate": 8.997770759799369e-06, "loss": 0.523, "step": 24570 }, { "epoch": 0.5593609903738934, "grad_norm": 25.625, "learning_rate": 8.993126509381387e-06, "loss": 0.5487, "step": 24580 }, { "epoch": 0.5595885579045582, "grad_norm": 932.0, "learning_rate": 8.988482258963403e-06, "loss": 0.729, "step": 24590 }, { "epoch": 0.5598161254352229, "grad_norm": 0.00537109375, "learning_rate": 8.983838008545422e-06, "loss": 0.331, "step": 24600 }, { "epoch": 0.5600436929658876, "grad_norm": 255.0, "learning_rate": 8.979193758127439e-06, "loss": 0.4554, "step": 24610 }, { "epoch": 0.5602712604965524, "grad_norm": 27.625, "learning_rate": 8.974549507709457e-06, "loss": 1.5149, "step": 24620 }, { "epoch": 0.5604988280272171, "grad_norm": 224.0, "learning_rate": 8.969905257291473e-06, "loss": 0.4854, "step": 24630 }, { "epoch": 0.5607263955578818, "grad_norm": 0.0023345947265625, "learning_rate": 8.965261006873491e-06, "loss": 0.4818, "step": 24640 }, { "epoch": 0.5609539630885465, "grad_norm": 0.0234375, "learning_rate": 8.960616756455509e-06, "loss": 1.1701, "step": 24650 }, { "epoch": 0.5611815306192113, "grad_norm": 12.4375, "learning_rate": 8.955972506037526e-06, "loss": 0.7328, "step": 24660 }, { "epoch": 0.5614090981498759, "grad_norm": 18.5, "learning_rate": 8.951328255619544e-06, "loss": 0.5727, "step": 24670 }, { "epoch": 0.5616366656805407, "grad_norm": 146.0, "learning_rate": 8.946684005201561e-06, "loss": 1.06, "step": 24680 }, { "epoch": 0.5618642332112054, "grad_norm": 292.0, "learning_rate": 8.942039754783578e-06, "loss": 0.381, "step": 24690 }, { "epoch": 0.5620918007418702, "grad_norm": 50.5, "learning_rate": 8.937395504365595e-06, "loss": 1.6632, "step": 24700 }, { "epoch": 0.5623193682725349, "grad_norm": 4.40625, "learning_rate": 8.932751253947614e-06, "loss": 1.2516, "step": 24710 }, { "epoch": 0.5625469358031996, "grad_norm": 46.0, "learning_rate": 8.928107003529631e-06, "loss": 0.4654, "step": 24720 }, { "epoch": 0.5627745033338644, "grad_norm": 171.0, "learning_rate": 8.923462753111648e-06, "loss": 0.7275, "step": 24730 }, { "epoch": 0.563002070864529, "grad_norm": 11.75, "learning_rate": 8.918818502693665e-06, "loss": 0.9091, "step": 24740 }, { "epoch": 0.5632296383951938, "grad_norm": 15.0, "learning_rate": 8.914174252275684e-06, "loss": 1.0297, "step": 24750 }, { "epoch": 0.5634572059258585, "grad_norm": 51.75, "learning_rate": 8.909530001857701e-06, "loss": 0.6384, "step": 24760 }, { "epoch": 0.5636847734565232, "grad_norm": 68.5, "learning_rate": 8.904885751439718e-06, "loss": 0.3996, "step": 24770 }, { "epoch": 0.5639123409871879, "grad_norm": 113.5, "learning_rate": 8.900241501021735e-06, "loss": 0.8625, "step": 24780 }, { "epoch": 0.5641399085178527, "grad_norm": 396.0, "learning_rate": 8.895597250603754e-06, "loss": 1.0918, "step": 24790 }, { "epoch": 0.5643674760485174, "grad_norm": 137.0, "learning_rate": 8.89095300018577e-06, "loss": 0.4534, "step": 24800 }, { "epoch": 0.5645950435791821, "grad_norm": 256.0, "learning_rate": 8.886308749767788e-06, "loss": 1.3702, "step": 24810 }, { "epoch": 0.5648226111098469, "grad_norm": 1.53125, "learning_rate": 8.881664499349805e-06, "loss": 0.4997, "step": 24820 }, { "epoch": 0.5650501786405115, "grad_norm": 0.003875732421875, "learning_rate": 8.877020248931824e-06, "loss": 0.616, "step": 24830 }, { "epoch": 0.5652777461711763, "grad_norm": 0.11669921875, "learning_rate": 8.872375998513841e-06, "loss": 0.7854, "step": 24840 }, { "epoch": 0.565505313701841, "grad_norm": 112.5, "learning_rate": 8.867731748095858e-06, "loss": 1.1545, "step": 24850 }, { "epoch": 0.5657328812325058, "grad_norm": 106.0, "learning_rate": 8.863087497677875e-06, "loss": 0.6944, "step": 24860 }, { "epoch": 0.5659604487631704, "grad_norm": 112.5, "learning_rate": 8.858443247259892e-06, "loss": 0.3515, "step": 24870 }, { "epoch": 0.5661880162938352, "grad_norm": 0.000766754150390625, "learning_rate": 8.853798996841911e-06, "loss": 0.9856, "step": 24880 }, { "epoch": 0.5664155838245, "grad_norm": 251.0, "learning_rate": 8.849154746423928e-06, "loss": 0.6841, "step": 24890 }, { "epoch": 0.5666431513551646, "grad_norm": 0.0012054443359375, "learning_rate": 8.844510496005945e-06, "loss": 1.6468, "step": 24900 }, { "epoch": 0.5668707188858294, "grad_norm": 0.314453125, "learning_rate": 8.839866245587962e-06, "loss": 0.5117, "step": 24910 }, { "epoch": 0.5670982864164941, "grad_norm": 189.0, "learning_rate": 8.835221995169981e-06, "loss": 1.0986, "step": 24920 }, { "epoch": 0.5673258539471588, "grad_norm": 87.0, "learning_rate": 8.830577744751998e-06, "loss": 0.4972, "step": 24930 }, { "epoch": 0.5675534214778235, "grad_norm": 298.0, "learning_rate": 8.825933494334015e-06, "loss": 1.4745, "step": 24940 }, { "epoch": 0.5677809890084883, "grad_norm": 179.0, "learning_rate": 8.821289243916032e-06, "loss": 0.4699, "step": 24950 }, { "epoch": 0.5680085565391529, "grad_norm": 129.0, "learning_rate": 8.81664499349805e-06, "loss": 0.6677, "step": 24960 }, { "epoch": 0.5682361240698177, "grad_norm": 286.0, "learning_rate": 8.812000743080068e-06, "loss": 0.4975, "step": 24970 }, { "epoch": 0.5684636916004825, "grad_norm": 0.000675201416015625, "learning_rate": 8.807356492662085e-06, "loss": 0.575, "step": 24980 }, { "epoch": 0.5686912591311472, "grad_norm": 96.0, "learning_rate": 8.802712242244102e-06, "loss": 1.0276, "step": 24990 }, { "epoch": 0.5689188266618119, "grad_norm": 83.0, "learning_rate": 8.79806799182612e-06, "loss": 0.6041, "step": 25000 }, { "epoch": 0.5691463941924766, "grad_norm": 59.25, "learning_rate": 8.793423741408138e-06, "loss": 0.3358, "step": 25010 }, { "epoch": 0.5693739617231414, "grad_norm": 117.0, "learning_rate": 8.788779490990155e-06, "loss": 1.1294, "step": 25020 }, { "epoch": 0.569601529253806, "grad_norm": 90.5, "learning_rate": 8.784135240572172e-06, "loss": 0.6709, "step": 25030 }, { "epoch": 0.5698290967844708, "grad_norm": 0.0001735687255859375, "learning_rate": 8.77949099015419e-06, "loss": 0.5855, "step": 25040 }, { "epoch": 0.5700566643151355, "grad_norm": 165.0, "learning_rate": 8.774846739736208e-06, "loss": 0.5593, "step": 25050 }, { "epoch": 0.5702842318458002, "grad_norm": 1.859375, "learning_rate": 8.770202489318225e-06, "loss": 0.4913, "step": 25060 }, { "epoch": 0.570511799376465, "grad_norm": 712.0, "learning_rate": 8.765558238900242e-06, "loss": 0.5018, "step": 25070 }, { "epoch": 0.5707393669071297, "grad_norm": 33.5, "learning_rate": 8.760913988482259e-06, "loss": 0.5816, "step": 25080 }, { "epoch": 0.5709669344377944, "grad_norm": 4.71875, "learning_rate": 8.756269738064278e-06, "loss": 0.6243, "step": 25090 }, { "epoch": 0.5711945019684591, "grad_norm": 0.236328125, "learning_rate": 8.751625487646295e-06, "loss": 0.5794, "step": 25100 }, { "epoch": 0.5714220694991239, "grad_norm": 1.1171875, "learning_rate": 8.746981237228312e-06, "loss": 0.4193, "step": 25110 }, { "epoch": 0.5716496370297885, "grad_norm": 60.75, "learning_rate": 8.742336986810329e-06, "loss": 0.5261, "step": 25120 }, { "epoch": 0.5718772045604533, "grad_norm": 0.12109375, "learning_rate": 8.737692736392347e-06, "loss": 1.2418, "step": 25130 }, { "epoch": 0.572104772091118, "grad_norm": 0.002044677734375, "learning_rate": 8.733048485974365e-06, "loss": 0.8983, "step": 25140 }, { "epoch": 0.5723323396217828, "grad_norm": 0.0004425048828125, "learning_rate": 8.728404235556382e-06, "loss": 0.9055, "step": 25150 }, { "epoch": 0.5725599071524475, "grad_norm": 157.0, "learning_rate": 8.723759985138399e-06, "loss": 0.7146, "step": 25160 }, { "epoch": 0.5727874746831122, "grad_norm": 0.000621795654296875, "learning_rate": 8.719115734720417e-06, "loss": 0.6849, "step": 25170 }, { "epoch": 0.573015042213777, "grad_norm": 131.0, "learning_rate": 8.714471484302434e-06, "loss": 0.8447, "step": 25180 }, { "epoch": 0.5732426097444416, "grad_norm": 75.0, "learning_rate": 8.709827233884452e-06, "loss": 0.5961, "step": 25190 }, { "epoch": 0.5734701772751064, "grad_norm": 43.0, "learning_rate": 8.705182983466469e-06, "loss": 0.8061, "step": 25200 }, { "epoch": 0.5736977448057711, "grad_norm": 210.0, "learning_rate": 8.700538733048487e-06, "loss": 0.8985, "step": 25210 }, { "epoch": 0.5739253123364358, "grad_norm": 1416.0, "learning_rate": 8.695894482630504e-06, "loss": 1.0445, "step": 25220 }, { "epoch": 0.5741528798671005, "grad_norm": 0.263671875, "learning_rate": 8.691250232212521e-06, "loss": 0.4159, "step": 25230 }, { "epoch": 0.5743804473977653, "grad_norm": 23.125, "learning_rate": 8.686605981794538e-06, "loss": 1.5902, "step": 25240 }, { "epoch": 0.57460801492843, "grad_norm": 932.0, "learning_rate": 8.681961731376557e-06, "loss": 1.8505, "step": 25250 }, { "epoch": 0.5748355824590947, "grad_norm": 137.0, "learning_rate": 8.677317480958574e-06, "loss": 0.3337, "step": 25260 }, { "epoch": 0.5750631499897595, "grad_norm": 27.5, "learning_rate": 8.672673230540591e-06, "loss": 1.3583, "step": 25270 }, { "epoch": 0.5752907175204242, "grad_norm": 0.2197265625, "learning_rate": 8.668028980122608e-06, "loss": 0.8984, "step": 25280 }, { "epoch": 0.5755182850510889, "grad_norm": 0.03271484375, "learning_rate": 8.663384729704625e-06, "loss": 1.136, "step": 25290 }, { "epoch": 0.5757458525817536, "grad_norm": 2.8371810913085938e-05, "learning_rate": 8.658740479286644e-06, "loss": 0.405, "step": 25300 }, { "epoch": 0.5759734201124184, "grad_norm": 229.0, "learning_rate": 8.654096228868661e-06, "loss": 1.217, "step": 25310 }, { "epoch": 0.5762009876430831, "grad_norm": 121.0, "learning_rate": 8.64945197845068e-06, "loss": 1.1756, "step": 25320 }, { "epoch": 0.5764285551737478, "grad_norm": 17.375, "learning_rate": 8.644807728032695e-06, "loss": 0.5437, "step": 25330 }, { "epoch": 0.5766561227044126, "grad_norm": 0.0027923583984375, "learning_rate": 8.640163477614714e-06, "loss": 0.7501, "step": 25340 }, { "epoch": 0.5768836902350772, "grad_norm": 0.005584716796875, "learning_rate": 8.635519227196731e-06, "loss": 0.3539, "step": 25350 }, { "epoch": 0.577111257765742, "grad_norm": 48.25, "learning_rate": 8.630874976778748e-06, "loss": 2.057, "step": 25360 }, { "epoch": 0.5773388252964067, "grad_norm": 157.0, "learning_rate": 8.626230726360765e-06, "loss": 1.3398, "step": 25370 }, { "epoch": 0.5775663928270715, "grad_norm": 73.5, "learning_rate": 8.621586475942784e-06, "loss": 1.3099, "step": 25380 }, { "epoch": 0.5777939603577361, "grad_norm": 308.0, "learning_rate": 8.616942225524801e-06, "loss": 0.6419, "step": 25390 }, { "epoch": 0.5780215278884009, "grad_norm": 0.06640625, "learning_rate": 8.612297975106818e-06, "loss": 0.297, "step": 25400 }, { "epoch": 0.5782490954190657, "grad_norm": 99.5, "learning_rate": 8.607653724688835e-06, "loss": 0.757, "step": 25410 }, { "epoch": 0.5784766629497303, "grad_norm": 220.0, "learning_rate": 8.603009474270854e-06, "loss": 1.2253, "step": 25420 }, { "epoch": 0.5787042304803951, "grad_norm": 0.09228515625, "learning_rate": 8.598365223852871e-06, "loss": 0.9792, "step": 25430 }, { "epoch": 0.5789317980110598, "grad_norm": 956.0, "learning_rate": 8.593720973434888e-06, "loss": 0.6888, "step": 25440 }, { "epoch": 0.5791593655417245, "grad_norm": 350.0, "learning_rate": 8.589076723016905e-06, "loss": 1.1452, "step": 25450 }, { "epoch": 0.5793869330723892, "grad_norm": 0.000278472900390625, "learning_rate": 8.584432472598924e-06, "loss": 0.5728, "step": 25460 }, { "epoch": 0.579614500603054, "grad_norm": 386.0, "learning_rate": 8.579788222180941e-06, "loss": 0.5878, "step": 25470 }, { "epoch": 0.5798420681337186, "grad_norm": 109.5, "learning_rate": 8.575143971762958e-06, "loss": 0.8491, "step": 25480 }, { "epoch": 0.5800696356643834, "grad_norm": 280.0, "learning_rate": 8.570499721344977e-06, "loss": 0.8904, "step": 25490 }, { "epoch": 0.5802972031950482, "grad_norm": 79.0, "learning_rate": 8.565855470926992e-06, "loss": 0.7012, "step": 25500 }, { "epoch": 0.5805247707257128, "grad_norm": 0.71875, "learning_rate": 8.56121122050901e-06, "loss": 0.169, "step": 25510 }, { "epoch": 0.5807523382563776, "grad_norm": 0.0191650390625, "learning_rate": 8.556566970091028e-06, "loss": 1.1218, "step": 25520 }, { "epoch": 0.5809799057870423, "grad_norm": 180.0, "learning_rate": 8.551922719673047e-06, "loss": 0.4839, "step": 25530 }, { "epoch": 0.5812074733177071, "grad_norm": 83.0, "learning_rate": 8.547278469255062e-06, "loss": 0.744, "step": 25540 }, { "epoch": 0.5814350408483717, "grad_norm": 62.0, "learning_rate": 8.54263421883708e-06, "loss": 1.1455, "step": 25550 }, { "epoch": 0.5816626083790365, "grad_norm": 3.5625, "learning_rate": 8.537989968419098e-06, "loss": 0.9934, "step": 25560 }, { "epoch": 0.5818901759097012, "grad_norm": 0.00080108642578125, "learning_rate": 8.533345718001115e-06, "loss": 0.8347, "step": 25570 }, { "epoch": 0.5821177434403659, "grad_norm": 100.0, "learning_rate": 8.528701467583132e-06, "loss": 2.2822, "step": 25580 }, { "epoch": 0.5823453109710307, "grad_norm": 74.5, "learning_rate": 8.52405721716515e-06, "loss": 0.6918, "step": 25590 }, { "epoch": 0.5825728785016954, "grad_norm": 147.0, "learning_rate": 8.519412966747168e-06, "loss": 1.4254, "step": 25600 }, { "epoch": 0.5828004460323601, "grad_norm": 0.44921875, "learning_rate": 8.514768716329185e-06, "loss": 0.1297, "step": 25610 }, { "epoch": 0.5830280135630248, "grad_norm": 40.75, "learning_rate": 8.510124465911202e-06, "loss": 1.1366, "step": 25620 }, { "epoch": 0.5832555810936896, "grad_norm": 167.0, "learning_rate": 8.50548021549322e-06, "loss": 0.3697, "step": 25630 }, { "epoch": 0.5834831486243542, "grad_norm": 166.0, "learning_rate": 8.500835965075238e-06, "loss": 1.2442, "step": 25640 }, { "epoch": 0.583710716155019, "grad_norm": 44.75, "learning_rate": 8.496191714657255e-06, "loss": 0.7065, "step": 25650 }, { "epoch": 0.5839382836856837, "grad_norm": 107.5, "learning_rate": 8.491547464239272e-06, "loss": 1.1518, "step": 25660 }, { "epoch": 0.5841658512163485, "grad_norm": 0.04541015625, "learning_rate": 8.48690321382129e-06, "loss": 0.2402, "step": 25670 }, { "epoch": 0.5843934187470132, "grad_norm": 78.5, "learning_rate": 8.482258963403308e-06, "loss": 1.0734, "step": 25680 }, { "epoch": 0.5846209862776779, "grad_norm": 70.5, "learning_rate": 8.477614712985325e-06, "loss": 0.5783, "step": 25690 }, { "epoch": 0.5848485538083427, "grad_norm": 3.765625, "learning_rate": 8.472970462567343e-06, "loss": 0.4745, "step": 25700 }, { "epoch": 0.5850761213390073, "grad_norm": 142.0, "learning_rate": 8.468326212149359e-06, "loss": 1.2366, "step": 25710 }, { "epoch": 0.5853036888696721, "grad_norm": 0.0035552978515625, "learning_rate": 8.463681961731377e-06, "loss": 0.561, "step": 25720 }, { "epoch": 0.5855312564003368, "grad_norm": 0.00244140625, "learning_rate": 8.459037711313394e-06, "loss": 0.9686, "step": 25730 }, { "epoch": 0.5857588239310015, "grad_norm": 6.375, "learning_rate": 8.454393460895413e-06, "loss": 1.2211, "step": 25740 }, { "epoch": 0.5859863914616662, "grad_norm": 48.0, "learning_rate": 8.449749210477429e-06, "loss": 1.0324, "step": 25750 }, { "epoch": 0.586213958992331, "grad_norm": 68.5, "learning_rate": 8.445104960059447e-06, "loss": 0.3552, "step": 25760 }, { "epoch": 0.5864415265229957, "grad_norm": 466.0, "learning_rate": 8.440460709641464e-06, "loss": 0.7302, "step": 25770 }, { "epoch": 0.5866690940536604, "grad_norm": 108.0, "learning_rate": 8.435816459223481e-06, "loss": 0.7708, "step": 25780 }, { "epoch": 0.5868966615843252, "grad_norm": 150.0, "learning_rate": 8.431172208805499e-06, "loss": 1.062, "step": 25790 }, { "epoch": 0.5871242291149898, "grad_norm": 241.0, "learning_rate": 8.426527958387517e-06, "loss": 0.975, "step": 25800 }, { "epoch": 0.5873517966456546, "grad_norm": 0.0137939453125, "learning_rate": 8.421883707969534e-06, "loss": 1.0863, "step": 25810 }, { "epoch": 0.5875793641763193, "grad_norm": 185.0, "learning_rate": 8.417239457551551e-06, "loss": 0.5952, "step": 25820 }, { "epoch": 0.5878069317069841, "grad_norm": 0.0028533935546875, "learning_rate": 8.412595207133568e-06, "loss": 0.7066, "step": 25830 }, { "epoch": 0.5880344992376487, "grad_norm": 42.25, "learning_rate": 8.407950956715587e-06, "loss": 0.5089, "step": 25840 }, { "epoch": 0.5882620667683135, "grad_norm": 676.0, "learning_rate": 8.403306706297604e-06, "loss": 1.4851, "step": 25850 }, { "epoch": 0.5884896342989783, "grad_norm": 159.0, "learning_rate": 8.398662455879621e-06, "loss": 1.3547, "step": 25860 }, { "epoch": 0.5887172018296429, "grad_norm": 0.29296875, "learning_rate": 8.39401820546164e-06, "loss": 0.1918, "step": 25870 }, { "epoch": 0.5889447693603077, "grad_norm": 14.8125, "learning_rate": 8.389373955043657e-06, "loss": 1.2163, "step": 25880 }, { "epoch": 0.5891723368909724, "grad_norm": 91.5, "learning_rate": 8.384729704625674e-06, "loss": 1.3602, "step": 25890 }, { "epoch": 0.5893999044216371, "grad_norm": 101.0, "learning_rate": 8.380085454207691e-06, "loss": 0.5255, "step": 25900 }, { "epoch": 0.5896274719523018, "grad_norm": 67.0, "learning_rate": 8.37544120378971e-06, "loss": 0.2264, "step": 25910 }, { "epoch": 0.5898550394829666, "grad_norm": 109.5, "learning_rate": 8.370796953371727e-06, "loss": 0.3729, "step": 25920 }, { "epoch": 0.5900826070136312, "grad_norm": 22.375, "learning_rate": 8.366152702953744e-06, "loss": 0.5509, "step": 25930 }, { "epoch": 0.590310174544296, "grad_norm": 63.5, "learning_rate": 8.361508452535761e-06, "loss": 0.797, "step": 25940 }, { "epoch": 0.5905377420749608, "grad_norm": 68.5, "learning_rate": 8.35686420211778e-06, "loss": 1.002, "step": 25950 }, { "epoch": 0.5907653096056255, "grad_norm": 152.0, "learning_rate": 8.352219951699795e-06, "loss": 0.6576, "step": 25960 }, { "epoch": 0.5909928771362902, "grad_norm": 26.625, "learning_rate": 8.347575701281814e-06, "loss": 0.8116, "step": 25970 }, { "epoch": 0.5912204446669549, "grad_norm": 45.75, "learning_rate": 8.342931450863831e-06, "loss": 0.7567, "step": 25980 }, { "epoch": 0.5914480121976197, "grad_norm": 272.0, "learning_rate": 8.33828720044585e-06, "loss": 0.6953, "step": 25990 }, { "epoch": 0.5916755797282843, "grad_norm": 0.0026092529296875, "learning_rate": 8.333642950027865e-06, "loss": 1.1816, "step": 26000 }, { "epoch": 0.5919031472589491, "grad_norm": 132.0, "learning_rate": 8.328998699609884e-06, "loss": 0.12, "step": 26010 }, { "epoch": 0.5921307147896138, "grad_norm": 62.75, "learning_rate": 8.324354449191901e-06, "loss": 0.5977, "step": 26020 }, { "epoch": 0.5923582823202785, "grad_norm": 648.0, "learning_rate": 8.319710198773918e-06, "loss": 0.9235, "step": 26030 }, { "epoch": 0.5925858498509433, "grad_norm": 1.4765625, "learning_rate": 8.315065948355937e-06, "loss": 0.7423, "step": 26040 }, { "epoch": 0.592813417381608, "grad_norm": 0.0004673004150390625, "learning_rate": 8.310421697937954e-06, "loss": 0.4735, "step": 26050 }, { "epoch": 0.5930409849122728, "grad_norm": 2.859375, "learning_rate": 8.305777447519971e-06, "loss": 0.5857, "step": 26060 }, { "epoch": 0.5932685524429374, "grad_norm": 0.01031494140625, "learning_rate": 8.301133197101988e-06, "loss": 1.1062, "step": 26070 }, { "epoch": 0.5934961199736022, "grad_norm": 0.0025787353515625, "learning_rate": 8.296488946684007e-06, "loss": 0.9363, "step": 26080 }, { "epoch": 0.5937236875042669, "grad_norm": 0.006134033203125, "learning_rate": 8.291844696266024e-06, "loss": 0.8896, "step": 26090 }, { "epoch": 0.5939512550349316, "grad_norm": 0.002288818359375, "learning_rate": 8.28720044584804e-06, "loss": 0.4354, "step": 26100 }, { "epoch": 0.5941788225655963, "grad_norm": 322.0, "learning_rate": 8.282556195430058e-06, "loss": 0.6099, "step": 26110 }, { "epoch": 0.5944063900962611, "grad_norm": 87.5, "learning_rate": 8.277911945012077e-06, "loss": 0.5458, "step": 26120 }, { "epoch": 0.5946339576269258, "grad_norm": 19.375, "learning_rate": 8.273267694594094e-06, "loss": 0.9316, "step": 26130 }, { "epoch": 0.5948615251575905, "grad_norm": 0.494140625, "learning_rate": 8.26862344417611e-06, "loss": 0.4042, "step": 26140 }, { "epoch": 0.5950890926882553, "grad_norm": 106.5, "learning_rate": 8.263979193758128e-06, "loss": 1.4761, "step": 26150 }, { "epoch": 0.5953166602189199, "grad_norm": 80.5, "learning_rate": 8.259334943340146e-06, "loss": 0.6948, "step": 26160 }, { "epoch": 0.5955442277495847, "grad_norm": 0.00019741058349609375, "learning_rate": 8.254690692922162e-06, "loss": 1.0418, "step": 26170 }, { "epoch": 0.5957717952802494, "grad_norm": 0.01129150390625, "learning_rate": 8.25004644250418e-06, "loss": 0.2494, "step": 26180 }, { "epoch": 0.5959993628109141, "grad_norm": 185.0, "learning_rate": 8.245402192086198e-06, "loss": 0.7833, "step": 26190 }, { "epoch": 0.5962269303415788, "grad_norm": 4.34375, "learning_rate": 8.240757941668216e-06, "loss": 0.9126, "step": 26200 }, { "epoch": 0.5964544978722436, "grad_norm": 0.007171630859375, "learning_rate": 8.236113691250233e-06, "loss": 0.6116, "step": 26210 }, { "epoch": 0.5966820654029084, "grad_norm": 0.002166748046875, "learning_rate": 8.23146944083225e-06, "loss": 0.5816, "step": 26220 }, { "epoch": 0.596909632933573, "grad_norm": 96.0, "learning_rate": 8.226825190414268e-06, "loss": 0.4859, "step": 26230 }, { "epoch": 0.5971372004642378, "grad_norm": 2.90625, "learning_rate": 8.222180939996285e-06, "loss": 1.0017, "step": 26240 }, { "epoch": 0.5973647679949025, "grad_norm": 86.5, "learning_rate": 8.217536689578303e-06, "loss": 0.6071, "step": 26250 }, { "epoch": 0.5975923355255672, "grad_norm": 262.0, "learning_rate": 8.21289243916032e-06, "loss": 0.6, "step": 26260 }, { "epoch": 0.5978199030562319, "grad_norm": 107.5, "learning_rate": 8.208248188742337e-06, "loss": 0.7266, "step": 26270 }, { "epoch": 0.5980474705868967, "grad_norm": 0.001556396484375, "learning_rate": 8.203603938324355e-06, "loss": 0.501, "step": 26280 }, { "epoch": 0.5982750381175614, "grad_norm": 3.3125, "learning_rate": 8.198959687906373e-06, "loss": 0.6813, "step": 26290 }, { "epoch": 0.5985026056482261, "grad_norm": 0.32421875, "learning_rate": 8.19431543748839e-06, "loss": 1.4521, "step": 26300 }, { "epoch": 0.5987301731788909, "grad_norm": 0.00118255615234375, "learning_rate": 8.189671187070407e-06, "loss": 0.7628, "step": 26310 }, { "epoch": 0.5989577407095555, "grad_norm": 150.0, "learning_rate": 8.185026936652424e-06, "loss": 1.2952, "step": 26320 }, { "epoch": 0.5991853082402203, "grad_norm": 159.0, "learning_rate": 8.180382686234443e-06, "loss": 1.0989, "step": 26330 }, { "epoch": 0.599412875770885, "grad_norm": 0.00168609619140625, "learning_rate": 8.17573843581646e-06, "loss": 0.6091, "step": 26340 }, { "epoch": 0.5996404433015498, "grad_norm": 84.0, "learning_rate": 8.171094185398477e-06, "loss": 0.52, "step": 26350 }, { "epoch": 0.5998680108322144, "grad_norm": 30.875, "learning_rate": 8.166449934980494e-06, "loss": 1.7106, "step": 26360 }, { "epoch": 0.6000955783628792, "grad_norm": 54.75, "learning_rate": 8.161805684562513e-06, "loss": 0.7871, "step": 26370 }, { "epoch": 0.600323145893544, "grad_norm": 167.0, "learning_rate": 8.15716143414453e-06, "loss": 1.188, "step": 26380 }, { "epoch": 0.6005507134242086, "grad_norm": 0.1494140625, "learning_rate": 8.152517183726547e-06, "loss": 1.3993, "step": 26390 }, { "epoch": 0.6007782809548734, "grad_norm": 0.000614166259765625, "learning_rate": 8.147872933308564e-06, "loss": 1.2213, "step": 26400 }, { "epoch": 0.6010058484855381, "grad_norm": 268.0, "learning_rate": 8.143228682890583e-06, "loss": 0.7938, "step": 26410 }, { "epoch": 0.6012334160162028, "grad_norm": 130.0, "learning_rate": 8.1385844324726e-06, "loss": 1.0251, "step": 26420 }, { "epoch": 0.6014609835468675, "grad_norm": 252.0, "learning_rate": 8.133940182054617e-06, "loss": 1.5981, "step": 26430 }, { "epoch": 0.6016885510775323, "grad_norm": 0.004791259765625, "learning_rate": 8.129295931636634e-06, "loss": 0.8626, "step": 26440 }, { "epoch": 0.6019161186081969, "grad_norm": 104.5, "learning_rate": 8.124651681218651e-06, "loss": 1.0539, "step": 26450 }, { "epoch": 0.6021436861388617, "grad_norm": 132.0, "learning_rate": 8.12000743080067e-06, "loss": 0.3388, "step": 26460 }, { "epoch": 0.6023712536695265, "grad_norm": 0.0177001953125, "learning_rate": 8.115363180382687e-06, "loss": 1.4597, "step": 26470 }, { "epoch": 0.6025988212001911, "grad_norm": 105.5, "learning_rate": 8.110718929964704e-06, "loss": 0.9583, "step": 26480 }, { "epoch": 0.6028263887308559, "grad_norm": 1.1328125, "learning_rate": 8.106074679546721e-06, "loss": 1.4814, "step": 26490 }, { "epoch": 0.6030539562615206, "grad_norm": 18.5, "learning_rate": 8.10143042912874e-06, "loss": 0.6068, "step": 26500 }, { "epoch": 0.6032815237921854, "grad_norm": 0.515625, "learning_rate": 8.096786178710757e-06, "loss": 0.5982, "step": 26510 }, { "epoch": 0.60350909132285, "grad_norm": 0.0012054443359375, "learning_rate": 8.092141928292774e-06, "loss": 1.2551, "step": 26520 }, { "epoch": 0.6037366588535148, "grad_norm": 53.75, "learning_rate": 8.087497677874791e-06, "loss": 0.6468, "step": 26530 }, { "epoch": 0.6039642263841795, "grad_norm": 27.125, "learning_rate": 8.08285342745681e-06, "loss": 0.4572, "step": 26540 }, { "epoch": 0.6041917939148442, "grad_norm": 194.0, "learning_rate": 8.078209177038827e-06, "loss": 0.6716, "step": 26550 }, { "epoch": 0.604419361445509, "grad_norm": 220.0, "learning_rate": 8.073564926620844e-06, "loss": 0.7758, "step": 26560 }, { "epoch": 0.6046469289761737, "grad_norm": 0.90234375, "learning_rate": 8.068920676202861e-06, "loss": 0.708, "step": 26570 }, { "epoch": 0.6048744965068384, "grad_norm": 52.25, "learning_rate": 8.06427642578488e-06, "loss": 0.8855, "step": 26580 }, { "epoch": 0.6051020640375031, "grad_norm": 0.00390625, "learning_rate": 8.059632175366897e-06, "loss": 0.6922, "step": 26590 }, { "epoch": 0.6053296315681679, "grad_norm": 96.0, "learning_rate": 8.054987924948914e-06, "loss": 0.6089, "step": 26600 }, { "epoch": 0.6055571990988325, "grad_norm": 60.25, "learning_rate": 8.050343674530931e-06, "loss": 0.2634, "step": 26610 }, { "epoch": 0.6057847666294973, "grad_norm": 171.0, "learning_rate": 8.04569942411295e-06, "loss": 0.6496, "step": 26620 }, { "epoch": 0.606012334160162, "grad_norm": 0.04736328125, "learning_rate": 8.041055173694967e-06, "loss": 0.3781, "step": 26630 }, { "epoch": 0.6062399016908268, "grad_norm": 185.0, "learning_rate": 8.036410923276984e-06, "loss": 0.6565, "step": 26640 }, { "epoch": 0.6064674692214915, "grad_norm": 0.0012054443359375, "learning_rate": 8.031766672859e-06, "loss": 0.5764, "step": 26650 }, { "epoch": 0.6066950367521562, "grad_norm": 93.0, "learning_rate": 8.027122422441018e-06, "loss": 0.3594, "step": 26660 }, { "epoch": 0.606922604282821, "grad_norm": 496.0, "learning_rate": 8.022478172023037e-06, "loss": 0.9532, "step": 26670 }, { "epoch": 0.6071501718134856, "grad_norm": 166.0, "learning_rate": 8.017833921605054e-06, "loss": 1.0604, "step": 26680 }, { "epoch": 0.6073777393441504, "grad_norm": 0.015380859375, "learning_rate": 8.013189671187072e-06, "loss": 0.5362, "step": 26690 }, { "epoch": 0.6076053068748151, "grad_norm": 0.019287109375, "learning_rate": 8.008545420769088e-06, "loss": 0.8211, "step": 26700 }, { "epoch": 0.6078328744054798, "grad_norm": 270.0, "learning_rate": 8.003901170351107e-06, "loss": 0.5774, "step": 26710 }, { "epoch": 0.6080604419361445, "grad_norm": 0.3984375, "learning_rate": 7.999256919933124e-06, "loss": 0.5301, "step": 26720 }, { "epoch": 0.6082880094668093, "grad_norm": 61.0, "learning_rate": 7.99461266951514e-06, "loss": 0.5963, "step": 26730 }, { "epoch": 0.608515576997474, "grad_norm": 0.341796875, "learning_rate": 7.989968419097158e-06, "loss": 0.8017, "step": 26740 }, { "epoch": 0.6087431445281387, "grad_norm": 83.0, "learning_rate": 7.985324168679176e-06, "loss": 1.4949, "step": 26750 }, { "epoch": 0.6089707120588035, "grad_norm": 0.00579833984375, "learning_rate": 7.980679918261194e-06, "loss": 0.6625, "step": 26760 }, { "epoch": 0.6091982795894682, "grad_norm": 0.000946044921875, "learning_rate": 7.97603566784321e-06, "loss": 1.6605, "step": 26770 }, { "epoch": 0.6094258471201329, "grad_norm": 119.5, "learning_rate": 7.971391417425228e-06, "loss": 1.5189, "step": 26780 }, { "epoch": 0.6096534146507976, "grad_norm": 0.003204345703125, "learning_rate": 7.966747167007246e-06, "loss": 1.4303, "step": 26790 }, { "epoch": 0.6098809821814624, "grad_norm": 112.0, "learning_rate": 7.962102916589263e-06, "loss": 1.1434, "step": 26800 }, { "epoch": 0.610108549712127, "grad_norm": 6.75, "learning_rate": 7.95745866617128e-06, "loss": 0.5335, "step": 26810 }, { "epoch": 0.6103361172427918, "grad_norm": 0.9453125, "learning_rate": 7.952814415753298e-06, "loss": 1.1048, "step": 26820 }, { "epoch": 0.6105636847734566, "grad_norm": 222.0, "learning_rate": 7.948170165335316e-06, "loss": 0.8536, "step": 26830 }, { "epoch": 0.6107912523041212, "grad_norm": 63.75, "learning_rate": 7.943525914917333e-06, "loss": 0.445, "step": 26840 }, { "epoch": 0.611018819834786, "grad_norm": 104.0, "learning_rate": 7.93888166449935e-06, "loss": 0.6573, "step": 26850 }, { "epoch": 0.6112463873654507, "grad_norm": 38.5, "learning_rate": 7.934237414081367e-06, "loss": 0.6572, "step": 26860 }, { "epoch": 0.6114739548961154, "grad_norm": 216.0, "learning_rate": 7.929593163663384e-06, "loss": 1.1735, "step": 26870 }, { "epoch": 0.6117015224267801, "grad_norm": 21.5, "learning_rate": 7.924948913245403e-06, "loss": 0.8914, "step": 26880 }, { "epoch": 0.6119290899574449, "grad_norm": 160.0, "learning_rate": 7.92030466282742e-06, "loss": 0.4202, "step": 26890 }, { "epoch": 0.6121566574881095, "grad_norm": 227.0, "learning_rate": 7.915660412409439e-06, "loss": 0.626, "step": 26900 }, { "epoch": 0.6123842250187743, "grad_norm": 0.00048828125, "learning_rate": 7.911016161991454e-06, "loss": 0.4403, "step": 26910 }, { "epoch": 0.6126117925494391, "grad_norm": 0.369140625, "learning_rate": 7.906371911573473e-06, "loss": 0.96, "step": 26920 }, { "epoch": 0.6128393600801038, "grad_norm": 0.091796875, "learning_rate": 7.90172766115549e-06, "loss": 0.9964, "step": 26930 }, { "epoch": 0.6130669276107685, "grad_norm": 144.0, "learning_rate": 7.897083410737507e-06, "loss": 0.7726, "step": 26940 }, { "epoch": 0.6132944951414332, "grad_norm": 0.00286865234375, "learning_rate": 7.892439160319524e-06, "loss": 0.2591, "step": 26950 }, { "epoch": 0.613522062672098, "grad_norm": 95.0, "learning_rate": 7.887794909901543e-06, "loss": 0.5821, "step": 26960 }, { "epoch": 0.6137496302027626, "grad_norm": 312.0, "learning_rate": 7.88315065948356e-06, "loss": 1.7937, "step": 26970 }, { "epoch": 0.6139771977334274, "grad_norm": 5.9375, "learning_rate": 7.878506409065577e-06, "loss": 0.4598, "step": 26980 }, { "epoch": 0.6142047652640921, "grad_norm": 102.5, "learning_rate": 7.873862158647594e-06, "loss": 0.5729, "step": 26990 }, { "epoch": 0.6144323327947568, "grad_norm": 57.5, "learning_rate": 7.869217908229613e-06, "loss": 1.1091, "step": 27000 }, { "epoch": 0.6146599003254216, "grad_norm": 0.031982421875, "learning_rate": 7.86457365781163e-06, "loss": 0.596, "step": 27010 }, { "epoch": 0.6148874678560863, "grad_norm": 0.0029144287109375, "learning_rate": 7.859929407393647e-06, "loss": 0.7869, "step": 27020 }, { "epoch": 0.615115035386751, "grad_norm": 102.5, "learning_rate": 7.855285156975664e-06, "loss": 1.0048, "step": 27030 }, { "epoch": 0.6153426029174157, "grad_norm": 29.875, "learning_rate": 7.850640906557683e-06, "loss": 0.707, "step": 27040 }, { "epoch": 0.6155701704480805, "grad_norm": 129.0, "learning_rate": 7.8459966561397e-06, "loss": 0.6457, "step": 27050 }, { "epoch": 0.6157977379787452, "grad_norm": 96.0, "learning_rate": 7.841352405721717e-06, "loss": 0.5181, "step": 27060 }, { "epoch": 0.6160253055094099, "grad_norm": 510.0, "learning_rate": 7.836708155303736e-06, "loss": 0.947, "step": 27070 }, { "epoch": 0.6162528730400746, "grad_norm": 0.0001468658447265625, "learning_rate": 7.832063904885751e-06, "loss": 0.7888, "step": 27080 }, { "epoch": 0.6164804405707394, "grad_norm": 1.5078125, "learning_rate": 7.82741965446777e-06, "loss": 0.1453, "step": 27090 }, { "epoch": 0.6167080081014041, "grad_norm": 0.9609375, "learning_rate": 7.822775404049787e-06, "loss": 0.8685, "step": 27100 }, { "epoch": 0.6169355756320688, "grad_norm": 392.0, "learning_rate": 7.818131153631806e-06, "loss": 1.1487, "step": 27110 }, { "epoch": 0.6171631431627336, "grad_norm": 0.0439453125, "learning_rate": 7.813486903213821e-06, "loss": 0.7535, "step": 27120 }, { "epoch": 0.6173907106933982, "grad_norm": 60.5, "learning_rate": 7.80884265279584e-06, "loss": 0.736, "step": 27130 }, { "epoch": 0.617618278224063, "grad_norm": 266.0, "learning_rate": 7.804198402377857e-06, "loss": 0.8922, "step": 27140 }, { "epoch": 0.6178458457547277, "grad_norm": 83.5, "learning_rate": 7.799554151959874e-06, "loss": 1.2095, "step": 27150 }, { "epoch": 0.6180734132853924, "grad_norm": 0.67578125, "learning_rate": 7.794909901541891e-06, "loss": 0.8165, "step": 27160 }, { "epoch": 0.6183009808160571, "grad_norm": 422.0, "learning_rate": 7.79026565112391e-06, "loss": 1.531, "step": 27170 }, { "epoch": 0.6185285483467219, "grad_norm": 70.5, "learning_rate": 7.785621400705927e-06, "loss": 0.2206, "step": 27180 }, { "epoch": 0.6187561158773867, "grad_norm": 326.0, "learning_rate": 7.780977150287944e-06, "loss": 0.8157, "step": 27190 }, { "epoch": 0.6189836834080513, "grad_norm": 280.0, "learning_rate": 7.776332899869961e-06, "loss": 0.7868, "step": 27200 }, { "epoch": 0.6192112509387161, "grad_norm": 0.000957489013671875, "learning_rate": 7.77168864945198e-06, "loss": 0.7919, "step": 27210 }, { "epoch": 0.6194388184693808, "grad_norm": 29.875, "learning_rate": 7.767044399033997e-06, "loss": 0.5761, "step": 27220 }, { "epoch": 0.6196663860000455, "grad_norm": 143.0, "learning_rate": 7.762400148616014e-06, "loss": 1.1154, "step": 27230 }, { "epoch": 0.6198939535307102, "grad_norm": 260.0, "learning_rate": 7.757755898198032e-06, "loss": 0.3777, "step": 27240 }, { "epoch": 0.620121521061375, "grad_norm": 1.453125, "learning_rate": 7.75311164778005e-06, "loss": 1.1455, "step": 27250 }, { "epoch": 0.6203490885920396, "grad_norm": 0.03173828125, "learning_rate": 7.748467397362067e-06, "loss": 1.0765, "step": 27260 }, { "epoch": 0.6205766561227044, "grad_norm": 0.01220703125, "learning_rate": 7.743823146944084e-06, "loss": 0.9248, "step": 27270 }, { "epoch": 0.6208042236533692, "grad_norm": 167.0, "learning_rate": 7.739178896526102e-06, "loss": 0.5301, "step": 27280 }, { "epoch": 0.6210317911840338, "grad_norm": 251.0, "learning_rate": 7.734534646108118e-06, "loss": 1.9197, "step": 27290 }, { "epoch": 0.6212593587146986, "grad_norm": 494.0, "learning_rate": 7.729890395690136e-06, "loss": 0.4547, "step": 27300 }, { "epoch": 0.6214869262453633, "grad_norm": 95.0, "learning_rate": 7.725246145272154e-06, "loss": 0.5393, "step": 27310 }, { "epoch": 0.621714493776028, "grad_norm": 116.5, "learning_rate": 7.720601894854172e-06, "loss": 1.5258, "step": 27320 }, { "epoch": 0.6219420613066927, "grad_norm": 0.003387451171875, "learning_rate": 7.715957644436188e-06, "loss": 0.5219, "step": 27330 }, { "epoch": 0.6221696288373575, "grad_norm": 71.0, "learning_rate": 7.711313394018206e-06, "loss": 1.0385, "step": 27340 }, { "epoch": 0.6223971963680223, "grad_norm": 37.75, "learning_rate": 7.706669143600223e-06, "loss": 0.8267, "step": 27350 }, { "epoch": 0.6226247638986869, "grad_norm": 12.4375, "learning_rate": 7.70202489318224e-06, "loss": 0.1927, "step": 27360 }, { "epoch": 0.6228523314293517, "grad_norm": 116.0, "learning_rate": 7.697380642764258e-06, "loss": 0.8197, "step": 27370 }, { "epoch": 0.6230798989600164, "grad_norm": 73.0, "learning_rate": 7.692736392346276e-06, "loss": 0.3026, "step": 27380 }, { "epoch": 0.6233074664906811, "grad_norm": 55.0, "learning_rate": 7.688092141928293e-06, "loss": 0.8836, "step": 27390 }, { "epoch": 0.6235350340213458, "grad_norm": 0.12451171875, "learning_rate": 7.68344789151031e-06, "loss": 0.3186, "step": 27400 }, { "epoch": 0.6237626015520106, "grad_norm": 0.123046875, "learning_rate": 7.67880364109233e-06, "loss": 0.8208, "step": 27410 }, { "epoch": 0.6239901690826752, "grad_norm": 520.0, "learning_rate": 7.674159390674346e-06, "loss": 0.9229, "step": 27420 }, { "epoch": 0.62421773661334, "grad_norm": 912.0, "learning_rate": 7.669515140256363e-06, "loss": 0.8222, "step": 27430 }, { "epoch": 0.6244453041440048, "grad_norm": 100.5, "learning_rate": 7.66487088983838e-06, "loss": 0.5477, "step": 27440 }, { "epoch": 0.6246728716746694, "grad_norm": 139.0, "learning_rate": 7.660226639420399e-06, "loss": 0.7356, "step": 27450 }, { "epoch": 0.6249004392053342, "grad_norm": 19.375, "learning_rate": 7.655582389002416e-06, "loss": 0.4427, "step": 27460 }, { "epoch": 0.6251280067359989, "grad_norm": 187.0, "learning_rate": 7.650938138584433e-06, "loss": 0.2093, "step": 27470 }, { "epoch": 0.6253555742666637, "grad_norm": 0.0030670166015625, "learning_rate": 7.64629388816645e-06, "loss": 0.4935, "step": 27480 }, { "epoch": 0.6255831417973283, "grad_norm": 0.09130859375, "learning_rate": 7.641649637748469e-06, "loss": 0.3875, "step": 27490 }, { "epoch": 0.6258107093279931, "grad_norm": 143.0, "learning_rate": 7.637005387330484e-06, "loss": 0.9687, "step": 27500 }, { "epoch": 0.6260382768586578, "grad_norm": 138.0, "learning_rate": 7.632361136912503e-06, "loss": 1.1736, "step": 27510 }, { "epoch": 0.6262658443893225, "grad_norm": 0.76953125, "learning_rate": 7.62771688649452e-06, "loss": 0.8334, "step": 27520 }, { "epoch": 0.6264934119199873, "grad_norm": 0.0771484375, "learning_rate": 7.623072636076538e-06, "loss": 0.9366, "step": 27530 }, { "epoch": 0.626720979450652, "grad_norm": 16.25, "learning_rate": 7.618428385658555e-06, "loss": 0.6488, "step": 27540 }, { "epoch": 0.6269485469813167, "grad_norm": 46.75, "learning_rate": 7.613784135240573e-06, "loss": 0.5018, "step": 27550 }, { "epoch": 0.6271761145119814, "grad_norm": 0.00634765625, "learning_rate": 7.609139884822591e-06, "loss": 0.478, "step": 27560 }, { "epoch": 0.6274036820426462, "grad_norm": 28.625, "learning_rate": 7.604495634404607e-06, "loss": 0.7136, "step": 27570 }, { "epoch": 0.6276312495733108, "grad_norm": 150.0, "learning_rate": 7.599851383986625e-06, "loss": 1.0769, "step": 27580 }, { "epoch": 0.6278588171039756, "grad_norm": 160.0, "learning_rate": 7.595207133568643e-06, "loss": 1.7043, "step": 27590 }, { "epoch": 0.6280863846346403, "grad_norm": 143.0, "learning_rate": 7.590562883150661e-06, "loss": 2.028, "step": 27600 }, { "epoch": 0.6283139521653051, "grad_norm": 88.0, "learning_rate": 7.585918632732677e-06, "loss": 0.3123, "step": 27610 }, { "epoch": 0.6285415196959698, "grad_norm": 78.5, "learning_rate": 7.581274382314695e-06, "loss": 2.3333, "step": 27620 }, { "epoch": 0.6287690872266345, "grad_norm": 120.5, "learning_rate": 7.576630131896713e-06, "loss": 0.5039, "step": 27630 }, { "epoch": 0.6289966547572993, "grad_norm": 0.0002994537353515625, "learning_rate": 7.571985881478729e-06, "loss": 1.1203, "step": 27640 }, { "epoch": 0.6292242222879639, "grad_norm": 0.0021820068359375, "learning_rate": 7.567341631060747e-06, "loss": 1.0932, "step": 27650 }, { "epoch": 0.6294517898186287, "grad_norm": 185.0, "learning_rate": 7.562697380642765e-06, "loss": 0.8792, "step": 27660 }, { "epoch": 0.6296793573492934, "grad_norm": 213.0, "learning_rate": 7.558053130224783e-06, "loss": 0.643, "step": 27670 }, { "epoch": 0.6299069248799581, "grad_norm": 50.75, "learning_rate": 7.5534088798068e-06, "loss": 0.6564, "step": 27680 }, { "epoch": 0.6301344924106228, "grad_norm": 0.0057373046875, "learning_rate": 7.548764629388817e-06, "loss": 0.9343, "step": 27690 }, { "epoch": 0.6303620599412876, "grad_norm": 91.5, "learning_rate": 7.544120378970835e-06, "loss": 1.4397, "step": 27700 }, { "epoch": 0.6305896274719524, "grad_norm": 0.0086669921875, "learning_rate": 7.539476128552852e-06, "loss": 0.734, "step": 27710 }, { "epoch": 0.630817195002617, "grad_norm": 118.5, "learning_rate": 7.53483187813487e-06, "loss": 0.8688, "step": 27720 }, { "epoch": 0.6310447625332818, "grad_norm": 27.25, "learning_rate": 7.530187627716888e-06, "loss": 0.5161, "step": 27730 }, { "epoch": 0.6312723300639465, "grad_norm": 146.0, "learning_rate": 7.525543377298905e-06, "loss": 0.4097, "step": 27740 }, { "epoch": 0.6314998975946112, "grad_norm": 0.0115966796875, "learning_rate": 7.520899126880922e-06, "loss": 0.5369, "step": 27750 }, { "epoch": 0.6317274651252759, "grad_norm": 1.4140625, "learning_rate": 7.51625487646294e-06, "loss": 0.7905, "step": 27760 }, { "epoch": 0.6319550326559407, "grad_norm": 0.0263671875, "learning_rate": 7.5116106260449576e-06, "loss": 0.8762, "step": 27770 }, { "epoch": 0.6321826001866053, "grad_norm": 0.0181884765625, "learning_rate": 7.506966375626974e-06, "loss": 2.1919, "step": 27780 }, { "epoch": 0.6324101677172701, "grad_norm": 1.8046875, "learning_rate": 7.502322125208992e-06, "loss": 0.288, "step": 27790 }, { "epoch": 0.6326377352479349, "grad_norm": 193.0, "learning_rate": 7.4976778747910096e-06, "loss": 0.2423, "step": 27800 }, { "epoch": 0.6328653027785995, "grad_norm": 11.25, "learning_rate": 7.4930336243730275e-06, "loss": 0.424, "step": 27810 }, { "epoch": 0.6330928703092643, "grad_norm": 0.0126953125, "learning_rate": 7.488389373955044e-06, "loss": 0.2471, "step": 27820 }, { "epoch": 0.633320437839929, "grad_norm": 0.043701171875, "learning_rate": 7.4837451235370616e-06, "loss": 0.5126, "step": 27830 }, { "epoch": 0.6335480053705937, "grad_norm": 134.0, "learning_rate": 7.4791008731190795e-06, "loss": 1.4243, "step": 27840 }, { "epoch": 0.6337755729012584, "grad_norm": 46.5, "learning_rate": 7.4744566227010965e-06, "loss": 0.3606, "step": 27850 }, { "epoch": 0.6340031404319232, "grad_norm": 187.0, "learning_rate": 7.469812372283114e-06, "loss": 0.9013, "step": 27860 }, { "epoch": 0.6342307079625878, "grad_norm": 398.0, "learning_rate": 7.4651681218651315e-06, "loss": 0.925, "step": 27870 }, { "epoch": 0.6344582754932526, "grad_norm": 65.5, "learning_rate": 7.460523871447149e-06, "loss": 1.8576, "step": 27880 }, { "epoch": 0.6346858430239174, "grad_norm": 0.1640625, "learning_rate": 7.4558796210291664e-06, "loss": 0.9459, "step": 27890 }, { "epoch": 0.6349134105545821, "grad_norm": 0.62890625, "learning_rate": 7.451235370611184e-06, "loss": 0.7991, "step": 27900 }, { "epoch": 0.6351409780852468, "grad_norm": 68.5, "learning_rate": 7.446591120193201e-06, "loss": 1.2029, "step": 27910 }, { "epoch": 0.6353685456159115, "grad_norm": 0.003997802734375, "learning_rate": 7.4419468697752185e-06, "loss": 0.8635, "step": 27920 }, { "epoch": 0.6355961131465763, "grad_norm": 2.40625, "learning_rate": 7.437302619357236e-06, "loss": 1.5138, "step": 27930 }, { "epoch": 0.6358236806772409, "grad_norm": 135.0, "learning_rate": 7.432658368939254e-06, "loss": 1.0647, "step": 27940 }, { "epoch": 0.6360512482079057, "grad_norm": 147.0, "learning_rate": 7.428014118521272e-06, "loss": 1.0918, "step": 27950 }, { "epoch": 0.6362788157385704, "grad_norm": 10.375, "learning_rate": 7.423369868103288e-06, "loss": 0.837, "step": 27960 }, { "epoch": 0.6365063832692351, "grad_norm": 79.0, "learning_rate": 7.418725617685306e-06, "loss": 1.362, "step": 27970 }, { "epoch": 0.6367339507998999, "grad_norm": 86.5, "learning_rate": 7.414081367267324e-06, "loss": 0.5621, "step": 27980 }, { "epoch": 0.6369615183305646, "grad_norm": 91.0, "learning_rate": 7.40943711684934e-06, "loss": 1.5771, "step": 27990 }, { "epoch": 0.6371890858612294, "grad_norm": 101.0, "learning_rate": 7.404792866431358e-06, "loss": 1.1156, "step": 28000 }, { "epoch": 0.637416653391894, "grad_norm": 0.01336669921875, "learning_rate": 7.400148616013376e-06, "loss": 0.8309, "step": 28010 }, { "epoch": 0.6376442209225588, "grad_norm": 398.0, "learning_rate": 7.395504365595394e-06, "loss": 0.9556, "step": 28020 }, { "epoch": 0.6378717884532235, "grad_norm": 84.0, "learning_rate": 7.39086011517741e-06, "loss": 1.2638, "step": 28030 }, { "epoch": 0.6380993559838882, "grad_norm": 264.0, "learning_rate": 7.386215864759428e-06, "loss": 0.9675, "step": 28040 }, { "epoch": 0.6383269235145529, "grad_norm": 6.90625, "learning_rate": 7.381571614341446e-06, "loss": 1.3657, "step": 28050 }, { "epoch": 0.6385544910452177, "grad_norm": 0.0164794921875, "learning_rate": 7.376927363923463e-06, "loss": 0.6357, "step": 28060 }, { "epoch": 0.6387820585758824, "grad_norm": 0.033447265625, "learning_rate": 7.372283113505481e-06, "loss": 0.7065, "step": 28070 }, { "epoch": 0.6390096261065471, "grad_norm": 0.83984375, "learning_rate": 7.367638863087498e-06, "loss": 1.306, "step": 28080 }, { "epoch": 0.6392371936372119, "grad_norm": 183.0, "learning_rate": 7.362994612669516e-06, "loss": 1.5312, "step": 28090 }, { "epoch": 0.6394647611678765, "grad_norm": 238.0, "learning_rate": 7.358350362251533e-06, "loss": 1.1962, "step": 28100 }, { "epoch": 0.6396923286985413, "grad_norm": 174.0, "learning_rate": 7.353706111833551e-06, "loss": 1.8168, "step": 28110 }, { "epoch": 0.639919896229206, "grad_norm": 40.25, "learning_rate": 7.349061861415568e-06, "loss": 1.7208, "step": 28120 }, { "epoch": 0.6401474637598707, "grad_norm": 53.0, "learning_rate": 7.344417610997585e-06, "loss": 0.3827, "step": 28130 }, { "epoch": 0.6403750312905354, "grad_norm": 0.3046875, "learning_rate": 7.339773360579603e-06, "loss": 0.6288, "step": 28140 }, { "epoch": 0.6406025988212002, "grad_norm": 0.00543212890625, "learning_rate": 7.335129110161621e-06, "loss": 0.69, "step": 28150 }, { "epoch": 0.640830166351865, "grad_norm": 83.0, "learning_rate": 7.330484859743639e-06, "loss": 1.0247, "step": 28160 }, { "epoch": 0.6410577338825296, "grad_norm": 384.0, "learning_rate": 7.325840609325655e-06, "loss": 0.4132, "step": 28170 }, { "epoch": 0.6412853014131944, "grad_norm": 0.12158203125, "learning_rate": 7.321196358907673e-06, "loss": 0.6866, "step": 28180 }, { "epoch": 0.6415128689438591, "grad_norm": 55.5, "learning_rate": 7.316552108489691e-06, "loss": 0.7304, "step": 28190 }, { "epoch": 0.6417404364745238, "grad_norm": 5.59375, "learning_rate": 7.311907858071707e-06, "loss": 0.7675, "step": 28200 }, { "epoch": 0.6419680040051885, "grad_norm": 4.78125, "learning_rate": 7.307263607653725e-06, "loss": 0.6377, "step": 28210 }, { "epoch": 0.6421955715358533, "grad_norm": 6.40625, "learning_rate": 7.302619357235743e-06, "loss": 0.2851, "step": 28220 }, { "epoch": 0.6424231390665179, "grad_norm": 175.0, "learning_rate": 7.297975106817761e-06, "loss": 1.0224, "step": 28230 }, { "epoch": 0.6426507065971827, "grad_norm": 65.0, "learning_rate": 7.293330856399777e-06, "loss": 0.8281, "step": 28240 }, { "epoch": 0.6428782741278475, "grad_norm": 0.427734375, "learning_rate": 7.288686605981795e-06, "loss": 1.3506, "step": 28250 }, { "epoch": 0.6431058416585121, "grad_norm": 231.0, "learning_rate": 7.284042355563813e-06, "loss": 1.6154, "step": 28260 }, { "epoch": 0.6433334091891769, "grad_norm": 202.0, "learning_rate": 7.27939810514583e-06, "loss": 0.2823, "step": 28270 }, { "epoch": 0.6435609767198416, "grad_norm": 0.51953125, "learning_rate": 7.274753854727848e-06, "loss": 0.7322, "step": 28280 }, { "epoch": 0.6437885442505064, "grad_norm": 218.0, "learning_rate": 7.270109604309865e-06, "loss": 1.9364, "step": 28290 }, { "epoch": 0.644016111781171, "grad_norm": 103.5, "learning_rate": 7.265465353891883e-06, "loss": 1.0703, "step": 28300 }, { "epoch": 0.6442436793118358, "grad_norm": 108.5, "learning_rate": 7.2608211034739e-06, "loss": 1.0986, "step": 28310 }, { "epoch": 0.6444712468425006, "grad_norm": 0.26953125, "learning_rate": 7.256176853055918e-06, "loss": 0.8981, "step": 28320 }, { "epoch": 0.6446988143731652, "grad_norm": 58.0, "learning_rate": 7.2515326026379355e-06, "loss": 0.6079, "step": 28330 }, { "epoch": 0.64492638190383, "grad_norm": 112.0, "learning_rate": 7.246888352219952e-06, "loss": 0.5997, "step": 28340 }, { "epoch": 0.6451539494344947, "grad_norm": 196.0, "learning_rate": 7.24224410180197e-06, "loss": 0.9189, "step": 28350 }, { "epoch": 0.6453815169651594, "grad_norm": 0.0091552734375, "learning_rate": 7.2375998513839875e-06, "loss": 0.5351, "step": 28360 }, { "epoch": 0.6456090844958241, "grad_norm": 148.0, "learning_rate": 7.232955600966005e-06, "loss": 0.576, "step": 28370 }, { "epoch": 0.6458366520264889, "grad_norm": 102.5, "learning_rate": 7.228311350548022e-06, "loss": 1.0215, "step": 28380 }, { "epoch": 0.6460642195571535, "grad_norm": 42.0, "learning_rate": 7.2236671001300395e-06, "loss": 0.994, "step": 28390 }, { "epoch": 0.6462917870878183, "grad_norm": 95.5, "learning_rate": 7.2190228497120574e-06, "loss": 0.5555, "step": 28400 }, { "epoch": 0.6465193546184831, "grad_norm": 0.02001953125, "learning_rate": 7.214378599294074e-06, "loss": 0.2731, "step": 28410 }, { "epoch": 0.6467469221491478, "grad_norm": 3.15625, "learning_rate": 7.2097343488760915e-06, "loss": 0.8538, "step": 28420 }, { "epoch": 0.6469744896798125, "grad_norm": 137.0, "learning_rate": 7.2050900984581094e-06, "loss": 1.1068, "step": 28430 }, { "epoch": 0.6472020572104772, "grad_norm": 0.07568359375, "learning_rate": 7.200445848040127e-06, "loss": 0.4599, "step": 28440 }, { "epoch": 0.647429624741142, "grad_norm": 159.0, "learning_rate": 7.195801597622144e-06, "loss": 0.6411, "step": 28450 }, { "epoch": 0.6476571922718066, "grad_norm": 0.00023746490478515625, "learning_rate": 7.1911573472041614e-06, "loss": 0.5034, "step": 28460 }, { "epoch": 0.6478847598024714, "grad_norm": 7.875, "learning_rate": 7.186513096786179e-06, "loss": 0.8581, "step": 28470 }, { "epoch": 0.6481123273331361, "grad_norm": 0.10595703125, "learning_rate": 7.181868846368196e-06, "loss": 0.7156, "step": 28480 }, { "epoch": 0.6483398948638008, "grad_norm": 89.5, "learning_rate": 7.177224595950214e-06, "loss": 0.3234, "step": 28490 }, { "epoch": 0.6485674623944656, "grad_norm": 190.0, "learning_rate": 7.172580345532232e-06, "loss": 1.0109, "step": 28500 }, { "epoch": 0.6487950299251303, "grad_norm": 89.0, "learning_rate": 7.167936095114249e-06, "loss": 0.5118, "step": 28510 }, { "epoch": 0.649022597455795, "grad_norm": 1.40625, "learning_rate": 7.163291844696266e-06, "loss": 0.5127, "step": 28520 }, { "epoch": 0.6492501649864597, "grad_norm": 84.5, "learning_rate": 7.158647594278284e-06, "loss": 0.3938, "step": 28530 }, { "epoch": 0.6494777325171245, "grad_norm": 1.15625, "learning_rate": 7.154003343860302e-06, "loss": 0.8762, "step": 28540 }, { "epoch": 0.6497053000477891, "grad_norm": 112.5, "learning_rate": 7.149359093442318e-06, "loss": 0.7791, "step": 28550 }, { "epoch": 0.6499328675784539, "grad_norm": 0.0027313232421875, "learning_rate": 7.144714843024336e-06, "loss": 0.7079, "step": 28560 }, { "epoch": 0.6501604351091186, "grad_norm": 138.0, "learning_rate": 7.140070592606354e-06, "loss": 1.0715, "step": 28570 }, { "epoch": 0.6503880026397834, "grad_norm": 88.5, "learning_rate": 7.135426342188372e-06, "loss": 1.14, "step": 28580 }, { "epoch": 0.6506155701704481, "grad_norm": 0.002227783203125, "learning_rate": 7.130782091770388e-06, "loss": 1.1638, "step": 28590 }, { "epoch": 0.6508431377011128, "grad_norm": 193.0, "learning_rate": 7.126137841352406e-06, "loss": 0.5161, "step": 28600 }, { "epoch": 0.6510707052317776, "grad_norm": 22.625, "learning_rate": 7.121493590934424e-06, "loss": 0.858, "step": 28610 }, { "epoch": 0.6512982727624422, "grad_norm": 88.0, "learning_rate": 7.116849340516441e-06, "loss": 0.6675, "step": 28620 }, { "epoch": 0.651525840293107, "grad_norm": 69.5, "learning_rate": 7.112205090098458e-06, "loss": 0.7692, "step": 28630 }, { "epoch": 0.6517534078237717, "grad_norm": 140.0, "learning_rate": 7.107560839680476e-06, "loss": 0.6866, "step": 28640 }, { "epoch": 0.6519809753544364, "grad_norm": 207.0, "learning_rate": 7.102916589262494e-06, "loss": 0.6844, "step": 28650 }, { "epoch": 0.6522085428851011, "grad_norm": 10.6875, "learning_rate": 7.098272338844511e-06, "loss": 0.5126, "step": 28660 }, { "epoch": 0.6524361104157659, "grad_norm": 0.004150390625, "learning_rate": 7.093628088426529e-06, "loss": 0.4495, "step": 28670 }, { "epoch": 0.6526636779464307, "grad_norm": 91.0, "learning_rate": 7.088983838008546e-06, "loss": 1.1466, "step": 28680 }, { "epoch": 0.6528912454770953, "grad_norm": 109.5, "learning_rate": 7.084339587590563e-06, "loss": 0.8738, "step": 28690 }, { "epoch": 0.6531188130077601, "grad_norm": 28.375, "learning_rate": 7.079695337172581e-06, "loss": 0.5707, "step": 28700 }, { "epoch": 0.6533463805384248, "grad_norm": 536.0, "learning_rate": 7.075051086754599e-06, "loss": 1.0992, "step": 28710 }, { "epoch": 0.6535739480690895, "grad_norm": 8.375, "learning_rate": 7.070406836336616e-06, "loss": 0.4904, "step": 28720 }, { "epoch": 0.6538015155997542, "grad_norm": 274.0, "learning_rate": 7.065762585918633e-06, "loss": 0.9922, "step": 28730 }, { "epoch": 0.654029083130419, "grad_norm": 132.0, "learning_rate": 7.061118335500651e-06, "loss": 0.4915, "step": 28740 }, { "epoch": 0.6542566506610836, "grad_norm": 0.036865234375, "learning_rate": 7.056474085082669e-06, "loss": 0.1973, "step": 28750 }, { "epoch": 0.6544842181917484, "grad_norm": 76.0, "learning_rate": 7.051829834664685e-06, "loss": 0.4562, "step": 28760 }, { "epoch": 0.6547117857224132, "grad_norm": 69.0, "learning_rate": 7.047185584246703e-06, "loss": 0.337, "step": 28770 }, { "epoch": 0.6549393532530778, "grad_norm": 169.0, "learning_rate": 7.042541333828721e-06, "loss": 0.735, "step": 28780 }, { "epoch": 0.6551669207837426, "grad_norm": 0.6328125, "learning_rate": 7.037897083410739e-06, "loss": 1.1803, "step": 28790 }, { "epoch": 0.6553944883144073, "grad_norm": 212.0, "learning_rate": 7.033252832992755e-06, "loss": 0.4718, "step": 28800 }, { "epoch": 0.655622055845072, "grad_norm": 314.0, "learning_rate": 7.028608582574773e-06, "loss": 0.7963, "step": 28810 }, { "epoch": 0.6558496233757367, "grad_norm": 156.0, "learning_rate": 7.023964332156791e-06, "loss": 0.6631, "step": 28820 }, { "epoch": 0.6560771909064015, "grad_norm": 302.0, "learning_rate": 7.019320081738808e-06, "loss": 0.4793, "step": 28830 }, { "epoch": 0.6563047584370661, "grad_norm": 0.00677490234375, "learning_rate": 7.014675831320825e-06, "loss": 0.3534, "step": 28840 }, { "epoch": 0.6565323259677309, "grad_norm": 0.02001953125, "learning_rate": 7.010031580902843e-06, "loss": 1.1589, "step": 28850 }, { "epoch": 0.6567598934983957, "grad_norm": 79.0, "learning_rate": 7.005387330484861e-06, "loss": 1.3525, "step": 28860 }, { "epoch": 0.6569874610290604, "grad_norm": 0.00396728515625, "learning_rate": 7.000743080066878e-06, "loss": 0.8992, "step": 28870 }, { "epoch": 0.6572150285597251, "grad_norm": 87.0, "learning_rate": 6.9960988296488955e-06, "loss": 0.8453, "step": 28880 }, { "epoch": 0.6574425960903898, "grad_norm": 0.0189208984375, "learning_rate": 6.991454579230913e-06, "loss": 0.4191, "step": 28890 }, { "epoch": 0.6576701636210546, "grad_norm": 163.0, "learning_rate": 6.98681032881293e-06, "loss": 0.2026, "step": 28900 }, { "epoch": 0.6578977311517192, "grad_norm": 152.0, "learning_rate": 6.9821660783949476e-06, "loss": 0.6287, "step": 28910 }, { "epoch": 0.658125298682384, "grad_norm": 158.0, "learning_rate": 6.9775218279769655e-06, "loss": 1.5176, "step": 28920 }, { "epoch": 0.6583528662130487, "grad_norm": 15.4375, "learning_rate": 6.972877577558983e-06, "loss": 0.5684, "step": 28930 }, { "epoch": 0.6585804337437134, "grad_norm": 27.375, "learning_rate": 6.9682333271409996e-06, "loss": 0.7891, "step": 28940 }, { "epoch": 0.6588080012743782, "grad_norm": 239.0, "learning_rate": 6.9635890767230175e-06, "loss": 0.6726, "step": 28950 }, { "epoch": 0.6590355688050429, "grad_norm": 51.25, "learning_rate": 6.958944826305035e-06, "loss": 0.9748, "step": 28960 }, { "epoch": 0.6592631363357077, "grad_norm": 31.5, "learning_rate": 6.954300575887052e-06, "loss": 0.8454, "step": 28970 }, { "epoch": 0.6594907038663723, "grad_norm": 107.0, "learning_rate": 6.9496563254690695e-06, "loss": 0.8428, "step": 28980 }, { "epoch": 0.6597182713970371, "grad_norm": 2.875, "learning_rate": 6.945012075051087e-06, "loss": 0.8919, "step": 28990 }, { "epoch": 0.6599458389277018, "grad_norm": 207.0, "learning_rate": 6.940367824633105e-06, "loss": 0.6434, "step": 29000 }, { "epoch": 0.6601734064583665, "grad_norm": 253.0, "learning_rate": 6.9357235742151215e-06, "loss": 1.5361, "step": 29010 }, { "epoch": 0.6604009739890312, "grad_norm": 79.0, "learning_rate": 6.931079323797139e-06, "loss": 1.1939, "step": 29020 }, { "epoch": 0.660628541519696, "grad_norm": 89.5, "learning_rate": 6.926435073379157e-06, "loss": 1.1751, "step": 29030 }, { "epoch": 0.6608561090503607, "grad_norm": 175.0, "learning_rate": 6.921790822961174e-06, "loss": 1.3992, "step": 29040 }, { "epoch": 0.6610836765810254, "grad_norm": 1.578125, "learning_rate": 6.917146572543192e-06, "loss": 0.8139, "step": 29050 }, { "epoch": 0.6613112441116902, "grad_norm": 99.5, "learning_rate": 6.912502322125209e-06, "loss": 0.7943, "step": 29060 }, { "epoch": 0.6615388116423548, "grad_norm": 2.921875, "learning_rate": 6.907858071707227e-06, "loss": 0.5736, "step": 29070 }, { "epoch": 0.6617663791730196, "grad_norm": 0.005645751953125, "learning_rate": 6.903213821289244e-06, "loss": 0.1383, "step": 29080 }, { "epoch": 0.6619939467036843, "grad_norm": 67.5, "learning_rate": 6.898569570871262e-06, "loss": 0.6006, "step": 29090 }, { "epoch": 0.662221514234349, "grad_norm": 38.0, "learning_rate": 6.89392532045328e-06, "loss": 1.2577, "step": 29100 }, { "epoch": 0.6624490817650137, "grad_norm": 99.5, "learning_rate": 6.889281070035296e-06, "loss": 0.7868, "step": 29110 }, { "epoch": 0.6626766492956785, "grad_norm": 248.0, "learning_rate": 6.884636819617314e-06, "loss": 0.9668, "step": 29120 }, { "epoch": 0.6629042168263433, "grad_norm": 191.0, "learning_rate": 6.879992569199332e-06, "loss": 1.1018, "step": 29130 }, { "epoch": 0.6631317843570079, "grad_norm": 10.875, "learning_rate": 6.87534831878135e-06, "loss": 0.7908, "step": 29140 }, { "epoch": 0.6633593518876727, "grad_norm": 144.0, "learning_rate": 6.870704068363366e-06, "loss": 1.52, "step": 29150 }, { "epoch": 0.6635869194183374, "grad_norm": 192.0, "learning_rate": 6.866059817945384e-06, "loss": 1.2771, "step": 29160 }, { "epoch": 0.6638144869490021, "grad_norm": 94.5, "learning_rate": 6.861415567527402e-06, "loss": 0.5152, "step": 29170 }, { "epoch": 0.6640420544796668, "grad_norm": 0.003936767578125, "learning_rate": 6.856771317109418e-06, "loss": 0.6822, "step": 29180 }, { "epoch": 0.6642696220103316, "grad_norm": 21.625, "learning_rate": 6.852127066691436e-06, "loss": 0.6673, "step": 29190 }, { "epoch": 0.6644971895409962, "grad_norm": 158.0, "learning_rate": 6.847482816273454e-06, "loss": 1.1962, "step": 29200 }, { "epoch": 0.664724757071661, "grad_norm": 62.25, "learning_rate": 6.842838565855472e-06, "loss": 0.5138, "step": 29210 }, { "epoch": 0.6649523246023258, "grad_norm": 284.0, "learning_rate": 6.838194315437489e-06, "loss": 1.3191, "step": 29220 }, { "epoch": 0.6651798921329904, "grad_norm": 63.25, "learning_rate": 6.833550065019506e-06, "loss": 0.3344, "step": 29230 }, { "epoch": 0.6654074596636552, "grad_norm": 215.0, "learning_rate": 6.828905814601524e-06, "loss": 1.3105, "step": 29240 }, { "epoch": 0.6656350271943199, "grad_norm": 342.0, "learning_rate": 6.824261564183541e-06, "loss": 1.1083, "step": 29250 }, { "epoch": 0.6658625947249847, "grad_norm": 232.0, "learning_rate": 6.819617313765559e-06, "loss": 0.7288, "step": 29260 }, { "epoch": 0.6660901622556493, "grad_norm": 274.0, "learning_rate": 6.814973063347577e-06, "loss": 1.2467, "step": 29270 }, { "epoch": 0.6663177297863141, "grad_norm": 274.0, "learning_rate": 6.810328812929594e-06, "loss": 1.363, "step": 29280 }, { "epoch": 0.6665452973169789, "grad_norm": 0.6953125, "learning_rate": 6.805684562511611e-06, "loss": 0.7181, "step": 29290 }, { "epoch": 0.6667728648476435, "grad_norm": 253.0, "learning_rate": 6.801040312093629e-06, "loss": 0.8906, "step": 29300 }, { "epoch": 0.6670004323783083, "grad_norm": 124.5, "learning_rate": 6.796396061675647e-06, "loss": 0.5481, "step": 29310 }, { "epoch": 0.667227999908973, "grad_norm": 0.002685546875, "learning_rate": 6.791751811257663e-06, "loss": 0.6924, "step": 29320 }, { "epoch": 0.6674555674396377, "grad_norm": 164.0, "learning_rate": 6.787107560839681e-06, "loss": 0.7624, "step": 29330 }, { "epoch": 0.6676831349703024, "grad_norm": 105.5, "learning_rate": 6.782463310421699e-06, "loss": 0.8895, "step": 29340 }, { "epoch": 0.6679107025009672, "grad_norm": 0.00136566162109375, "learning_rate": 6.777819060003717e-06, "loss": 0.8149, "step": 29350 }, { "epoch": 0.6681382700316318, "grad_norm": 137.0, "learning_rate": 6.773174809585733e-06, "loss": 1.0462, "step": 29360 }, { "epoch": 0.6683658375622966, "grad_norm": 76.5, "learning_rate": 6.768530559167751e-06, "loss": 0.5308, "step": 29370 }, { "epoch": 0.6685934050929614, "grad_norm": 0.036865234375, "learning_rate": 6.763886308749769e-06, "loss": 0.4774, "step": 29380 }, { "epoch": 0.668820972623626, "grad_norm": 142.0, "learning_rate": 6.759242058331786e-06, "loss": 0.6906, "step": 29390 }, { "epoch": 0.6690485401542908, "grad_norm": 152.0, "learning_rate": 6.754597807913803e-06, "loss": 0.708, "step": 29400 }, { "epoch": 0.6692761076849555, "grad_norm": 0.005035400390625, "learning_rate": 6.749953557495821e-06, "loss": 1.7272, "step": 29410 }, { "epoch": 0.6695036752156203, "grad_norm": 50.25, "learning_rate": 6.7453093070778385e-06, "loss": 0.4084, "step": 29420 }, { "epoch": 0.6697312427462849, "grad_norm": 0.435546875, "learning_rate": 6.740665056659856e-06, "loss": 0.8288, "step": 29430 }, { "epoch": 0.6699588102769497, "grad_norm": 0.0013885498046875, "learning_rate": 6.736020806241873e-06, "loss": 0.9777, "step": 29440 }, { "epoch": 0.6701863778076144, "grad_norm": 1.3671875, "learning_rate": 6.7313765558238905e-06, "loss": 0.5848, "step": 29450 }, { "epoch": 0.6704139453382791, "grad_norm": 162.0, "learning_rate": 6.726732305405908e-06, "loss": 1.2781, "step": 29460 }, { "epoch": 0.6706415128689439, "grad_norm": 11.0625, "learning_rate": 6.7220880549879255e-06, "loss": 0.6443, "step": 29470 }, { "epoch": 0.6708690803996086, "grad_norm": 276.0, "learning_rate": 6.717443804569943e-06, "loss": 0.6079, "step": 29480 }, { "epoch": 0.6710966479302733, "grad_norm": 0.130859375, "learning_rate": 6.7127995541519605e-06, "loss": 1.5336, "step": 29490 }, { "epoch": 0.671324215460938, "grad_norm": 0.00982666015625, "learning_rate": 6.7081553037339775e-06, "loss": 0.739, "step": 29500 }, { "epoch": 0.6715517829916028, "grad_norm": 1.484375, "learning_rate": 6.703511053315995e-06, "loss": 0.1794, "step": 29510 }, { "epoch": 0.6717793505222674, "grad_norm": 0.0751953125, "learning_rate": 6.698866802898013e-06, "loss": 1.0048, "step": 29520 }, { "epoch": 0.6720069180529322, "grad_norm": 110.5, "learning_rate": 6.6942225524800295e-06, "loss": 0.5569, "step": 29530 }, { "epoch": 0.6722344855835969, "grad_norm": 110.0, "learning_rate": 6.6895783020620474e-06, "loss": 0.4082, "step": 29540 }, { "epoch": 0.6724620531142617, "grad_norm": 0.0517578125, "learning_rate": 6.684934051644065e-06, "loss": 1.1431, "step": 29550 }, { "epoch": 0.6726896206449264, "grad_norm": 61.25, "learning_rate": 6.680289801226083e-06, "loss": 0.5949, "step": 29560 }, { "epoch": 0.6729171881755911, "grad_norm": 23.5, "learning_rate": 6.6756455508080994e-06, "loss": 0.4307, "step": 29570 }, { "epoch": 0.6731447557062559, "grad_norm": 127.0, "learning_rate": 6.671001300390117e-06, "loss": 0.6261, "step": 29580 }, { "epoch": 0.6733723232369205, "grad_norm": 0.20703125, "learning_rate": 6.666357049972135e-06, "loss": 0.8326, "step": 29590 }, { "epoch": 0.6735998907675853, "grad_norm": 0.00213623046875, "learning_rate": 6.661712799554152e-06, "loss": 0.6776, "step": 29600 }, { "epoch": 0.67382745829825, "grad_norm": 0.53515625, "learning_rate": 6.657068549136169e-06, "loss": 0.1373, "step": 29610 }, { "epoch": 0.6740550258289147, "grad_norm": 230.0, "learning_rate": 6.652424298718187e-06, "loss": 0.6001, "step": 29620 }, { "epoch": 0.6742825933595794, "grad_norm": 243.0, "learning_rate": 6.647780048300205e-06, "loss": 0.5196, "step": 29630 }, { "epoch": 0.6745101608902442, "grad_norm": 0.0145263671875, "learning_rate": 6.643135797882222e-06, "loss": 0.293, "step": 29640 }, { "epoch": 0.674737728420909, "grad_norm": 334.0, "learning_rate": 6.63849154746424e-06, "loss": 1.9418, "step": 29650 }, { "epoch": 0.6749652959515736, "grad_norm": 66.0, "learning_rate": 6.633847297046257e-06, "loss": 1.1006, "step": 29660 }, { "epoch": 0.6751928634822384, "grad_norm": 59.75, "learning_rate": 6.629203046628274e-06, "loss": 0.2896, "step": 29670 }, { "epoch": 0.675420431012903, "grad_norm": 0.06591796875, "learning_rate": 6.624558796210292e-06, "loss": 0.6353, "step": 29680 }, { "epoch": 0.6756479985435678, "grad_norm": 73.5, "learning_rate": 6.61991454579231e-06, "loss": 0.6565, "step": 29690 }, { "epoch": 0.6758755660742325, "grad_norm": 260.0, "learning_rate": 6.615270295374328e-06, "loss": 1.1819, "step": 29700 }, { "epoch": 0.6761031336048973, "grad_norm": 74.5, "learning_rate": 6.610626044956344e-06, "loss": 1.4757, "step": 29710 }, { "epoch": 0.6763307011355619, "grad_norm": 0.29296875, "learning_rate": 6.605981794538362e-06, "loss": 0.4499, "step": 29720 }, { "epoch": 0.6765582686662267, "grad_norm": 138.0, "learning_rate": 6.60133754412038e-06, "loss": 0.5811, "step": 29730 }, { "epoch": 0.6767858361968915, "grad_norm": 410.0, "learning_rate": 6.596693293702398e-06, "loss": 1.5365, "step": 29740 }, { "epoch": 0.6770134037275561, "grad_norm": 113.5, "learning_rate": 6.592049043284414e-06, "loss": 0.5053, "step": 29750 }, { "epoch": 0.6772409712582209, "grad_norm": 306.0, "learning_rate": 6.587404792866432e-06, "loss": 0.7087, "step": 29760 }, { "epoch": 0.6774685387888856, "grad_norm": 94.0, "learning_rate": 6.58276054244845e-06, "loss": 0.659, "step": 29770 }, { "epoch": 0.6776961063195504, "grad_norm": 220.0, "learning_rate": 6.578116292030466e-06, "loss": 1.0416, "step": 29780 }, { "epoch": 0.677923673850215, "grad_norm": 264.0, "learning_rate": 6.573472041612484e-06, "loss": 0.4819, "step": 29790 }, { "epoch": 0.6781512413808798, "grad_norm": 165.0, "learning_rate": 6.568827791194502e-06, "loss": 0.8764, "step": 29800 }, { "epoch": 0.6783788089115445, "grad_norm": 458.0, "learning_rate": 6.56418354077652e-06, "loss": 0.9902, "step": 29810 }, { "epoch": 0.6786063764422092, "grad_norm": 38.25, "learning_rate": 6.559539290358537e-06, "loss": 1.1581, "step": 29820 }, { "epoch": 0.678833943972874, "grad_norm": 370.0, "learning_rate": 6.554895039940554e-06, "loss": 0.8237, "step": 29830 }, { "epoch": 0.6790615115035387, "grad_norm": 163.0, "learning_rate": 6.550250789522572e-06, "loss": 0.188, "step": 29840 }, { "epoch": 0.6792890790342034, "grad_norm": 69.5, "learning_rate": 6.545606539104589e-06, "loss": 0.6225, "step": 29850 }, { "epoch": 0.6795166465648681, "grad_norm": 1.015625, "learning_rate": 6.540962288686607e-06, "loss": 0.3744, "step": 29860 }, { "epoch": 0.6797442140955329, "grad_norm": 110.5, "learning_rate": 6.536318038268624e-06, "loss": 1.0366, "step": 29870 }, { "epoch": 0.6799717816261975, "grad_norm": 132.0, "learning_rate": 6.531673787850642e-06, "loss": 0.8929, "step": 29880 }, { "epoch": 0.6801993491568623, "grad_norm": 3.25, "learning_rate": 6.527029537432659e-06, "loss": 0.6263, "step": 29890 }, { "epoch": 0.680426916687527, "grad_norm": 145.0, "learning_rate": 6.522385287014677e-06, "loss": 0.8416, "step": 29900 }, { "epoch": 0.6806544842181917, "grad_norm": 167.0, "learning_rate": 6.5177410365966946e-06, "loss": 1.3066, "step": 29910 }, { "epoch": 0.6808820517488565, "grad_norm": 42.25, "learning_rate": 6.513096786178711e-06, "loss": 0.8588, "step": 29920 }, { "epoch": 0.6811096192795212, "grad_norm": 3.625, "learning_rate": 6.508452535760729e-06, "loss": 0.5016, "step": 29930 }, { "epoch": 0.681337186810186, "grad_norm": 0.00164794921875, "learning_rate": 6.5038082853427466e-06, "loss": 0.6076, "step": 29940 }, { "epoch": 0.6815647543408506, "grad_norm": 314.0, "learning_rate": 6.4991640349247645e-06, "loss": 0.8688, "step": 29950 }, { "epoch": 0.6817923218715154, "grad_norm": 150.0, "learning_rate": 6.494519784506781e-06, "loss": 0.6828, "step": 29960 }, { "epoch": 0.6820198894021801, "grad_norm": 0.10791015625, "learning_rate": 6.4898755340887986e-06, "loss": 1.1902, "step": 29970 }, { "epoch": 0.6822474569328448, "grad_norm": 135.0, "learning_rate": 6.4852312836708165e-06, "loss": 0.6441, "step": 29980 }, { "epoch": 0.6824750244635095, "grad_norm": 0.5390625, "learning_rate": 6.4805870332528335e-06, "loss": 0.6523, "step": 29990 }, { "epoch": 0.6827025919941743, "grad_norm": 0.00860595703125, "learning_rate": 6.475942782834851e-06, "loss": 0.3291, "step": 30000 }, { "epoch": 0.682930159524839, "grad_norm": 33.25, "learning_rate": 6.4712985324168685e-06, "loss": 0.4691, "step": 30010 }, { "epoch": 0.6831577270555037, "grad_norm": 294.0, "learning_rate": 6.466654281998886e-06, "loss": 1.0303, "step": 30020 }, { "epoch": 0.6833852945861685, "grad_norm": 0.001007080078125, "learning_rate": 6.4620100315809034e-06, "loss": 0.0157, "step": 30030 }, { "epoch": 0.6836128621168331, "grad_norm": 132.0, "learning_rate": 6.4573657811629205e-06, "loss": 0.5983, "step": 30040 }, { "epoch": 0.6838404296474979, "grad_norm": 0.00118255615234375, "learning_rate": 6.452721530744938e-06, "loss": 0.6903, "step": 30050 }, { "epoch": 0.6840679971781626, "grad_norm": 89.0, "learning_rate": 6.4480772803269555e-06, "loss": 0.6733, "step": 30060 }, { "epoch": 0.6842955647088274, "grad_norm": 0.015869140625, "learning_rate": 6.443433029908973e-06, "loss": 0.48, "step": 30070 }, { "epoch": 0.684523132239492, "grad_norm": 8.0625, "learning_rate": 6.438788779490991e-06, "loss": 1.8604, "step": 30080 }, { "epoch": 0.6847506997701568, "grad_norm": 0.0034637451171875, "learning_rate": 6.434144529073008e-06, "loss": 0.2588, "step": 30090 }, { "epoch": 0.6849782673008216, "grad_norm": 0.00665283203125, "learning_rate": 6.429500278655025e-06, "loss": 0.9941, "step": 30100 }, { "epoch": 0.6852058348314862, "grad_norm": 304.0, "learning_rate": 6.424856028237043e-06, "loss": 0.9489, "step": 30110 }, { "epoch": 0.685433402362151, "grad_norm": 98.0, "learning_rate": 6.420211777819061e-06, "loss": 1.0994, "step": 30120 }, { "epoch": 0.6856609698928157, "grad_norm": 328.0, "learning_rate": 6.415567527401077e-06, "loss": 1.1752, "step": 30130 }, { "epoch": 0.6858885374234804, "grad_norm": 80.5, "learning_rate": 6.410923276983095e-06, "loss": 0.5964, "step": 30140 }, { "epoch": 0.6861161049541451, "grad_norm": 219.0, "learning_rate": 6.406279026565113e-06, "loss": 1.1011, "step": 30150 }, { "epoch": 0.6863436724848099, "grad_norm": 107.0, "learning_rate": 6.401634776147131e-06, "loss": 0.3195, "step": 30160 }, { "epoch": 0.6865712400154745, "grad_norm": 138.0, "learning_rate": 6.396990525729147e-06, "loss": 0.55, "step": 30170 }, { "epoch": 0.6867988075461393, "grad_norm": 0.625, "learning_rate": 6.392346275311165e-06, "loss": 0.2786, "step": 30180 }, { "epoch": 0.6870263750768041, "grad_norm": 34.25, "learning_rate": 6.387702024893183e-06, "loss": 0.835, "step": 30190 }, { "epoch": 0.6872539426074687, "grad_norm": 0.0011444091796875, "learning_rate": 6.3830577744752e-06, "loss": 0.9243, "step": 30200 }, { "epoch": 0.6874815101381335, "grad_norm": 0.006011962890625, "learning_rate": 6.378413524057217e-06, "loss": 1.5308, "step": 30210 }, { "epoch": 0.6877090776687982, "grad_norm": 0.2099609375, "learning_rate": 6.373769273639235e-06, "loss": 0.4434, "step": 30220 }, { "epoch": 0.687936645199463, "grad_norm": 0.51171875, "learning_rate": 6.369125023221253e-06, "loss": 0.951, "step": 30230 }, { "epoch": 0.6881642127301276, "grad_norm": 0.00104522705078125, "learning_rate": 6.36448077280327e-06, "loss": 1.0154, "step": 30240 }, { "epoch": 0.6883917802607924, "grad_norm": 324.0, "learning_rate": 6.359836522385288e-06, "loss": 0.5114, "step": 30250 }, { "epoch": 0.6886193477914572, "grad_norm": 0.431640625, "learning_rate": 6.355192271967305e-06, "loss": 0.5994, "step": 30260 }, { "epoch": 0.6888469153221218, "grad_norm": 178.0, "learning_rate": 6.350548021549322e-06, "loss": 0.9954, "step": 30270 }, { "epoch": 0.6890744828527866, "grad_norm": 55.25, "learning_rate": 6.34590377113134e-06, "loss": 0.2583, "step": 30280 }, { "epoch": 0.6893020503834513, "grad_norm": 0.003448486328125, "learning_rate": 6.341259520713358e-06, "loss": 1.5482, "step": 30290 }, { "epoch": 0.689529617914116, "grad_norm": 58.5, "learning_rate": 6.336615270295376e-06, "loss": 0.2008, "step": 30300 }, { "epoch": 0.6897571854447807, "grad_norm": 67.0, "learning_rate": 6.331971019877392e-06, "loss": 0.5176, "step": 30310 }, { "epoch": 0.6899847529754455, "grad_norm": 150.0, "learning_rate": 6.32732676945941e-06, "loss": 1.0199, "step": 30320 }, { "epoch": 0.6902123205061101, "grad_norm": 182.0, "learning_rate": 6.322682519041428e-06, "loss": 1.6286, "step": 30330 }, { "epoch": 0.6904398880367749, "grad_norm": 181.0, "learning_rate": 6.318038268623444e-06, "loss": 0.6565, "step": 30340 }, { "epoch": 0.6906674555674397, "grad_norm": 0.003082275390625, "learning_rate": 6.313394018205462e-06, "loss": 1.4196, "step": 30350 }, { "epoch": 0.6908950230981044, "grad_norm": 0.046630859375, "learning_rate": 6.30874976778748e-06, "loss": 1.3721, "step": 30360 }, { "epoch": 0.6911225906287691, "grad_norm": 110.5, "learning_rate": 6.304105517369498e-06, "loss": 0.9599, "step": 30370 }, { "epoch": 0.6913501581594338, "grad_norm": 0.005584716796875, "learning_rate": 6.299461266951514e-06, "loss": 0.5141, "step": 30380 }, { "epoch": 0.6915777256900986, "grad_norm": 472.0, "learning_rate": 6.294817016533532e-06, "loss": 0.603, "step": 30390 }, { "epoch": 0.6918052932207632, "grad_norm": 48.0, "learning_rate": 6.29017276611555e-06, "loss": 0.706, "step": 30400 }, { "epoch": 0.692032860751428, "grad_norm": 0.00176239013671875, "learning_rate": 6.285528515697567e-06, "loss": 0.2772, "step": 30410 }, { "epoch": 0.6922604282820927, "grad_norm": 138.0, "learning_rate": 6.280884265279585e-06, "loss": 1.1302, "step": 30420 }, { "epoch": 0.6924879958127574, "grad_norm": 398.0, "learning_rate": 6.276240014861602e-06, "loss": 0.9018, "step": 30430 }, { "epoch": 0.6927155633434222, "grad_norm": 201.0, "learning_rate": 6.27159576444362e-06, "loss": 1.1761, "step": 30440 }, { "epoch": 0.6929431308740869, "grad_norm": 66.0, "learning_rate": 6.266951514025637e-06, "loss": 1.1284, "step": 30450 }, { "epoch": 0.6931706984047517, "grad_norm": 0.00018787384033203125, "learning_rate": 6.262307263607655e-06, "loss": 0.2196, "step": 30460 }, { "epoch": 0.6933982659354163, "grad_norm": 0.000396728515625, "learning_rate": 6.257663013189672e-06, "loss": 0.837, "step": 30470 }, { "epoch": 0.6936258334660811, "grad_norm": 0.012939453125, "learning_rate": 6.253018762771689e-06, "loss": 1.163, "step": 30480 }, { "epoch": 0.6938534009967458, "grad_norm": 227.0, "learning_rate": 6.248374512353707e-06, "loss": 1.0812, "step": 30490 }, { "epoch": 0.6940809685274105, "grad_norm": 0.0026092529296875, "learning_rate": 6.2437302619357245e-06, "loss": 0.4629, "step": 30500 }, { "epoch": 0.6943085360580752, "grad_norm": 0.0004291534423828125, "learning_rate": 6.239086011517742e-06, "loss": 1.4385, "step": 30510 }, { "epoch": 0.69453610358874, "grad_norm": 161.0, "learning_rate": 6.234441761099759e-06, "loss": 0.5595, "step": 30520 }, { "epoch": 0.6947636711194047, "grad_norm": 181.0, "learning_rate": 6.2297975106817765e-06, "loss": 0.2773, "step": 30530 }, { "epoch": 0.6949912386500694, "grad_norm": 52.0, "learning_rate": 6.225153260263794e-06, "loss": 1.1629, "step": 30540 }, { "epoch": 0.6952188061807342, "grad_norm": 0.921875, "learning_rate": 6.220509009845811e-06, "loss": 0.9512, "step": 30550 }, { "epoch": 0.6954463737113988, "grad_norm": 68.5, "learning_rate": 6.2158647594278285e-06, "loss": 0.8913, "step": 30560 }, { "epoch": 0.6956739412420636, "grad_norm": 103.0, "learning_rate": 6.2112205090098464e-06, "loss": 0.5669, "step": 30570 }, { "epoch": 0.6959015087727283, "grad_norm": 0.1552734375, "learning_rate": 6.206576258591864e-06, "loss": 1.1229, "step": 30580 }, { "epoch": 0.696129076303393, "grad_norm": 34.5, "learning_rate": 6.201932008173881e-06, "loss": 0.775, "step": 30590 }, { "epoch": 0.6963566438340577, "grad_norm": 164.0, "learning_rate": 6.1972877577558984e-06, "loss": 0.8508, "step": 30600 }, { "epoch": 0.6965842113647225, "grad_norm": 39.5, "learning_rate": 6.192643507337916e-06, "loss": 0.4268, "step": 30610 }, { "epoch": 0.6968117788953873, "grad_norm": 232.0, "learning_rate": 6.187999256919933e-06, "loss": 1.3293, "step": 30620 }, { "epoch": 0.6970393464260519, "grad_norm": 220.0, "learning_rate": 6.183355006501951e-06, "loss": 0.8973, "step": 30630 }, { "epoch": 0.6972669139567167, "grad_norm": 0.01416015625, "learning_rate": 6.178710756083968e-06, "loss": 0.1674, "step": 30640 }, { "epoch": 0.6974944814873814, "grad_norm": 149.0, "learning_rate": 6.174066505665986e-06, "loss": 0.3363, "step": 30650 }, { "epoch": 0.6977220490180461, "grad_norm": 140.0, "learning_rate": 6.169422255248003e-06, "loss": 1.1134, "step": 30660 }, { "epoch": 0.6979496165487108, "grad_norm": 0.0009765625, "learning_rate": 6.164778004830021e-06, "loss": 0.8335, "step": 30670 }, { "epoch": 0.6981771840793756, "grad_norm": 290.0, "learning_rate": 6.160133754412039e-06, "loss": 0.6245, "step": 30680 }, { "epoch": 0.6984047516100402, "grad_norm": 4.625, "learning_rate": 6.155489503994055e-06, "loss": 1.3146, "step": 30690 }, { "epoch": 0.698632319140705, "grad_norm": 16.625, "learning_rate": 6.150845253576073e-06, "loss": 0.3938, "step": 30700 }, { "epoch": 0.6988598866713698, "grad_norm": 0.00372314453125, "learning_rate": 6.146201003158091e-06, "loss": 1.1865, "step": 30710 }, { "epoch": 0.6990874542020344, "grad_norm": 0.51953125, "learning_rate": 6.141556752740109e-06, "loss": 0.7717, "step": 30720 }, { "epoch": 0.6993150217326992, "grad_norm": 73.0, "learning_rate": 6.136912502322125e-06, "loss": 0.9626, "step": 30730 }, { "epoch": 0.6995425892633639, "grad_norm": 16.5, "learning_rate": 6.132268251904143e-06, "loss": 1.1349, "step": 30740 }, { "epoch": 0.6997701567940287, "grad_norm": 338.0, "learning_rate": 6.127624001486161e-06, "loss": 1.0597, "step": 30750 }, { "epoch": 0.6999977243246933, "grad_norm": 16.375, "learning_rate": 6.122979751068177e-06, "loss": 0.5158, "step": 30760 }, { "epoch": 0.7002252918553581, "grad_norm": 0.115234375, "learning_rate": 6.118335500650195e-06, "loss": 0.7253, "step": 30770 }, { "epoch": 0.7004528593860228, "grad_norm": 290.0, "learning_rate": 6.113691250232213e-06, "loss": 0.7678, "step": 30780 }, { "epoch": 0.7006804269166875, "grad_norm": 146.0, "learning_rate": 6.109046999814231e-06, "loss": 0.3002, "step": 30790 }, { "epoch": 0.7009079944473523, "grad_norm": 2.34375, "learning_rate": 6.104402749396248e-06, "loss": 1.1036, "step": 30800 }, { "epoch": 0.701135561978017, "grad_norm": 148.0, "learning_rate": 6.099758498978265e-06, "loss": 0.801, "step": 30810 }, { "epoch": 0.7013631295086817, "grad_norm": 0.478515625, "learning_rate": 6.095114248560283e-06, "loss": 0.4756, "step": 30820 }, { "epoch": 0.7015906970393464, "grad_norm": 31.75, "learning_rate": 6.0904699981423e-06, "loss": 1.7041, "step": 30830 }, { "epoch": 0.7018182645700112, "grad_norm": 0.0003337860107421875, "learning_rate": 6.085825747724318e-06, "loss": 0.7673, "step": 30840 }, { "epoch": 0.7020458321006758, "grad_norm": 0.0014495849609375, "learning_rate": 6.081181497306336e-06, "loss": 1.1488, "step": 30850 }, { "epoch": 0.7022733996313406, "grad_norm": 0.002838134765625, "learning_rate": 6.076537246888353e-06, "loss": 0.2106, "step": 30860 }, { "epoch": 0.7025009671620053, "grad_norm": 245.0, "learning_rate": 6.07189299647037e-06, "loss": 0.8626, "step": 30870 }, { "epoch": 0.70272853469267, "grad_norm": 96.0, "learning_rate": 6.067248746052388e-06, "loss": 0.9122, "step": 30880 }, { "epoch": 0.7029561022233348, "grad_norm": 0.00213623046875, "learning_rate": 6.062604495634406e-06, "loss": 0.7019, "step": 30890 }, { "epoch": 0.7031836697539995, "grad_norm": 81.5, "learning_rate": 6.057960245216422e-06, "loss": 0.4238, "step": 30900 }, { "epoch": 0.7034112372846643, "grad_norm": 0.0184326171875, "learning_rate": 6.05331599479844e-06, "loss": 0.3054, "step": 30910 }, { "epoch": 0.7036388048153289, "grad_norm": 88.5, "learning_rate": 6.048671744380458e-06, "loss": 1.6876, "step": 30920 }, { "epoch": 0.7038663723459937, "grad_norm": 38.75, "learning_rate": 6.044027493962476e-06, "loss": 0.3755, "step": 30930 }, { "epoch": 0.7040939398766584, "grad_norm": 72.5, "learning_rate": 6.039383243544492e-06, "loss": 1.0602, "step": 30940 }, { "epoch": 0.7043215074073231, "grad_norm": 138.0, "learning_rate": 6.03473899312651e-06, "loss": 1.0472, "step": 30950 }, { "epoch": 0.7045490749379878, "grad_norm": 0.02099609375, "learning_rate": 6.030094742708528e-06, "loss": 0.3505, "step": 30960 }, { "epoch": 0.7047766424686526, "grad_norm": 121.5, "learning_rate": 6.025450492290545e-06, "loss": 0.9165, "step": 30970 }, { "epoch": 0.7050042099993173, "grad_norm": 0.00130462646484375, "learning_rate": 6.020806241872562e-06, "loss": 0.6073, "step": 30980 }, { "epoch": 0.705231777529982, "grad_norm": 2.03125, "learning_rate": 6.01616199145458e-06, "loss": 1.0795, "step": 30990 }, { "epoch": 0.7054593450606468, "grad_norm": 139.0, "learning_rate": 6.011517741036598e-06, "loss": 1.1143, "step": 31000 }, { "epoch": 0.7056869125913114, "grad_norm": 0.36328125, "learning_rate": 6.006873490618615e-06, "loss": 0.0494, "step": 31010 }, { "epoch": 0.7059144801219762, "grad_norm": 21.25, "learning_rate": 6.0022292402006325e-06, "loss": 1.1029, "step": 31020 }, { "epoch": 0.7061420476526409, "grad_norm": 680.0, "learning_rate": 5.99758498978265e-06, "loss": 0.5868, "step": 31030 }, { "epoch": 0.7063696151833057, "grad_norm": 109.5, "learning_rate": 5.992940739364667e-06, "loss": 1.2648, "step": 31040 }, { "epoch": 0.7065971827139703, "grad_norm": 115.5, "learning_rate": 5.9882964889466846e-06, "loss": 0.6155, "step": 31050 }, { "epoch": 0.7068247502446351, "grad_norm": 73.5, "learning_rate": 5.9836522385287025e-06, "loss": 0.7303, "step": 31060 }, { "epoch": 0.7070523177752999, "grad_norm": 163.0, "learning_rate": 5.9790079881107195e-06, "loss": 0.5064, "step": 31070 }, { "epoch": 0.7072798853059645, "grad_norm": 159.0, "learning_rate": 5.9743637376927366e-06, "loss": 1.2467, "step": 31080 }, { "epoch": 0.7075074528366293, "grad_norm": 228.0, "learning_rate": 5.9697194872747545e-06, "loss": 1.5694, "step": 31090 }, { "epoch": 0.707735020367294, "grad_norm": 4.71875, "learning_rate": 5.965075236856772e-06, "loss": 0.4212, "step": 31100 }, { "epoch": 0.7079625878979587, "grad_norm": 174.0, "learning_rate": 5.960430986438789e-06, "loss": 0.4361, "step": 31110 }, { "epoch": 0.7081901554286234, "grad_norm": 18.0, "learning_rate": 5.9557867360208065e-06, "loss": 1.3537, "step": 31120 }, { "epoch": 0.7084177229592882, "grad_norm": 185.0, "learning_rate": 5.951142485602824e-06, "loss": 0.4701, "step": 31130 }, { "epoch": 0.7086452904899528, "grad_norm": 59.25, "learning_rate": 5.946498235184842e-06, "loss": 0.8827, "step": 31140 }, { "epoch": 0.7088728580206176, "grad_norm": 1.0859375, "learning_rate": 5.9418539847668585e-06, "loss": 0.7444, "step": 31150 }, { "epoch": 0.7091004255512824, "grad_norm": 98.5, "learning_rate": 5.937209734348876e-06, "loss": 0.9574, "step": 31160 }, { "epoch": 0.709327993081947, "grad_norm": 76.0, "learning_rate": 5.932565483930894e-06, "loss": 0.5572, "step": 31170 }, { "epoch": 0.7095555606126118, "grad_norm": 47.25, "learning_rate": 5.927921233512911e-06, "loss": 0.5339, "step": 31180 }, { "epoch": 0.7097831281432765, "grad_norm": 60.5, "learning_rate": 5.923276983094928e-06, "loss": 0.6078, "step": 31190 }, { "epoch": 0.7100106956739413, "grad_norm": 217.0, "learning_rate": 5.918632732676946e-06, "loss": 0.435, "step": 31200 }, { "epoch": 0.7102382632046059, "grad_norm": 24.375, "learning_rate": 5.913988482258964e-06, "loss": 0.721, "step": 31210 }, { "epoch": 0.7104658307352707, "grad_norm": 78.0, "learning_rate": 5.909344231840981e-06, "loss": 0.2426, "step": 31220 }, { "epoch": 0.7106933982659354, "grad_norm": 146.0, "learning_rate": 5.904699981422999e-06, "loss": 0.5741, "step": 31230 }, { "epoch": 0.7109209657966001, "grad_norm": 0.09814453125, "learning_rate": 5.900055731005016e-06, "loss": 0.6006, "step": 31240 }, { "epoch": 0.7111485333272649, "grad_norm": 3.296875, "learning_rate": 5.895411480587033e-06, "loss": 0.7801, "step": 31250 }, { "epoch": 0.7113761008579296, "grad_norm": 100.5, "learning_rate": 5.890767230169051e-06, "loss": 0.7449, "step": 31260 }, { "epoch": 0.7116036683885943, "grad_norm": 161.0, "learning_rate": 5.886122979751069e-06, "loss": 0.9067, "step": 31270 }, { "epoch": 0.711831235919259, "grad_norm": 77.0, "learning_rate": 5.881478729333087e-06, "loss": 1.1141, "step": 31280 }, { "epoch": 0.7120588034499238, "grad_norm": 8.25, "learning_rate": 5.876834478915103e-06, "loss": 0.5541, "step": 31290 }, { "epoch": 0.7122863709805884, "grad_norm": 81.5, "learning_rate": 5.872190228497121e-06, "loss": 0.3083, "step": 31300 }, { "epoch": 0.7125139385112532, "grad_norm": 0.050048828125, "learning_rate": 5.867545978079139e-06, "loss": 0.4452, "step": 31310 }, { "epoch": 0.712741506041918, "grad_norm": 171.0, "learning_rate": 5.862901727661155e-06, "loss": 0.6841, "step": 31320 }, { "epoch": 0.7129690735725827, "grad_norm": 0.0036773681640625, "learning_rate": 5.858257477243173e-06, "loss": 1.1581, "step": 31330 }, { "epoch": 0.7131966411032474, "grad_norm": 43.5, "learning_rate": 5.853613226825191e-06, "loss": 1.094, "step": 31340 }, { "epoch": 0.7134242086339121, "grad_norm": 0.71875, "learning_rate": 5.848968976407209e-06, "loss": 0.7785, "step": 31350 }, { "epoch": 0.7136517761645769, "grad_norm": 113.0, "learning_rate": 5.844324725989225e-06, "loss": 1.1154, "step": 31360 }, { "epoch": 0.7138793436952415, "grad_norm": 0.0986328125, "learning_rate": 5.839680475571243e-06, "loss": 0.8071, "step": 31370 }, { "epoch": 0.7141069112259063, "grad_norm": 374.0, "learning_rate": 5.835036225153261e-06, "loss": 1.7172, "step": 31380 }, { "epoch": 0.714334478756571, "grad_norm": 41.0, "learning_rate": 5.830391974735278e-06, "loss": 0.2336, "step": 31390 }, { "epoch": 0.7145620462872357, "grad_norm": 294.0, "learning_rate": 5.825747724317296e-06, "loss": 2.3095, "step": 31400 }, { "epoch": 0.7147896138179005, "grad_norm": 145.0, "learning_rate": 5.821103473899313e-06, "loss": 0.9765, "step": 31410 }, { "epoch": 0.7150171813485652, "grad_norm": 72.5, "learning_rate": 5.816459223481331e-06, "loss": 0.4695, "step": 31420 }, { "epoch": 0.71524474887923, "grad_norm": 0.0025177001953125, "learning_rate": 5.811814973063348e-06, "loss": 0.3084, "step": 31430 }, { "epoch": 0.7154723164098946, "grad_norm": 0.1396484375, "learning_rate": 5.807170722645366e-06, "loss": 0.7108, "step": 31440 }, { "epoch": 0.7156998839405594, "grad_norm": 161.0, "learning_rate": 5.802526472227384e-06, "loss": 0.707, "step": 31450 }, { "epoch": 0.715927451471224, "grad_norm": 228.0, "learning_rate": 5.7978822218094e-06, "loss": 0.9455, "step": 31460 }, { "epoch": 0.7161550190018888, "grad_norm": 0.01336669921875, "learning_rate": 5.793237971391418e-06, "loss": 0.4935, "step": 31470 }, { "epoch": 0.7163825865325535, "grad_norm": 145.0, "learning_rate": 5.788593720973436e-06, "loss": 1.5777, "step": 31480 }, { "epoch": 0.7166101540632183, "grad_norm": 9.9375, "learning_rate": 5.783949470555454e-06, "loss": 0.4491, "step": 31490 }, { "epoch": 0.716837721593883, "grad_norm": 116.5, "learning_rate": 5.77930522013747e-06, "loss": 1.0652, "step": 31500 }, { "epoch": 0.7170652891245477, "grad_norm": 44.25, "learning_rate": 5.774660969719488e-06, "loss": 0.9927, "step": 31510 }, { "epoch": 0.7172928566552125, "grad_norm": 0.0025482177734375, "learning_rate": 5.770016719301506e-06, "loss": 0.1149, "step": 31520 }, { "epoch": 0.7175204241858771, "grad_norm": 173.0, "learning_rate": 5.765372468883522e-06, "loss": 1.3964, "step": 31530 }, { "epoch": 0.7177479917165419, "grad_norm": 0.034912109375, "learning_rate": 5.76072821846554e-06, "loss": 0.9465, "step": 31540 }, { "epoch": 0.7179755592472066, "grad_norm": 115.5, "learning_rate": 5.756083968047558e-06, "loss": 0.6977, "step": 31550 }, { "epoch": 0.7182031267778713, "grad_norm": 0.376953125, "learning_rate": 5.7514397176295755e-06, "loss": 0.3787, "step": 31560 }, { "epoch": 0.718430694308536, "grad_norm": 37.0, "learning_rate": 5.746795467211593e-06, "loss": 0.9214, "step": 31570 }, { "epoch": 0.7186582618392008, "grad_norm": 87.0, "learning_rate": 5.74215121679361e-06, "loss": 0.5925, "step": 31580 }, { "epoch": 0.7188858293698656, "grad_norm": 75.5, "learning_rate": 5.7375069663756275e-06, "loss": 0.4311, "step": 31590 }, { "epoch": 0.7191133969005302, "grad_norm": 131.0, "learning_rate": 5.732862715957645e-06, "loss": 1.9153, "step": 31600 }, { "epoch": 0.719340964431195, "grad_norm": 0.007659912109375, "learning_rate": 5.7282184655396625e-06, "loss": 0.8574, "step": 31610 }, { "epoch": 0.7195685319618597, "grad_norm": 242.0, "learning_rate": 5.72357421512168e-06, "loss": 1.5322, "step": 31620 }, { "epoch": 0.7197960994925244, "grad_norm": 340.0, "learning_rate": 5.7189299647036975e-06, "loss": 1.0505, "step": 31630 }, { "epoch": 0.7200236670231891, "grad_norm": 103.5, "learning_rate": 5.7142857142857145e-06, "loss": 0.8903, "step": 31640 }, { "epoch": 0.7202512345538539, "grad_norm": 0.00347900390625, "learning_rate": 5.709641463867732e-06, "loss": 1.0776, "step": 31650 }, { "epoch": 0.7204788020845185, "grad_norm": 0.00213623046875, "learning_rate": 5.70499721344975e-06, "loss": 0.3684, "step": 31660 }, { "epoch": 0.7207063696151833, "grad_norm": 52.75, "learning_rate": 5.7003529630317665e-06, "loss": 0.3727, "step": 31670 }, { "epoch": 0.7209339371458481, "grad_norm": 39.5, "learning_rate": 5.6957087126137844e-06, "loss": 1.0637, "step": 31680 }, { "epoch": 0.7211615046765127, "grad_norm": 0.146484375, "learning_rate": 5.691064462195802e-06, "loss": 1.0417, "step": 31690 }, { "epoch": 0.7213890722071775, "grad_norm": 76.0, "learning_rate": 5.68642021177782e-06, "loss": 0.9001, "step": 31700 }, { "epoch": 0.7216166397378422, "grad_norm": 164.0, "learning_rate": 5.6817759613598364e-06, "loss": 0.4436, "step": 31710 }, { "epoch": 0.721844207268507, "grad_norm": 0.2470703125, "learning_rate": 5.677131710941854e-06, "loss": 1.2977, "step": 31720 }, { "epoch": 0.7220717747991716, "grad_norm": 253.0, "learning_rate": 5.672487460523872e-06, "loss": 1.0016, "step": 31730 }, { "epoch": 0.7222993423298364, "grad_norm": 0.003204345703125, "learning_rate": 5.667843210105889e-06, "loss": 1.0249, "step": 31740 }, { "epoch": 0.722526909860501, "grad_norm": 201.0, "learning_rate": 5.663198959687906e-06, "loss": 0.7059, "step": 31750 }, { "epoch": 0.7227544773911658, "grad_norm": 728.0, "learning_rate": 5.658554709269924e-06, "loss": 1.3763, "step": 31760 }, { "epoch": 0.7229820449218306, "grad_norm": 0.062255859375, "learning_rate": 5.653910458851942e-06, "loss": 1.6671, "step": 31770 }, { "epoch": 0.7232096124524953, "grad_norm": 0.00040435791015625, "learning_rate": 5.649266208433959e-06, "loss": 0.4813, "step": 31780 }, { "epoch": 0.72343717998316, "grad_norm": 23.5, "learning_rate": 5.644621958015976e-06, "loss": 0.474, "step": 31790 }, { "epoch": 0.7236647475138247, "grad_norm": 0.99609375, "learning_rate": 5.639977707597994e-06, "loss": 1.1278, "step": 31800 }, { "epoch": 0.7238923150444895, "grad_norm": 221.0, "learning_rate": 5.635333457180011e-06, "loss": 1.5057, "step": 31810 }, { "epoch": 0.7241198825751541, "grad_norm": 48.25, "learning_rate": 5.630689206762029e-06, "loss": 1.008, "step": 31820 }, { "epoch": 0.7243474501058189, "grad_norm": 116.5, "learning_rate": 5.626044956344047e-06, "loss": 0.5513, "step": 31830 }, { "epoch": 0.7245750176364836, "grad_norm": 0.000400543212890625, "learning_rate": 5.621400705926064e-06, "loss": 0.3141, "step": 31840 }, { "epoch": 0.7248025851671483, "grad_norm": 64.5, "learning_rate": 5.616756455508081e-06, "loss": 1.5741, "step": 31850 }, { "epoch": 0.7250301526978131, "grad_norm": 104.0, "learning_rate": 5.612112205090099e-06, "loss": 0.8305, "step": 31860 }, { "epoch": 0.7252577202284778, "grad_norm": 0.021728515625, "learning_rate": 5.607467954672117e-06, "loss": 0.254, "step": 31870 }, { "epoch": 0.7254852877591426, "grad_norm": 308.0, "learning_rate": 5.602823704254133e-06, "loss": 0.692, "step": 31880 }, { "epoch": 0.7257128552898072, "grad_norm": 105.0, "learning_rate": 5.598179453836151e-06, "loss": 0.6012, "step": 31890 }, { "epoch": 0.725940422820472, "grad_norm": 33.0, "learning_rate": 5.593535203418169e-06, "loss": 0.9286, "step": 31900 }, { "epoch": 0.7261679903511367, "grad_norm": 0.0155029296875, "learning_rate": 5.588890953000187e-06, "loss": 0.8108, "step": 31910 }, { "epoch": 0.7263955578818014, "grad_norm": 0.201171875, "learning_rate": 5.584246702582203e-06, "loss": 0.8751, "step": 31920 }, { "epoch": 0.7266231254124661, "grad_norm": 0.107421875, "learning_rate": 5.579602452164221e-06, "loss": 2.6684, "step": 31930 }, { "epoch": 0.7268506929431309, "grad_norm": 0.007659912109375, "learning_rate": 5.574958201746239e-06, "loss": 0.6196, "step": 31940 }, { "epoch": 0.7270782604737956, "grad_norm": 247.0, "learning_rate": 5.570313951328256e-06, "loss": 1.0708, "step": 31950 }, { "epoch": 0.7273058280044603, "grad_norm": 0.0040283203125, "learning_rate": 5.565669700910273e-06, "loss": 0.2569, "step": 31960 }, { "epoch": 0.7275333955351251, "grad_norm": 71.5, "learning_rate": 5.561025450492291e-06, "loss": 0.4322, "step": 31970 }, { "epoch": 0.7277609630657897, "grad_norm": 183.0, "learning_rate": 5.556381200074309e-06, "loss": 0.7901, "step": 31980 }, { "epoch": 0.7279885305964545, "grad_norm": 368.0, "learning_rate": 5.551736949656326e-06, "loss": 0.9497, "step": 31990 }, { "epoch": 0.7282160981271192, "grad_norm": 166.0, "learning_rate": 5.547092699238344e-06, "loss": 1.1271, "step": 32000 }, { "epoch": 0.728443665657784, "grad_norm": 13.3125, "learning_rate": 5.542448448820361e-06, "loss": 0.2628, "step": 32010 }, { "epoch": 0.7286712331884486, "grad_norm": 98.5, "learning_rate": 5.537804198402378e-06, "loss": 1.0449, "step": 32020 }, { "epoch": 0.7288988007191134, "grad_norm": 2.375, "learning_rate": 5.533159947984396e-06, "loss": 0.9997, "step": 32030 }, { "epoch": 0.7291263682497782, "grad_norm": 94.5, "learning_rate": 5.528515697566414e-06, "loss": 0.9473, "step": 32040 }, { "epoch": 0.7293539357804428, "grad_norm": 154.0, "learning_rate": 5.5238714471484316e-06, "loss": 1.2952, "step": 32050 }, { "epoch": 0.7295815033111076, "grad_norm": 0.010498046875, "learning_rate": 5.519227196730448e-06, "loss": 1.7574, "step": 32060 }, { "epoch": 0.7298090708417723, "grad_norm": 1.1484375, "learning_rate": 5.514582946312466e-06, "loss": 1.4025, "step": 32070 }, { "epoch": 0.730036638372437, "grad_norm": 224.0, "learning_rate": 5.5099386958944836e-06, "loss": 1.054, "step": 32080 }, { "epoch": 0.7302642059031017, "grad_norm": 89.5, "learning_rate": 5.5052944454765e-06, "loss": 0.471, "step": 32090 }, { "epoch": 0.7304917734337665, "grad_norm": 51.5, "learning_rate": 5.500650195058518e-06, "loss": 0.9985, "step": 32100 }, { "epoch": 0.7307193409644311, "grad_norm": 8.5, "learning_rate": 5.4960059446405356e-06, "loss": 0.9002, "step": 32110 }, { "epoch": 0.7309469084950959, "grad_norm": 3.859375, "learning_rate": 5.4913616942225535e-06, "loss": 0.9282, "step": 32120 }, { "epoch": 0.7311744760257607, "grad_norm": 232.0, "learning_rate": 5.48671744380457e-06, "loss": 0.916, "step": 32130 }, { "epoch": 0.7314020435564254, "grad_norm": 108.0, "learning_rate": 5.482073193386588e-06, "loss": 0.5656, "step": 32140 }, { "epoch": 0.7316296110870901, "grad_norm": 46.75, "learning_rate": 5.4774289429686055e-06, "loss": 1.0033, "step": 32150 }, { "epoch": 0.7318571786177548, "grad_norm": 61.5, "learning_rate": 5.4727846925506225e-06, "loss": 0.5867, "step": 32160 }, { "epoch": 0.7320847461484196, "grad_norm": 0.00469970703125, "learning_rate": 5.4681404421326404e-06, "loss": 0.4891, "step": 32170 }, { "epoch": 0.7323123136790842, "grad_norm": 51.5, "learning_rate": 5.4634961917146575e-06, "loss": 0.7372, "step": 32180 }, { "epoch": 0.732539881209749, "grad_norm": 185.0, "learning_rate": 5.458851941296675e-06, "loss": 0.9012, "step": 32190 }, { "epoch": 0.7327674487404137, "grad_norm": 179.0, "learning_rate": 5.4542076908786925e-06, "loss": 0.9028, "step": 32200 }, { "epoch": 0.7329950162710784, "grad_norm": 0.0007171630859375, "learning_rate": 5.44956344046071e-06, "loss": 0.7397, "step": 32210 }, { "epoch": 0.7332225838017432, "grad_norm": 87.5, "learning_rate": 5.444919190042728e-06, "loss": 0.8098, "step": 32220 }, { "epoch": 0.7334501513324079, "grad_norm": 0.306640625, "learning_rate": 5.4402749396247445e-06, "loss": 0.9793, "step": 32230 }, { "epoch": 0.7336777188630726, "grad_norm": 0.0087890625, "learning_rate": 5.435630689206762e-06, "loss": 0.4178, "step": 32240 }, { "epoch": 0.7339052863937373, "grad_norm": 0.00958251953125, "learning_rate": 5.43098643878878e-06, "loss": 0.3515, "step": 32250 }, { "epoch": 0.7341328539244021, "grad_norm": 262.0, "learning_rate": 5.426342188370798e-06, "loss": 1.0374, "step": 32260 }, { "epoch": 0.7343604214550667, "grad_norm": 1.59375, "learning_rate": 5.421697937952814e-06, "loss": 0.4201, "step": 32270 }, { "epoch": 0.7345879889857315, "grad_norm": 164.0, "learning_rate": 5.417053687534832e-06, "loss": 1.2203, "step": 32280 }, { "epoch": 0.7348155565163963, "grad_norm": 428.0, "learning_rate": 5.41240943711685e-06, "loss": 1.3076, "step": 32290 }, { "epoch": 0.735043124047061, "grad_norm": 0.1201171875, "learning_rate": 5.407765186698866e-06, "loss": 0.6184, "step": 32300 }, { "epoch": 0.7352706915777257, "grad_norm": 0.006195068359375, "learning_rate": 5.403120936280884e-06, "loss": 0.5561, "step": 32310 }, { "epoch": 0.7354982591083904, "grad_norm": 119.0, "learning_rate": 5.398476685862902e-06, "loss": 0.403, "step": 32320 }, { "epoch": 0.7357258266390552, "grad_norm": 33.75, "learning_rate": 5.39383243544492e-06, "loss": 0.548, "step": 32330 }, { "epoch": 0.7359533941697198, "grad_norm": 0.103515625, "learning_rate": 5.389188185026937e-06, "loss": 0.4353, "step": 32340 }, { "epoch": 0.7361809617003846, "grad_norm": 82.5, "learning_rate": 5.384543934608954e-06, "loss": 0.5967, "step": 32350 }, { "epoch": 0.7364085292310493, "grad_norm": 0.00921630859375, "learning_rate": 5.379899684190972e-06, "loss": 0.8002, "step": 32360 }, { "epoch": 0.736636096761714, "grad_norm": 276.0, "learning_rate": 5.375255433772989e-06, "loss": 0.8929, "step": 32370 }, { "epoch": 0.7368636642923788, "grad_norm": 6.34375, "learning_rate": 5.370611183355007e-06, "loss": 0.6049, "step": 32380 }, { "epoch": 0.7370912318230435, "grad_norm": 162.0, "learning_rate": 5.365966932937024e-06, "loss": 0.4219, "step": 32390 }, { "epoch": 0.7373187993537083, "grad_norm": 52.5, "learning_rate": 5.361322682519042e-06, "loss": 0.5342, "step": 32400 }, { "epoch": 0.7375463668843729, "grad_norm": 0.0155029296875, "learning_rate": 5.356678432101059e-06, "loss": 0.0915, "step": 32410 }, { "epoch": 0.7377739344150377, "grad_norm": 302.0, "learning_rate": 5.352034181683077e-06, "loss": 1.2751, "step": 32420 }, { "epoch": 0.7380015019457024, "grad_norm": 0.000629425048828125, "learning_rate": 5.347389931265095e-06, "loss": 1.1275, "step": 32430 }, { "epoch": 0.7382290694763671, "grad_norm": 40.75, "learning_rate": 5.342745680847111e-06, "loss": 0.5124, "step": 32440 }, { "epoch": 0.7384566370070318, "grad_norm": 134.0, "learning_rate": 5.338101430429129e-06, "loss": 0.8746, "step": 32450 }, { "epoch": 0.7386842045376966, "grad_norm": 200.0, "learning_rate": 5.333457180011147e-06, "loss": 0.8715, "step": 32460 }, { "epoch": 0.7389117720683613, "grad_norm": 292.0, "learning_rate": 5.328812929593165e-06, "loss": 0.9141, "step": 32470 }, { "epoch": 0.739139339599026, "grad_norm": 100.0, "learning_rate": 5.324168679175181e-06, "loss": 1.0076, "step": 32480 }, { "epoch": 0.7393669071296908, "grad_norm": 278.0, "learning_rate": 5.319524428757199e-06, "loss": 0.5397, "step": 32490 }, { "epoch": 0.7395944746603554, "grad_norm": 55.75, "learning_rate": 5.314880178339217e-06, "loss": 0.8196, "step": 32500 }, { "epoch": 0.7398220421910202, "grad_norm": 0.00677490234375, "learning_rate": 5.310235927921233e-06, "loss": 0.9053, "step": 32510 }, { "epoch": 0.7400496097216849, "grad_norm": 122.5, "learning_rate": 5.305591677503251e-06, "loss": 0.3075, "step": 32520 }, { "epoch": 0.7402771772523496, "grad_norm": 374.0, "learning_rate": 5.300947427085269e-06, "loss": 0.8844, "step": 32530 }, { "epoch": 0.7405047447830143, "grad_norm": 0.007568359375, "learning_rate": 5.296303176667287e-06, "loss": 0.5985, "step": 32540 }, { "epoch": 0.7407323123136791, "grad_norm": 0.003936767578125, "learning_rate": 5.291658926249304e-06, "loss": 0.7109, "step": 32550 }, { "epoch": 0.7409598798443439, "grad_norm": 0.0033721923828125, "learning_rate": 5.287014675831321e-06, "loss": 0.3126, "step": 32560 }, { "epoch": 0.7411874473750085, "grad_norm": 1.53125, "learning_rate": 5.282370425413339e-06, "loss": 0.5435, "step": 32570 }, { "epoch": 0.7414150149056733, "grad_norm": 0.00421142578125, "learning_rate": 5.277726174995356e-06, "loss": 0.4986, "step": 32580 }, { "epoch": 0.741642582436338, "grad_norm": 0.0517578125, "learning_rate": 5.273081924577374e-06, "loss": 1.4821, "step": 32590 }, { "epoch": 0.7418701499670027, "grad_norm": 12.75, "learning_rate": 5.268437674159392e-06, "loss": 0.1919, "step": 32600 }, { "epoch": 0.7420977174976674, "grad_norm": 139.0, "learning_rate": 5.263793423741409e-06, "loss": 0.9076, "step": 32610 }, { "epoch": 0.7423252850283322, "grad_norm": 19.875, "learning_rate": 5.259149173323426e-06, "loss": 0.8514, "step": 32620 }, { "epoch": 0.7425528525589968, "grad_norm": 96.5, "learning_rate": 5.254504922905444e-06, "loss": 0.6062, "step": 32630 }, { "epoch": 0.7427804200896616, "grad_norm": 9.0, "learning_rate": 5.2498606724874615e-06, "loss": 0.6512, "step": 32640 }, { "epoch": 0.7430079876203264, "grad_norm": 0.03662109375, "learning_rate": 5.245216422069478e-06, "loss": 0.3303, "step": 32650 }, { "epoch": 0.743235555150991, "grad_norm": 104.5, "learning_rate": 5.240572171651496e-06, "loss": 1.6047, "step": 32660 }, { "epoch": 0.7434631226816558, "grad_norm": 0.0033721923828125, "learning_rate": 5.2359279212335135e-06, "loss": 0.8769, "step": 32670 }, { "epoch": 0.7436906902123205, "grad_norm": 290.0, "learning_rate": 5.231283670815531e-06, "loss": 0.5621, "step": 32680 }, { "epoch": 0.7439182577429853, "grad_norm": 10.9375, "learning_rate": 5.226639420397548e-06, "loss": 0.2461, "step": 32690 }, { "epoch": 0.7441458252736499, "grad_norm": 191.0, "learning_rate": 5.2219951699795655e-06, "loss": 1.0081, "step": 32700 }, { "epoch": 0.7443733928043147, "grad_norm": 0.0003108978271484375, "learning_rate": 5.2173509195615834e-06, "loss": 0.4176, "step": 32710 }, { "epoch": 0.7446009603349794, "grad_norm": 163.0, "learning_rate": 5.2127066691436005e-06, "loss": 0.5811, "step": 32720 }, { "epoch": 0.7448285278656441, "grad_norm": 0.061767578125, "learning_rate": 5.2080624187256175e-06, "loss": 1.1029, "step": 32730 }, { "epoch": 0.7450560953963089, "grad_norm": 0.0002498626708984375, "learning_rate": 5.2034181683076354e-06, "loss": 0.5179, "step": 32740 }, { "epoch": 0.7452836629269736, "grad_norm": 102.0, "learning_rate": 5.198773917889653e-06, "loss": 0.9308, "step": 32750 }, { "epoch": 0.7455112304576383, "grad_norm": 332.0, "learning_rate": 5.19412966747167e-06, "loss": 0.3359, "step": 32760 }, { "epoch": 0.745738797988303, "grad_norm": 0.11279296875, "learning_rate": 5.189485417053688e-06, "loss": 0.9387, "step": 32770 }, { "epoch": 0.7459663655189678, "grad_norm": 65.0, "learning_rate": 5.184841166635705e-06, "loss": 0.4667, "step": 32780 }, { "epoch": 0.7461939330496324, "grad_norm": 0.004302978515625, "learning_rate": 5.180196916217722e-06, "loss": 0.3268, "step": 32790 }, { "epoch": 0.7464215005802972, "grad_norm": 212.0, "learning_rate": 5.17555266579974e-06, "loss": 0.9751, "step": 32800 }, { "epoch": 0.7466490681109619, "grad_norm": 5.375, "learning_rate": 5.170908415381758e-06, "loss": 1.1386, "step": 32810 }, { "epoch": 0.7468766356416267, "grad_norm": 0.01007080078125, "learning_rate": 5.166264164963775e-06, "loss": 0.4075, "step": 32820 }, { "epoch": 0.7471042031722914, "grad_norm": 0.1650390625, "learning_rate": 5.161619914545792e-06, "loss": 0.9777, "step": 32830 }, { "epoch": 0.7473317707029561, "grad_norm": 160.0, "learning_rate": 5.15697566412781e-06, "loss": 0.752, "step": 32840 }, { "epoch": 0.7475593382336209, "grad_norm": 193.0, "learning_rate": 5.152331413709828e-06, "loss": 1.0562, "step": 32850 }, { "epoch": 0.7477869057642855, "grad_norm": 58.5, "learning_rate": 5.147687163291844e-06, "loss": 0.3038, "step": 32860 }, { "epoch": 0.7480144732949503, "grad_norm": 33.0, "learning_rate": 5.143042912873862e-06, "loss": 0.5354, "step": 32870 }, { "epoch": 0.748242040825615, "grad_norm": 0.055908203125, "learning_rate": 5.13839866245588e-06, "loss": 1.0672, "step": 32880 }, { "epoch": 0.7484696083562797, "grad_norm": 123.5, "learning_rate": 5.133754412037898e-06, "loss": 0.4356, "step": 32890 }, { "epoch": 0.7486971758869444, "grad_norm": 78.5, "learning_rate": 5.129110161619914e-06, "loss": 0.7123, "step": 32900 }, { "epoch": 0.7489247434176092, "grad_norm": 0.003021240234375, "learning_rate": 5.124465911201932e-06, "loss": 1.1179, "step": 32910 }, { "epoch": 0.749152310948274, "grad_norm": 132.0, "learning_rate": 5.11982166078395e-06, "loss": 0.5054, "step": 32920 }, { "epoch": 0.7493798784789386, "grad_norm": 0.01507568359375, "learning_rate": 5.115177410365967e-06, "loss": 0.3702, "step": 32930 }, { "epoch": 0.7496074460096034, "grad_norm": 0.0037994384765625, "learning_rate": 5.110533159947985e-06, "loss": 0.8239, "step": 32940 }, { "epoch": 0.749835013540268, "grad_norm": 0.0002880096435546875, "learning_rate": 5.105888909530002e-06, "loss": 0.5888, "step": 32950 }, { "epoch": 0.7500625810709328, "grad_norm": 0.0059814453125, "learning_rate": 5.10124465911202e-06, "loss": 0.587, "step": 32960 }, { "epoch": 0.7502901486015975, "grad_norm": 130.0, "learning_rate": 5.096600408694037e-06, "loss": 0.9237, "step": 32970 }, { "epoch": 0.7505177161322623, "grad_norm": 3.171875, "learning_rate": 5.091956158276055e-06, "loss": 1.1101, "step": 32980 }, { "epoch": 0.7507452836629269, "grad_norm": 340.0, "learning_rate": 5.087311907858072e-06, "loss": 1.1635, "step": 32990 }, { "epoch": 0.7509728511935917, "grad_norm": 0.1865234375, "learning_rate": 5.082667657440089e-06, "loss": 0.6334, "step": 33000 }, { "epoch": 0.7512004187242565, "grad_norm": 260.0, "learning_rate": 5.078023407022107e-06, "loss": 0.2618, "step": 33010 }, { "epoch": 0.7514279862549211, "grad_norm": 282.0, "learning_rate": 5.073379156604125e-06, "loss": 0.5989, "step": 33020 }, { "epoch": 0.7516555537855859, "grad_norm": 0.0027618408203125, "learning_rate": 5.068734906186143e-06, "loss": 0.4591, "step": 33030 }, { "epoch": 0.7518831213162506, "grad_norm": 0.0013275146484375, "learning_rate": 5.064090655768159e-06, "loss": 1.3463, "step": 33040 }, { "epoch": 0.7521106888469153, "grad_norm": 0.0240478515625, "learning_rate": 5.059446405350177e-06, "loss": 0.3583, "step": 33050 }, { "epoch": 0.75233825637758, "grad_norm": 218.0, "learning_rate": 5.054802154932195e-06, "loss": 0.8252, "step": 33060 }, { "epoch": 0.7525658239082448, "grad_norm": 3.09375, "learning_rate": 5.050157904514211e-06, "loss": 0.8593, "step": 33070 }, { "epoch": 0.7527933914389094, "grad_norm": 0.00592041015625, "learning_rate": 5.045513654096229e-06, "loss": 0.9507, "step": 33080 }, { "epoch": 0.7530209589695742, "grad_norm": 89.0, "learning_rate": 5.040869403678247e-06, "loss": 1.0212, "step": 33090 }, { "epoch": 0.753248526500239, "grad_norm": 0.86328125, "learning_rate": 5.036225153260265e-06, "loss": 0.5734, "step": 33100 }, { "epoch": 0.7534760940309037, "grad_norm": 384.0, "learning_rate": 5.031580902842281e-06, "loss": 1.2742, "step": 33110 }, { "epoch": 0.7537036615615684, "grad_norm": 0.00347900390625, "learning_rate": 5.026936652424299e-06, "loss": 0.754, "step": 33120 }, { "epoch": 0.7539312290922331, "grad_norm": 71.5, "learning_rate": 5.022292402006317e-06, "loss": 1.6129, "step": 33130 }, { "epoch": 0.7541587966228979, "grad_norm": 0.00396728515625, "learning_rate": 5.017648151588334e-06, "loss": 0.4926, "step": 33140 }, { "epoch": 0.7543863641535625, "grad_norm": 53.0, "learning_rate": 5.013003901170352e-06, "loss": 0.5782, "step": 33150 }, { "epoch": 0.7546139316842273, "grad_norm": 59.0, "learning_rate": 5.008359650752369e-06, "loss": 0.3223, "step": 33160 }, { "epoch": 0.754841499214892, "grad_norm": 8.6875, "learning_rate": 5.003715400334387e-06, "loss": 0.774, "step": 33170 }, { "epoch": 0.7550690667455567, "grad_norm": 108.5, "learning_rate": 4.9990711499164045e-06, "loss": 0.6756, "step": 33180 }, { "epoch": 0.7552966342762215, "grad_norm": 260.0, "learning_rate": 4.9944268994984216e-06, "loss": 1.5512, "step": 33190 }, { "epoch": 0.7555242018068862, "grad_norm": 112.0, "learning_rate": 4.9897826490804395e-06, "loss": 1.3506, "step": 33200 }, { "epoch": 0.755751769337551, "grad_norm": 115.0, "learning_rate": 4.9851383986624565e-06, "loss": 0.6016, "step": 33210 }, { "epoch": 0.7559793368682156, "grad_norm": 82.0, "learning_rate": 4.9804941482444736e-06, "loss": 1.1684, "step": 33220 }, { "epoch": 0.7562069043988804, "grad_norm": 5.0625, "learning_rate": 4.9758498978264915e-06, "loss": 0.6942, "step": 33230 }, { "epoch": 0.756434471929545, "grad_norm": 96.0, "learning_rate": 4.9712056474085085e-06, "loss": 0.3083, "step": 33240 }, { "epoch": 0.7566620394602098, "grad_norm": 282.0, "learning_rate": 4.966561396990526e-06, "loss": 1.321, "step": 33250 }, { "epoch": 0.7568896069908746, "grad_norm": 46.75, "learning_rate": 4.9619171465725435e-06, "loss": 0.6418, "step": 33260 }, { "epoch": 0.7571171745215393, "grad_norm": 0.0032196044921875, "learning_rate": 4.957272896154561e-06, "loss": 0.6699, "step": 33270 }, { "epoch": 0.757344742052204, "grad_norm": 0.0030059814453125, "learning_rate": 4.9526286457365784e-06, "loss": 0.2823, "step": 33280 }, { "epoch": 0.7575723095828687, "grad_norm": 0.037353515625, "learning_rate": 4.9479843953185955e-06, "loss": 0.7372, "step": 33290 }, { "epoch": 0.7577998771135335, "grad_norm": 2.671875, "learning_rate": 4.943340144900613e-06, "loss": 0.7365, "step": 33300 }, { "epoch": 0.7580274446441981, "grad_norm": 128.0, "learning_rate": 4.9386958944826304e-06, "loss": 1.7636, "step": 33310 }, { "epoch": 0.7582550121748629, "grad_norm": 330.0, "learning_rate": 4.934051644064648e-06, "loss": 1.3305, "step": 33320 }, { "epoch": 0.7584825797055276, "grad_norm": 23.5, "learning_rate": 4.929407393646665e-06, "loss": 0.9756, "step": 33330 }, { "epoch": 0.7587101472361923, "grad_norm": 0.109375, "learning_rate": 4.924763143228683e-06, "loss": 0.7742, "step": 33340 }, { "epoch": 0.7589377147668571, "grad_norm": 0.0159912109375, "learning_rate": 4.9201188928107e-06, "loss": 0.7168, "step": 33350 }, { "epoch": 0.7591652822975218, "grad_norm": 209.0, "learning_rate": 4.915474642392718e-06, "loss": 0.4259, "step": 33360 }, { "epoch": 0.7593928498281866, "grad_norm": 116.5, "learning_rate": 4.910830391974736e-06, "loss": 0.9307, "step": 33370 }, { "epoch": 0.7596204173588512, "grad_norm": 330.0, "learning_rate": 4.906186141556753e-06, "loss": 0.6475, "step": 33380 }, { "epoch": 0.759847984889516, "grad_norm": 4.34375, "learning_rate": 4.901541891138771e-06, "loss": 0.9563, "step": 33390 }, { "epoch": 0.7600755524201807, "grad_norm": 0.001068115234375, "learning_rate": 4.896897640720788e-06, "loss": 0.2594, "step": 33400 }, { "epoch": 0.7603031199508454, "grad_norm": 97.5, "learning_rate": 4.892253390302806e-06, "loss": 0.7482, "step": 33410 }, { "epoch": 0.7605306874815101, "grad_norm": 0.0018463134765625, "learning_rate": 4.887609139884823e-06, "loss": 0.6553, "step": 33420 }, { "epoch": 0.7607582550121749, "grad_norm": 169.0, "learning_rate": 4.88296488946684e-06, "loss": 0.9909, "step": 33430 }, { "epoch": 0.7609858225428396, "grad_norm": 270.0, "learning_rate": 4.878320639048858e-06, "loss": 0.5479, "step": 33440 }, { "epoch": 0.7612133900735043, "grad_norm": 17.25, "learning_rate": 4.873676388630875e-06, "loss": 1.0176, "step": 33450 }, { "epoch": 0.7614409576041691, "grad_norm": 50.75, "learning_rate": 4.869032138212893e-06, "loss": 1.0031, "step": 33460 }, { "epoch": 0.7616685251348337, "grad_norm": 932.0, "learning_rate": 4.86438788779491e-06, "loss": 0.4872, "step": 33470 }, { "epoch": 0.7618960926654985, "grad_norm": 0.060302734375, "learning_rate": 4.859743637376928e-06, "loss": 0.3155, "step": 33480 }, { "epoch": 0.7621236601961632, "grad_norm": 18.25, "learning_rate": 4.855099386958945e-06, "loss": 0.2922, "step": 33490 }, { "epoch": 0.762351227726828, "grad_norm": 5.78125, "learning_rate": 4.850455136540962e-06, "loss": 0.1818, "step": 33500 }, { "epoch": 0.7625787952574926, "grad_norm": 0.0693359375, "learning_rate": 4.84581088612298e-06, "loss": 0.7284, "step": 33510 }, { "epoch": 0.7628063627881574, "grad_norm": 174.0, "learning_rate": 4.841166635704997e-06, "loss": 1.8108, "step": 33520 }, { "epoch": 0.7630339303188222, "grad_norm": 0.53515625, "learning_rate": 4.836522385287015e-06, "loss": 0.7205, "step": 33530 }, { "epoch": 0.7632614978494868, "grad_norm": 42.5, "learning_rate": 4.831878134869033e-06, "loss": 0.3432, "step": 33540 }, { "epoch": 0.7634890653801516, "grad_norm": 1.03125, "learning_rate": 4.82723388445105e-06, "loss": 0.5571, "step": 33550 }, { "epoch": 0.7637166329108163, "grad_norm": 5.125, "learning_rate": 4.822589634033068e-06, "loss": 0.4755, "step": 33560 }, { "epoch": 0.763944200441481, "grad_norm": 0.005584716796875, "learning_rate": 4.817945383615085e-06, "loss": 0.7014, "step": 33570 }, { "epoch": 0.7641717679721457, "grad_norm": 212.0, "learning_rate": 4.813301133197103e-06, "loss": 0.8947, "step": 33580 }, { "epoch": 0.7643993355028105, "grad_norm": 109.0, "learning_rate": 4.80865688277912e-06, "loss": 0.7993, "step": 33590 }, { "epoch": 0.7646269030334751, "grad_norm": 356.0, "learning_rate": 4.804012632361138e-06, "loss": 0.4398, "step": 33600 }, { "epoch": 0.7648544705641399, "grad_norm": 58.25, "learning_rate": 4.799368381943155e-06, "loss": 0.9515, "step": 33610 }, { "epoch": 0.7650820380948047, "grad_norm": 0.455078125, "learning_rate": 4.794724131525173e-06, "loss": 1.6853, "step": 33620 }, { "epoch": 0.7653096056254693, "grad_norm": 147.0, "learning_rate": 4.79007988110719e-06, "loss": 1.0366, "step": 33630 }, { "epoch": 0.7655371731561341, "grad_norm": 1.5703125, "learning_rate": 4.785435630689207e-06, "loss": 0.8633, "step": 33640 }, { "epoch": 0.7657647406867988, "grad_norm": 210.0, "learning_rate": 4.780791380271225e-06, "loss": 1.2256, "step": 33650 }, { "epoch": 0.7659923082174636, "grad_norm": 374.0, "learning_rate": 4.776147129853242e-06, "loss": 1.2271, "step": 33660 }, { "epoch": 0.7662198757481282, "grad_norm": 49.5, "learning_rate": 4.77150287943526e-06, "loss": 1.1173, "step": 33670 }, { "epoch": 0.766447443278793, "grad_norm": 37.25, "learning_rate": 4.766858629017277e-06, "loss": 0.5934, "step": 33680 }, { "epoch": 0.7666750108094577, "grad_norm": 294.0, "learning_rate": 4.762214378599295e-06, "loss": 0.5344, "step": 33690 }, { "epoch": 0.7669025783401224, "grad_norm": 30.625, "learning_rate": 4.757570128181312e-06, "loss": 1.0645, "step": 33700 }, { "epoch": 0.7671301458707872, "grad_norm": 360.0, "learning_rate": 4.752925877763329e-06, "loss": 1.3593, "step": 33710 }, { "epoch": 0.7673577134014519, "grad_norm": 34.5, "learning_rate": 4.748281627345347e-06, "loss": 0.5545, "step": 33720 }, { "epoch": 0.7675852809321166, "grad_norm": 0.01495361328125, "learning_rate": 4.7436373769273645e-06, "loss": 0.9161, "step": 33730 }, { "epoch": 0.7678128484627813, "grad_norm": 0.1953125, "learning_rate": 4.738993126509382e-06, "loss": 0.3554, "step": 33740 }, { "epoch": 0.7680404159934461, "grad_norm": 0.004638671875, "learning_rate": 4.7343488760913995e-06, "loss": 1.0434, "step": 33750 }, { "epoch": 0.7682679835241107, "grad_norm": 0.0625, "learning_rate": 4.7297046256734166e-06, "loss": 0.8742, "step": 33760 }, { "epoch": 0.7684955510547755, "grad_norm": 312.0, "learning_rate": 4.7250603752554345e-06, "loss": 0.7768, "step": 33770 }, { "epoch": 0.7687231185854402, "grad_norm": 194.0, "learning_rate": 4.7204161248374515e-06, "loss": 0.5586, "step": 33780 }, { "epoch": 0.768950686116105, "grad_norm": 170.0, "learning_rate": 4.715771874419469e-06, "loss": 0.8125, "step": 33790 }, { "epoch": 0.7691782536467697, "grad_norm": 123.5, "learning_rate": 4.7111276240014865e-06, "loss": 1.2531, "step": 33800 }, { "epoch": 0.7694058211774344, "grad_norm": 0.00360107421875, "learning_rate": 4.706483373583504e-06, "loss": 0.5784, "step": 33810 }, { "epoch": 0.7696333887080992, "grad_norm": 0.00799560546875, "learning_rate": 4.7018391231655214e-06, "loss": 0.5269, "step": 33820 }, { "epoch": 0.7698609562387638, "grad_norm": 280.0, "learning_rate": 4.697194872747539e-06, "loss": 0.8091, "step": 33830 }, { "epoch": 0.7700885237694286, "grad_norm": 27.75, "learning_rate": 4.692550622329556e-06, "loss": 0.6047, "step": 33840 }, { "epoch": 0.7703160913000933, "grad_norm": 212.0, "learning_rate": 4.6879063719115734e-06, "loss": 0.401, "step": 33850 }, { "epoch": 0.770543658830758, "grad_norm": 0.0016632080078125, "learning_rate": 4.683262121493591e-06, "loss": 0.5788, "step": 33860 }, { "epoch": 0.7707712263614227, "grad_norm": 100.0, "learning_rate": 4.678617871075608e-06, "loss": 1.1948, "step": 33870 }, { "epoch": 0.7709987938920875, "grad_norm": 0.0157470703125, "learning_rate": 4.673973620657626e-06, "loss": 0.1832, "step": 33880 }, { "epoch": 0.7712263614227522, "grad_norm": 92.5, "learning_rate": 4.669329370239643e-06, "loss": 0.7473, "step": 33890 }, { "epoch": 0.7714539289534169, "grad_norm": 50.5, "learning_rate": 4.664685119821661e-06, "loss": 0.5739, "step": 33900 }, { "epoch": 0.7716814964840817, "grad_norm": 184.0, "learning_rate": 4.660040869403678e-06, "loss": 0.8654, "step": 33910 }, { "epoch": 0.7719090640147463, "grad_norm": 0.259765625, "learning_rate": 4.655396618985696e-06, "loss": 0.4546, "step": 33920 }, { "epoch": 0.7721366315454111, "grad_norm": 101.5, "learning_rate": 4.650752368567713e-06, "loss": 0.7966, "step": 33930 }, { "epoch": 0.7723641990760758, "grad_norm": 92.0, "learning_rate": 4.646108118149731e-06, "loss": 1.1002, "step": 33940 }, { "epoch": 0.7725917666067406, "grad_norm": 117.0, "learning_rate": 4.641463867731748e-06, "loss": 0.4622, "step": 33950 }, { "epoch": 0.7728193341374052, "grad_norm": 217.0, "learning_rate": 4.636819617313766e-06, "loss": 0.5537, "step": 33960 }, { "epoch": 0.77304690166807, "grad_norm": 0.01263427734375, "learning_rate": 4.632175366895784e-06, "loss": 1.1942, "step": 33970 }, { "epoch": 0.7732744691987348, "grad_norm": 0.0048828125, "learning_rate": 4.627531116477801e-06, "loss": 0.6684, "step": 33980 }, { "epoch": 0.7735020367293994, "grad_norm": 0.73828125, "learning_rate": 4.622886866059818e-06, "loss": 0.5888, "step": 33990 }, { "epoch": 0.7737296042600642, "grad_norm": 0.0020599365234375, "learning_rate": 4.618242615641836e-06, "loss": 0.6542, "step": 34000 }, { "epoch": 0.7739571717907289, "grad_norm": 13.75, "learning_rate": 4.613598365223853e-06, "loss": 0.1006, "step": 34010 }, { "epoch": 0.7741847393213936, "grad_norm": 93.0, "learning_rate": 4.608954114805871e-06, "loss": 0.3673, "step": 34020 }, { "epoch": 0.7744123068520583, "grad_norm": 0.75390625, "learning_rate": 4.604309864387888e-06, "loss": 0.3281, "step": 34030 }, { "epoch": 0.7746398743827231, "grad_norm": 121.5, "learning_rate": 4.599665613969906e-06, "loss": 0.6437, "step": 34040 }, { "epoch": 0.7748674419133877, "grad_norm": 151.0, "learning_rate": 4.595021363551923e-06, "loss": 0.7988, "step": 34050 }, { "epoch": 0.7750950094440525, "grad_norm": 162.0, "learning_rate": 4.59037711313394e-06, "loss": 0.9661, "step": 34060 }, { "epoch": 0.7753225769747173, "grad_norm": 680.0, "learning_rate": 4.585732862715958e-06, "loss": 0.9771, "step": 34070 }, { "epoch": 0.775550144505382, "grad_norm": 0.01611328125, "learning_rate": 4.581088612297975e-06, "loss": 0.2354, "step": 34080 }, { "epoch": 0.7757777120360467, "grad_norm": 57.0, "learning_rate": 4.576444361879993e-06, "loss": 0.7108, "step": 34090 }, { "epoch": 0.7760052795667114, "grad_norm": 156.0, "learning_rate": 4.57180011146201e-06, "loss": 0.8346, "step": 34100 }, { "epoch": 0.7762328470973762, "grad_norm": 74.0, "learning_rate": 4.567155861044028e-06, "loss": 1.8852, "step": 34110 }, { "epoch": 0.7764604146280408, "grad_norm": 67.5, "learning_rate": 4.562511610626045e-06, "loss": 0.6649, "step": 34120 }, { "epoch": 0.7766879821587056, "grad_norm": 5.15625, "learning_rate": 4.557867360208063e-06, "loss": 0.8679, "step": 34130 }, { "epoch": 0.7769155496893703, "grad_norm": 42.5, "learning_rate": 4.55322310979008e-06, "loss": 0.5629, "step": 34140 }, { "epoch": 0.777143117220035, "grad_norm": 0.006622314453125, "learning_rate": 4.548578859372098e-06, "loss": 0.6572, "step": 34150 }, { "epoch": 0.7773706847506998, "grad_norm": 95.0, "learning_rate": 4.543934608954116e-06, "loss": 0.6961, "step": 34160 }, { "epoch": 0.7775982522813645, "grad_norm": 35.5, "learning_rate": 4.539290358536133e-06, "loss": 0.8068, "step": 34170 }, { "epoch": 0.7778258198120293, "grad_norm": 0.00083160400390625, "learning_rate": 4.534646108118151e-06, "loss": 0.5425, "step": 34180 }, { "epoch": 0.7780533873426939, "grad_norm": 0.0069580078125, "learning_rate": 4.530001857700168e-06, "loss": 0.6577, "step": 34190 }, { "epoch": 0.7782809548733587, "grad_norm": 338.0, "learning_rate": 4.525357607282185e-06, "loss": 1.3804, "step": 34200 }, { "epoch": 0.7785085224040234, "grad_norm": 0.000732421875, "learning_rate": 4.520713356864203e-06, "loss": 0.8354, "step": 34210 }, { "epoch": 0.7787360899346881, "grad_norm": 75.0, "learning_rate": 4.51606910644622e-06, "loss": 1.2119, "step": 34220 }, { "epoch": 0.7789636574653529, "grad_norm": 211.0, "learning_rate": 4.511424856028238e-06, "loss": 1.4703, "step": 34230 }, { "epoch": 0.7791912249960176, "grad_norm": 209.0, "learning_rate": 4.506780605610255e-06, "loss": 1.4949, "step": 34240 }, { "epoch": 0.7794187925266823, "grad_norm": 286.0, "learning_rate": 4.5021363551922726e-06, "loss": 1.8654, "step": 34250 }, { "epoch": 0.779646360057347, "grad_norm": 81.0, "learning_rate": 4.49749210477429e-06, "loss": 0.2158, "step": 34260 }, { "epoch": 0.7798739275880118, "grad_norm": 221.0, "learning_rate": 4.492847854356307e-06, "loss": 1.5384, "step": 34270 }, { "epoch": 0.7801014951186764, "grad_norm": 68.5, "learning_rate": 4.488203603938325e-06, "loss": 0.6592, "step": 34280 }, { "epoch": 0.7803290626493412, "grad_norm": 0.01611328125, "learning_rate": 4.483559353520342e-06, "loss": 1.2643, "step": 34290 }, { "epoch": 0.7805566301800059, "grad_norm": 0.0184326171875, "learning_rate": 4.4789151031023595e-06, "loss": 0.5136, "step": 34300 }, { "epoch": 0.7807841977106706, "grad_norm": 0.00115966796875, "learning_rate": 4.474270852684377e-06, "loss": 0.2839, "step": 34310 }, { "epoch": 0.7810117652413354, "grad_norm": 0.000667572021484375, "learning_rate": 4.4696266022663945e-06, "loss": 0.6133, "step": 34320 }, { "epoch": 0.7812393327720001, "grad_norm": 314.0, "learning_rate": 4.464982351848412e-06, "loss": 1.2368, "step": 34330 }, { "epoch": 0.7814669003026649, "grad_norm": 258.0, "learning_rate": 4.4603381014304295e-06, "loss": 0.3896, "step": 34340 }, { "epoch": 0.7816944678333295, "grad_norm": 0.0751953125, "learning_rate": 4.455693851012447e-06, "loss": 0.5974, "step": 34350 }, { "epoch": 0.7819220353639943, "grad_norm": 132.0, "learning_rate": 4.451049600594464e-06, "loss": 1.3146, "step": 34360 }, { "epoch": 0.782149602894659, "grad_norm": 0.00022983551025390625, "learning_rate": 4.446405350176482e-06, "loss": 0.1737, "step": 34370 }, { "epoch": 0.7823771704253237, "grad_norm": 181.0, "learning_rate": 4.441761099758499e-06, "loss": 1.1424, "step": 34380 }, { "epoch": 0.7826047379559884, "grad_norm": 161.0, "learning_rate": 4.437116849340517e-06, "loss": 1.2486, "step": 34390 }, { "epoch": 0.7828323054866532, "grad_norm": 97.5, "learning_rate": 4.432472598922534e-06, "loss": 0.9539, "step": 34400 }, { "epoch": 0.7830598730173179, "grad_norm": 0.0087890625, "learning_rate": 4.427828348504551e-06, "loss": 0.5835, "step": 34410 }, { "epoch": 0.7832874405479826, "grad_norm": 66.0, "learning_rate": 4.423184098086569e-06, "loss": 1.2339, "step": 34420 }, { "epoch": 0.7835150080786474, "grad_norm": 171.0, "learning_rate": 4.418539847668586e-06, "loss": 0.7504, "step": 34430 }, { "epoch": 0.783742575609312, "grad_norm": 183.0, "learning_rate": 4.413895597250604e-06, "loss": 1.2637, "step": 34440 }, { "epoch": 0.7839701431399768, "grad_norm": 0.003021240234375, "learning_rate": 4.409251346832621e-06, "loss": 0.4016, "step": 34450 }, { "epoch": 0.7841977106706415, "grad_norm": 0.00183868408203125, "learning_rate": 4.404607096414639e-06, "loss": 1.1838, "step": 34460 }, { "epoch": 0.7844252782013063, "grad_norm": 82.5, "learning_rate": 4.399962845996656e-06, "loss": 0.3881, "step": 34470 }, { "epoch": 0.7846528457319709, "grad_norm": 1.6328125, "learning_rate": 4.395318595578673e-06, "loss": 0.891, "step": 34480 }, { "epoch": 0.7848804132626357, "grad_norm": 62.5, "learning_rate": 4.390674345160691e-06, "loss": 0.4636, "step": 34490 }, { "epoch": 0.7851079807933005, "grad_norm": 290.0, "learning_rate": 4.386030094742709e-06, "loss": 1.4101, "step": 34500 }, { "epoch": 0.7853355483239651, "grad_norm": 0.0026702880859375, "learning_rate": 4.381385844324726e-06, "loss": 0.7543, "step": 34510 }, { "epoch": 0.7855631158546299, "grad_norm": 1000.0, "learning_rate": 4.376741593906744e-06, "loss": 1.5729, "step": 34520 }, { "epoch": 0.7857906833852946, "grad_norm": 35.5, "learning_rate": 4.372097343488761e-06, "loss": 0.8616, "step": 34530 }, { "epoch": 0.7860182509159593, "grad_norm": 174.0, "learning_rate": 4.367453093070779e-06, "loss": 1.3624, "step": 34540 }, { "epoch": 0.786245818446624, "grad_norm": 3.34375, "learning_rate": 4.362808842652796e-06, "loss": 1.049, "step": 34550 }, { "epoch": 0.7864733859772888, "grad_norm": 7.40625, "learning_rate": 4.358164592234814e-06, "loss": 1.1966, "step": 34560 }, { "epoch": 0.7867009535079534, "grad_norm": 0.06787109375, "learning_rate": 4.353520341816831e-06, "loss": 0.5237, "step": 34570 }, { "epoch": 0.7869285210386182, "grad_norm": 51.75, "learning_rate": 4.348876091398849e-06, "loss": 1.0273, "step": 34580 }, { "epoch": 0.787156088569283, "grad_norm": 77.0, "learning_rate": 4.344231840980866e-06, "loss": 0.071, "step": 34590 }, { "epoch": 0.7873836560999476, "grad_norm": 34.5, "learning_rate": 4.339587590562884e-06, "loss": 1.0871, "step": 34600 }, { "epoch": 0.7876112236306124, "grad_norm": 0.000919342041015625, "learning_rate": 4.334943340144901e-06, "loss": 1.2659, "step": 34610 }, { "epoch": 0.7878387911612771, "grad_norm": 102.0, "learning_rate": 4.330299089726919e-06, "loss": 1.93, "step": 34620 }, { "epoch": 0.7880663586919419, "grad_norm": 151.0, "learning_rate": 4.325654839308936e-06, "loss": 1.1869, "step": 34630 }, { "epoch": 0.7882939262226065, "grad_norm": 2.640625, "learning_rate": 4.321010588890953e-06, "loss": 0.6584, "step": 34640 }, { "epoch": 0.7885214937532713, "grad_norm": 17.125, "learning_rate": 4.316366338472971e-06, "loss": 0.9343, "step": 34650 }, { "epoch": 0.788749061283936, "grad_norm": 25.25, "learning_rate": 4.311722088054988e-06, "loss": 0.6961, "step": 34660 }, { "epoch": 0.7889766288146007, "grad_norm": 252.0, "learning_rate": 4.307077837637006e-06, "loss": 0.4789, "step": 34670 }, { "epoch": 0.7892041963452655, "grad_norm": 39.5, "learning_rate": 4.302433587219023e-06, "loss": 0.2024, "step": 34680 }, { "epoch": 0.7894317638759302, "grad_norm": 1.640625, "learning_rate": 4.297789336801041e-06, "loss": 0.7032, "step": 34690 }, { "epoch": 0.7896593314065949, "grad_norm": 0.0021514892578125, "learning_rate": 4.293145086383058e-06, "loss": 0.0536, "step": 34700 }, { "epoch": 0.7898868989372596, "grad_norm": 134.0, "learning_rate": 4.288500835965076e-06, "loss": 0.4407, "step": 34710 }, { "epoch": 0.7901144664679244, "grad_norm": 113.0, "learning_rate": 4.283856585547093e-06, "loss": 0.6476, "step": 34720 }, { "epoch": 0.790342033998589, "grad_norm": 61.0, "learning_rate": 4.279212335129111e-06, "loss": 0.5796, "step": 34730 }, { "epoch": 0.7905696015292538, "grad_norm": 141.0, "learning_rate": 4.274568084711128e-06, "loss": 0.5367, "step": 34740 }, { "epoch": 0.7907971690599185, "grad_norm": 2.453125, "learning_rate": 4.269923834293146e-06, "loss": 0.8599, "step": 34750 }, { "epoch": 0.7910247365905833, "grad_norm": 140.0, "learning_rate": 4.2652795838751636e-06, "loss": 1.1373, "step": 34760 }, { "epoch": 0.791252304121248, "grad_norm": 9.9375, "learning_rate": 4.260635333457181e-06, "loss": 0.7951, "step": 34770 }, { "epoch": 0.7914798716519127, "grad_norm": 0.007080078125, "learning_rate": 4.255991083039198e-06, "loss": 1.0265, "step": 34780 }, { "epoch": 0.7917074391825775, "grad_norm": 620.0, "learning_rate": 4.2513468326212156e-06, "loss": 1.0167, "step": 34790 }, { "epoch": 0.7919350067132421, "grad_norm": 56.75, "learning_rate": 4.246702582203233e-06, "loss": 0.7284, "step": 34800 }, { "epoch": 0.7921625742439069, "grad_norm": 524.0, "learning_rate": 4.2420583317852505e-06, "loss": 0.8765, "step": 34810 }, { "epoch": 0.7923901417745716, "grad_norm": 0.002532958984375, "learning_rate": 4.2374140813672676e-06, "loss": 0.4113, "step": 34820 }, { "epoch": 0.7926177093052363, "grad_norm": 79.0, "learning_rate": 4.2327698309492855e-06, "loss": 0.7494, "step": 34830 }, { "epoch": 0.792845276835901, "grad_norm": 128.0, "learning_rate": 4.2281255805313025e-06, "loss": 0.6965, "step": 34840 }, { "epoch": 0.7930728443665658, "grad_norm": 318.0, "learning_rate": 4.22348133011332e-06, "loss": 1.3472, "step": 34850 }, { "epoch": 0.7933004118972306, "grad_norm": 55.5, "learning_rate": 4.2188370796953375e-06, "loss": 0.6159, "step": 34860 }, { "epoch": 0.7935279794278952, "grad_norm": 141.0, "learning_rate": 4.2141928292773545e-06, "loss": 1.3901, "step": 34870 }, { "epoch": 0.79375554695856, "grad_norm": 0.162109375, "learning_rate": 4.2095485788593724e-06, "loss": 0.7453, "step": 34880 }, { "epoch": 0.7939831144892247, "grad_norm": 10.6875, "learning_rate": 4.2049043284413895e-06, "loss": 0.2888, "step": 34890 }, { "epoch": 0.7942106820198894, "grad_norm": 318.0, "learning_rate": 4.200260078023407e-06, "loss": 0.6738, "step": 34900 }, { "epoch": 0.7944382495505541, "grad_norm": 0.04736328125, "learning_rate": 4.1956158276054245e-06, "loss": 1.5442, "step": 34910 }, { "epoch": 0.7946658170812189, "grad_norm": 31.5, "learning_rate": 4.190971577187442e-06, "loss": 0.5108, "step": 34920 }, { "epoch": 0.7948933846118835, "grad_norm": 112.0, "learning_rate": 4.18632732676946e-06, "loss": 0.9403, "step": 34930 }, { "epoch": 0.7951209521425483, "grad_norm": 0.0341796875, "learning_rate": 4.181683076351477e-06, "loss": 0.8197, "step": 34940 }, { "epoch": 0.7953485196732131, "grad_norm": 89.0, "learning_rate": 4.177038825933495e-06, "loss": 0.5499, "step": 34950 }, { "epoch": 0.7955760872038777, "grad_norm": 55.75, "learning_rate": 4.172394575515512e-06, "loss": 1.1121, "step": 34960 }, { "epoch": 0.7958036547345425, "grad_norm": 0.0771484375, "learning_rate": 4.16775032509753e-06, "loss": 0.064, "step": 34970 }, { "epoch": 0.7960312222652072, "grad_norm": 3.671875, "learning_rate": 4.163106074679547e-06, "loss": 0.4133, "step": 34980 }, { "epoch": 0.796258789795872, "grad_norm": 0.01483154296875, "learning_rate": 4.158461824261564e-06, "loss": 0.8712, "step": 34990 }, { "epoch": 0.7964863573265366, "grad_norm": 0.02587890625, "learning_rate": 4.153817573843582e-06, "loss": 0.4902, "step": 35000 }, { "epoch": 0.7967139248572014, "grad_norm": 133.0, "learning_rate": 4.149173323425599e-06, "loss": 0.965, "step": 35010 }, { "epoch": 0.796941492387866, "grad_norm": 122.0, "learning_rate": 4.144529073007617e-06, "loss": 0.8051, "step": 35020 }, { "epoch": 0.7971690599185308, "grad_norm": 0.10888671875, "learning_rate": 4.139884822589634e-06, "loss": 0.3471, "step": 35030 }, { "epoch": 0.7973966274491956, "grad_norm": 174.0, "learning_rate": 4.135240572171652e-06, "loss": 0.7128, "step": 35040 }, { "epoch": 0.7976241949798603, "grad_norm": 95.5, "learning_rate": 4.130596321753669e-06, "loss": 0.6152, "step": 35050 }, { "epoch": 0.797851762510525, "grad_norm": 11.25, "learning_rate": 4.125952071335686e-06, "loss": 0.2278, "step": 35060 }, { "epoch": 0.7980793300411897, "grad_norm": 60.25, "learning_rate": 4.121307820917704e-06, "loss": 0.9441, "step": 35070 }, { "epoch": 0.7983068975718545, "grad_norm": 148.0, "learning_rate": 4.116663570499721e-06, "loss": 0.3205, "step": 35080 }, { "epoch": 0.7985344651025191, "grad_norm": 41.25, "learning_rate": 4.112019320081739e-06, "loss": 0.4517, "step": 35090 }, { "epoch": 0.7987620326331839, "grad_norm": 145.0, "learning_rate": 4.107375069663756e-06, "loss": 0.5093, "step": 35100 }, { "epoch": 0.7989896001638486, "grad_norm": 56.75, "learning_rate": 4.102730819245774e-06, "loss": 1.5531, "step": 35110 }, { "epoch": 0.7992171676945133, "grad_norm": 107.5, "learning_rate": 4.098086568827792e-06, "loss": 0.331, "step": 35120 }, { "epoch": 0.7994447352251781, "grad_norm": 223.0, "learning_rate": 4.093442318409809e-06, "loss": 1.4207, "step": 35130 }, { "epoch": 0.7996723027558428, "grad_norm": 0.64453125, "learning_rate": 4.088798067991827e-06, "loss": 0.5879, "step": 35140 }, { "epoch": 0.7998998702865076, "grad_norm": 0.005340576171875, "learning_rate": 4.084153817573844e-06, "loss": 0.6367, "step": 35150 }, { "epoch": 0.8001274378171722, "grad_norm": 0.7578125, "learning_rate": 4.079509567155862e-06, "loss": 0.9204, "step": 35160 }, { "epoch": 0.800355005347837, "grad_norm": 0.0031280517578125, "learning_rate": 4.074865316737879e-06, "loss": 1.5267, "step": 35170 }, { "epoch": 0.8005825728785017, "grad_norm": 109.5, "learning_rate": 4.070221066319897e-06, "loss": 1.774, "step": 35180 }, { "epoch": 0.8008101404091664, "grad_norm": 82.0, "learning_rate": 4.065576815901914e-06, "loss": 0.5863, "step": 35190 }, { "epoch": 0.8010377079398311, "grad_norm": 104.5, "learning_rate": 4.060932565483931e-06, "loss": 0.4524, "step": 35200 }, { "epoch": 0.8012652754704959, "grad_norm": 167.0, "learning_rate": 4.056288315065949e-06, "loss": 1.6478, "step": 35210 }, { "epoch": 0.8014928430011606, "grad_norm": 0.006805419921875, "learning_rate": 4.051644064647966e-06, "loss": 0.7998, "step": 35220 }, { "epoch": 0.8017204105318253, "grad_norm": 19.875, "learning_rate": 4.046999814229984e-06, "loss": 0.4662, "step": 35230 }, { "epoch": 0.8019479780624901, "grad_norm": 104.5, "learning_rate": 4.042355563812001e-06, "loss": 0.1962, "step": 35240 }, { "epoch": 0.8021755455931547, "grad_norm": 123.0, "learning_rate": 4.037711313394019e-06, "loss": 0.9709, "step": 35250 }, { "epoch": 0.8024031131238195, "grad_norm": 113.0, "learning_rate": 4.033067062976036e-06, "loss": 0.5296, "step": 35260 }, { "epoch": 0.8026306806544842, "grad_norm": 0.0277099609375, "learning_rate": 4.028422812558053e-06, "loss": 0.8251, "step": 35270 }, { "epoch": 0.802858248185149, "grad_norm": 26.25, "learning_rate": 4.023778562140071e-06, "loss": 0.2832, "step": 35280 }, { "epoch": 0.8030858157158137, "grad_norm": 0.005767822265625, "learning_rate": 4.019134311722089e-06, "loss": 0.2567, "step": 35290 }, { "epoch": 0.8033133832464784, "grad_norm": 88.5, "learning_rate": 4.014490061304106e-06, "loss": 1.0019, "step": 35300 }, { "epoch": 0.8035409507771432, "grad_norm": 130.0, "learning_rate": 4.009845810886124e-06, "loss": 0.6091, "step": 35310 }, { "epoch": 0.8037685183078078, "grad_norm": 96.0, "learning_rate": 4.005201560468141e-06, "loss": 0.8902, "step": 35320 }, { "epoch": 0.8039960858384726, "grad_norm": 0.024169921875, "learning_rate": 4.0005573100501586e-06, "loss": 0.6822, "step": 35330 }, { "epoch": 0.8042236533691373, "grad_norm": 0.002044677734375, "learning_rate": 3.995913059632176e-06, "loss": 0.1501, "step": 35340 }, { "epoch": 0.804451220899802, "grad_norm": 378.0, "learning_rate": 3.9912688092141935e-06, "loss": 1.0958, "step": 35350 }, { "epoch": 0.8046787884304667, "grad_norm": 0.005859375, "learning_rate": 3.9866245587962106e-06, "loss": 0.781, "step": 35360 }, { "epoch": 0.8049063559611315, "grad_norm": 131.0, "learning_rate": 3.9819803083782285e-06, "loss": 0.8034, "step": 35370 }, { "epoch": 0.8051339234917962, "grad_norm": 43.25, "learning_rate": 3.9773360579602455e-06, "loss": 0.5361, "step": 35380 }, { "epoch": 0.8053614910224609, "grad_norm": 121.0, "learning_rate": 3.972691807542263e-06, "loss": 2.5527, "step": 35390 }, { "epoch": 0.8055890585531257, "grad_norm": 276.0, "learning_rate": 3.9680475571242805e-06, "loss": 0.3911, "step": 35400 }, { "epoch": 0.8058166260837903, "grad_norm": 0.0012664794921875, "learning_rate": 3.9634033067062975e-06, "loss": 0.5361, "step": 35410 }, { "epoch": 0.8060441936144551, "grad_norm": 0.34375, "learning_rate": 3.9587590562883154e-06, "loss": 0.7028, "step": 35420 }, { "epoch": 0.8062717611451198, "grad_norm": 132.0, "learning_rate": 3.9541148058703325e-06, "loss": 0.6728, "step": 35430 }, { "epoch": 0.8064993286757846, "grad_norm": 1.703125, "learning_rate": 3.94947055545235e-06, "loss": 0.6725, "step": 35440 }, { "epoch": 0.8067268962064492, "grad_norm": 0.00372314453125, "learning_rate": 3.9448263050343674e-06, "loss": 0.7857, "step": 35450 }, { "epoch": 0.806954463737114, "grad_norm": 55.0, "learning_rate": 3.940182054616385e-06, "loss": 0.3697, "step": 35460 }, { "epoch": 0.8071820312677788, "grad_norm": 0.1376953125, "learning_rate": 3.935537804198402e-06, "loss": 0.6536, "step": 35470 }, { "epoch": 0.8074095987984434, "grad_norm": 0.0537109375, "learning_rate": 3.93089355378042e-06, "loss": 1.2075, "step": 35480 }, { "epoch": 0.8076371663291082, "grad_norm": 100.5, "learning_rate": 3.926249303362437e-06, "loss": 0.8859, "step": 35490 }, { "epoch": 0.8078647338597729, "grad_norm": 9.4375, "learning_rate": 3.921605052944455e-06, "loss": 0.2887, "step": 35500 }, { "epoch": 0.8080923013904376, "grad_norm": 632.0, "learning_rate": 3.916960802526472e-06, "loss": 1.3845, "step": 35510 }, { "epoch": 0.8083198689211023, "grad_norm": 61.5, "learning_rate": 3.91231655210849e-06, "loss": 0.7365, "step": 35520 }, { "epoch": 0.8085474364517671, "grad_norm": 0.007110595703125, "learning_rate": 3.907672301690508e-06, "loss": 0.4621, "step": 35530 }, { "epoch": 0.8087750039824317, "grad_norm": 207.0, "learning_rate": 3.903028051272525e-06, "loss": 0.7739, "step": 35540 }, { "epoch": 0.8090025715130965, "grad_norm": 104.0, "learning_rate": 3.898383800854542e-06, "loss": 0.356, "step": 35550 }, { "epoch": 0.8092301390437613, "grad_norm": 176.0, "learning_rate": 3.89373955043656e-06, "loss": 0.8192, "step": 35560 }, { "epoch": 0.809457706574426, "grad_norm": 211.0, "learning_rate": 3.889095300018577e-06, "loss": 0.3281, "step": 35570 }, { "epoch": 0.8096852741050907, "grad_norm": 118.5, "learning_rate": 3.884451049600595e-06, "loss": 1.1694, "step": 35580 }, { "epoch": 0.8099128416357554, "grad_norm": 0.00408935546875, "learning_rate": 3.879806799182612e-06, "loss": 1.1135, "step": 35590 }, { "epoch": 0.8101404091664202, "grad_norm": 0.006072998046875, "learning_rate": 3.87516254876463e-06, "loss": 0.8995, "step": 35600 }, { "epoch": 0.8103679766970848, "grad_norm": 0.000545501708984375, "learning_rate": 3.870518298346647e-06, "loss": 0.9028, "step": 35610 }, { "epoch": 0.8105955442277496, "grad_norm": 0.0023651123046875, "learning_rate": 3.865874047928664e-06, "loss": 0.2737, "step": 35620 }, { "epoch": 0.8108231117584143, "grad_norm": 101.5, "learning_rate": 3.861229797510682e-06, "loss": 0.788, "step": 35630 }, { "epoch": 0.811050679289079, "grad_norm": 320.0, "learning_rate": 3.856585547092699e-06, "loss": 1.619, "step": 35640 }, { "epoch": 0.8112782468197438, "grad_norm": 165.0, "learning_rate": 3.851941296674717e-06, "loss": 1.0128, "step": 35650 }, { "epoch": 0.8115058143504085, "grad_norm": 133.0, "learning_rate": 3.847297046256734e-06, "loss": 0.8878, "step": 35660 }, { "epoch": 0.8117333818810732, "grad_norm": 154.0, "learning_rate": 3.842652795838752e-06, "loss": 0.7354, "step": 35670 }, { "epoch": 0.8119609494117379, "grad_norm": 127.0, "learning_rate": 3.838008545420769e-06, "loss": 1.0767, "step": 35680 }, { "epoch": 0.8121885169424027, "grad_norm": 187.0, "learning_rate": 3.833364295002787e-06, "loss": 0.9246, "step": 35690 }, { "epoch": 0.8124160844730673, "grad_norm": 0.2109375, "learning_rate": 3.828720044584804e-06, "loss": 0.9109, "step": 35700 }, { "epoch": 0.8126436520037321, "grad_norm": 99.5, "learning_rate": 3.824075794166822e-06, "loss": 0.9211, "step": 35710 }, { "epoch": 0.8128712195343968, "grad_norm": 102.5, "learning_rate": 3.81943154374884e-06, "loss": 0.6501, "step": 35720 }, { "epoch": 0.8130987870650616, "grad_norm": 0.5859375, "learning_rate": 3.814787293330857e-06, "loss": 0.9523, "step": 35730 }, { "epoch": 0.8133263545957263, "grad_norm": 0.0038299560546875, "learning_rate": 3.8101430429128743e-06, "loss": 0.3959, "step": 35740 }, { "epoch": 0.813553922126391, "grad_norm": 91.5, "learning_rate": 3.805498792494892e-06, "loss": 0.4729, "step": 35750 }, { "epoch": 0.8137814896570558, "grad_norm": 12.9375, "learning_rate": 3.800854542076909e-06, "loss": 0.4477, "step": 35760 }, { "epoch": 0.8140090571877204, "grad_norm": 87.5, "learning_rate": 3.7962102916589268e-06, "loss": 1.039, "step": 35770 }, { "epoch": 0.8142366247183852, "grad_norm": 106.5, "learning_rate": 3.791566041240944e-06, "loss": 0.542, "step": 35780 }, { "epoch": 0.8144641922490499, "grad_norm": 0.000377655029296875, "learning_rate": 3.7869217908229617e-06, "loss": 1.3955, "step": 35790 }, { "epoch": 0.8146917597797146, "grad_norm": 6.90625, "learning_rate": 3.7822775404049788e-06, "loss": 0.7089, "step": 35800 }, { "epoch": 0.8149193273103793, "grad_norm": 472.0, "learning_rate": 3.7776332899869967e-06, "loss": 1.2375, "step": 35810 }, { "epoch": 0.8151468948410441, "grad_norm": 74.5, "learning_rate": 3.7729890395690137e-06, "loss": 0.2331, "step": 35820 }, { "epoch": 0.8153744623717089, "grad_norm": 83.0, "learning_rate": 3.768344789151031e-06, "loss": 1.3552, "step": 35830 }, { "epoch": 0.8156020299023735, "grad_norm": 0.00457763671875, "learning_rate": 3.7637005387330487e-06, "loss": 0.8349, "step": 35840 }, { "epoch": 0.8158295974330383, "grad_norm": 44.75, "learning_rate": 3.759056288315066e-06, "loss": 0.71, "step": 35850 }, { "epoch": 0.816057164963703, "grad_norm": 0.109375, "learning_rate": 3.754412037897084e-06, "loss": 1.0302, "step": 35860 }, { "epoch": 0.8162847324943677, "grad_norm": 29.625, "learning_rate": 3.749767787479101e-06, "loss": 0.4066, "step": 35870 }, { "epoch": 0.8165123000250324, "grad_norm": 0.5703125, "learning_rate": 3.745123537061119e-06, "loss": 0.4334, "step": 35880 }, { "epoch": 0.8167398675556972, "grad_norm": 398.0, "learning_rate": 3.740479286643136e-06, "loss": 1.0484, "step": 35890 }, { "epoch": 0.8169674350863618, "grad_norm": 86.5, "learning_rate": 3.735835036225153e-06, "loss": 0.5296, "step": 35900 }, { "epoch": 0.8171950026170266, "grad_norm": 88.5, "learning_rate": 3.731190785807171e-06, "loss": 0.4069, "step": 35910 }, { "epoch": 0.8174225701476914, "grad_norm": 1032.0, "learning_rate": 3.7265465353891885e-06, "loss": 1.0553, "step": 35920 }, { "epoch": 0.817650137678356, "grad_norm": 83.5, "learning_rate": 3.721902284971206e-06, "loss": 0.7672, "step": 35930 }, { "epoch": 0.8178777052090208, "grad_norm": 86.5, "learning_rate": 3.7172580345532235e-06, "loss": 1.2129, "step": 35940 }, { "epoch": 0.8181052727396855, "grad_norm": 0.0888671875, "learning_rate": 3.712613784135241e-06, "loss": 1.0739, "step": 35950 }, { "epoch": 0.8183328402703502, "grad_norm": 0.451171875, "learning_rate": 3.7079695337172584e-06, "loss": 0.5745, "step": 35960 }, { "epoch": 0.8185604078010149, "grad_norm": 0.007476806640625, "learning_rate": 3.7033252832992755e-06, "loss": 0.6136, "step": 35970 }, { "epoch": 0.8187879753316797, "grad_norm": 196.0, "learning_rate": 3.6986810328812934e-06, "loss": 0.4984, "step": 35980 }, { "epoch": 0.8190155428623443, "grad_norm": 6.59375, "learning_rate": 3.6940367824633104e-06, "loss": 1.1809, "step": 35990 }, { "epoch": 0.8192431103930091, "grad_norm": 142.0, "learning_rate": 3.6893925320453283e-06, "loss": 0.4723, "step": 36000 }, { "epoch": 0.8194706779236739, "grad_norm": 0.1376953125, "learning_rate": 3.6847482816273454e-06, "loss": 0.2952, "step": 36010 }, { "epoch": 0.8196982454543386, "grad_norm": 0.03759765625, "learning_rate": 3.6801040312093633e-06, "loss": 0.4858, "step": 36020 }, { "epoch": 0.8199258129850033, "grad_norm": 200.0, "learning_rate": 3.6754597807913803e-06, "loss": 0.8237, "step": 36030 }, { "epoch": 0.820153380515668, "grad_norm": 203.0, "learning_rate": 3.670815530373398e-06, "loss": 0.9794, "step": 36040 }, { "epoch": 0.8203809480463328, "grad_norm": 0.004730224609375, "learning_rate": 3.6661712799554157e-06, "loss": 1.0119, "step": 36050 }, { "epoch": 0.8206085155769974, "grad_norm": 1432.0, "learning_rate": 3.6615270295374328e-06, "loss": 1.1913, "step": 36060 }, { "epoch": 0.8208360831076622, "grad_norm": 0.000385284423828125, "learning_rate": 3.6568827791194507e-06, "loss": 1.1917, "step": 36070 }, { "epoch": 0.8210636506383269, "grad_norm": 197.0, "learning_rate": 3.6522385287014677e-06, "loss": 0.7417, "step": 36080 }, { "epoch": 0.8212912181689916, "grad_norm": 0.0947265625, "learning_rate": 3.6475942782834856e-06, "loss": 0.8677, "step": 36090 }, { "epoch": 0.8215187856996564, "grad_norm": 18.25, "learning_rate": 3.6429500278655027e-06, "loss": 0.4982, "step": 36100 }, { "epoch": 0.8217463532303211, "grad_norm": 204.0, "learning_rate": 3.63830577744752e-06, "loss": 1.2077, "step": 36110 }, { "epoch": 0.8219739207609859, "grad_norm": 62.25, "learning_rate": 3.6336615270295377e-06, "loss": 0.8629, "step": 36120 }, { "epoch": 0.8222014882916505, "grad_norm": 254.0, "learning_rate": 3.629017276611555e-06, "loss": 1.0013, "step": 36130 }, { "epoch": 0.8224290558223153, "grad_norm": 211.0, "learning_rate": 3.6243730261935726e-06, "loss": 1.5575, "step": 36140 }, { "epoch": 0.82265662335298, "grad_norm": 84.0, "learning_rate": 3.61972877577559e-06, "loss": 0.8066, "step": 36150 }, { "epoch": 0.8228841908836447, "grad_norm": 191.0, "learning_rate": 3.615084525357608e-06, "loss": 0.7149, "step": 36160 }, { "epoch": 0.8231117584143094, "grad_norm": 10.5, "learning_rate": 3.610440274939625e-06, "loss": 0.5763, "step": 36170 }, { "epoch": 0.8233393259449742, "grad_norm": 0.003936767578125, "learning_rate": 3.605796024521642e-06, "loss": 0.6642, "step": 36180 }, { "epoch": 0.8235668934756389, "grad_norm": 3.65625, "learning_rate": 3.60115177410366e-06, "loss": 0.9072, "step": 36190 }, { "epoch": 0.8237944610063036, "grad_norm": 0.00433349609375, "learning_rate": 3.596507523685677e-06, "loss": 0.792, "step": 36200 }, { "epoch": 0.8240220285369684, "grad_norm": 0.578125, "learning_rate": 3.591863273267695e-06, "loss": 0.4959, "step": 36210 }, { "epoch": 0.824249596067633, "grad_norm": 174.0, "learning_rate": 3.5872190228497124e-06, "loss": 1.0424, "step": 36220 }, { "epoch": 0.8244771635982978, "grad_norm": 0.026123046875, "learning_rate": 3.58257477243173e-06, "loss": 0.7105, "step": 36230 }, { "epoch": 0.8247047311289625, "grad_norm": 144.0, "learning_rate": 3.5779305220137474e-06, "loss": 0.7311, "step": 36240 }, { "epoch": 0.8249322986596273, "grad_norm": 84.0, "learning_rate": 3.5732862715957645e-06, "loss": 0.3587, "step": 36250 }, { "epoch": 0.825159866190292, "grad_norm": 107.0, "learning_rate": 3.5686420211777824e-06, "loss": 0.3666, "step": 36260 }, { "epoch": 0.8253874337209567, "grad_norm": 15.6875, "learning_rate": 3.5639977707597994e-06, "loss": 1.1279, "step": 36270 }, { "epoch": 0.8256150012516215, "grad_norm": 3.25, "learning_rate": 3.5593535203418173e-06, "loss": 0.4852, "step": 36280 }, { "epoch": 0.8258425687822861, "grad_norm": 203.0, "learning_rate": 3.5547092699238344e-06, "loss": 0.2943, "step": 36290 }, { "epoch": 0.8260701363129509, "grad_norm": 0.2412109375, "learning_rate": 3.5500650195058523e-06, "loss": 0.4492, "step": 36300 }, { "epoch": 0.8262977038436156, "grad_norm": 752.0, "learning_rate": 3.5454207690878693e-06, "loss": 1.364, "step": 36310 }, { "epoch": 0.8265252713742803, "grad_norm": 194.0, "learning_rate": 3.540776518669887e-06, "loss": 0.3604, "step": 36320 }, { "epoch": 0.826752838904945, "grad_norm": 115.5, "learning_rate": 3.5361322682519043e-06, "loss": 1.0495, "step": 36330 }, { "epoch": 0.8269804064356098, "grad_norm": 0.01373291015625, "learning_rate": 3.5314880178339218e-06, "loss": 0.9358, "step": 36340 }, { "epoch": 0.8272079739662745, "grad_norm": 0.003692626953125, "learning_rate": 3.5268437674159397e-06, "loss": 0.7612, "step": 36350 }, { "epoch": 0.8274355414969392, "grad_norm": 94.5, "learning_rate": 3.5221995169979567e-06, "loss": 0.8492, "step": 36360 }, { "epoch": 0.827663109027604, "grad_norm": 169.0, "learning_rate": 3.5175552665799746e-06, "loss": 0.6884, "step": 36370 }, { "epoch": 0.8278906765582686, "grad_norm": 274.0, "learning_rate": 3.5129110161619917e-06, "loss": 0.8339, "step": 36380 }, { "epoch": 0.8281182440889334, "grad_norm": 42.5, "learning_rate": 3.5082667657440087e-06, "loss": 0.7294, "step": 36390 }, { "epoch": 0.8283458116195981, "grad_norm": 0.01953125, "learning_rate": 3.5036225153260266e-06, "loss": 0.5342, "step": 36400 }, { "epoch": 0.8285733791502629, "grad_norm": 0.01519775390625, "learning_rate": 3.498978264908044e-06, "loss": 0.5502, "step": 36410 }, { "epoch": 0.8288009466809275, "grad_norm": 35.5, "learning_rate": 3.4943340144900616e-06, "loss": 1.4139, "step": 36420 }, { "epoch": 0.8290285142115923, "grad_norm": 146.0, "learning_rate": 3.489689764072079e-06, "loss": 1.5905, "step": 36430 }, { "epoch": 0.8292560817422571, "grad_norm": 428.0, "learning_rate": 3.4850455136540965e-06, "loss": 1.185, "step": 36440 }, { "epoch": 0.8294836492729217, "grad_norm": 0.00946044921875, "learning_rate": 3.480401263236114e-06, "loss": 0.7056, "step": 36450 }, { "epoch": 0.8297112168035865, "grad_norm": 70.0, "learning_rate": 3.475757012818132e-06, "loss": 0.5355, "step": 36460 }, { "epoch": 0.8299387843342512, "grad_norm": 94.5, "learning_rate": 3.471112762400149e-06, "loss": 0.8507, "step": 36470 }, { "epoch": 0.8301663518649159, "grad_norm": 152.0, "learning_rate": 3.466468511982166e-06, "loss": 1.4564, "step": 36480 }, { "epoch": 0.8303939193955806, "grad_norm": 0.294921875, "learning_rate": 3.461824261564184e-06, "loss": 0.8107, "step": 36490 }, { "epoch": 0.8306214869262454, "grad_norm": 122.5, "learning_rate": 3.457180011146201e-06, "loss": 0.249, "step": 36500 }, { "epoch": 0.83084905445691, "grad_norm": 0.341796875, "learning_rate": 3.452535760728219e-06, "loss": 0.7191, "step": 36510 }, { "epoch": 0.8310766219875748, "grad_norm": 0.0004825592041015625, "learning_rate": 3.4478915103102364e-06, "loss": 0.6621, "step": 36520 }, { "epoch": 0.8313041895182396, "grad_norm": 156.0, "learning_rate": 3.443247259892254e-06, "loss": 1.1363, "step": 36530 }, { "epoch": 0.8315317570489043, "grad_norm": 162.0, "learning_rate": 3.4386030094742713e-06, "loss": 0.3841, "step": 36540 }, { "epoch": 0.831759324579569, "grad_norm": 48.5, "learning_rate": 3.4339587590562884e-06, "loss": 0.9027, "step": 36550 }, { "epoch": 0.8319868921102337, "grad_norm": 0.01055908203125, "learning_rate": 3.4293145086383063e-06, "loss": 0.2517, "step": 36560 }, { "epoch": 0.8322144596408985, "grad_norm": 107.0, "learning_rate": 3.4246702582203233e-06, "loss": 0.5416, "step": 36570 }, { "epoch": 0.8324420271715631, "grad_norm": 197.0, "learning_rate": 3.4200260078023412e-06, "loss": 0.8416, "step": 36580 }, { "epoch": 0.8326695947022279, "grad_norm": 0.0012969970703125, "learning_rate": 3.4153817573843583e-06, "loss": 0.8415, "step": 36590 }, { "epoch": 0.8328971622328926, "grad_norm": 2.359375, "learning_rate": 3.410737506966376e-06, "loss": 0.5321, "step": 36600 }, { "epoch": 0.8331247297635573, "grad_norm": 74.5, "learning_rate": 3.4060932565483933e-06, "loss": 0.9315, "step": 36610 }, { "epoch": 0.8333522972942221, "grad_norm": 0.0140380859375, "learning_rate": 3.4014490061304107e-06, "loss": 0.379, "step": 36620 }, { "epoch": 0.8335798648248868, "grad_norm": 0.01123046875, "learning_rate": 3.396804755712428e-06, "loss": 0.8327, "step": 36630 }, { "epoch": 0.8338074323555515, "grad_norm": 90.5, "learning_rate": 3.3921605052944457e-06, "loss": 0.9565, "step": 36640 }, { "epoch": 0.8340349998862162, "grad_norm": 78.5, "learning_rate": 3.3875162548764636e-06, "loss": 0.6883, "step": 36650 }, { "epoch": 0.834262567416881, "grad_norm": 183.0, "learning_rate": 3.3828720044584806e-06, "loss": 0.9348, "step": 36660 }, { "epoch": 0.8344901349475456, "grad_norm": 0.03125, "learning_rate": 3.3782277540404985e-06, "loss": 1.6461, "step": 36670 }, { "epoch": 0.8347177024782104, "grad_norm": 0.0089111328125, "learning_rate": 3.3735835036225156e-06, "loss": 1.2373, "step": 36680 }, { "epoch": 0.8349452700088751, "grad_norm": 59.25, "learning_rate": 3.3689392532045327e-06, "loss": 1.2515, "step": 36690 }, { "epoch": 0.8351728375395399, "grad_norm": 0.00138092041015625, "learning_rate": 3.3642950027865506e-06, "loss": 0.3459, "step": 36700 }, { "epoch": 0.8354004050702046, "grad_norm": 51.0, "learning_rate": 3.359650752368568e-06, "loss": 0.2199, "step": 36710 }, { "epoch": 0.8356279726008693, "grad_norm": 158.0, "learning_rate": 3.3550065019505855e-06, "loss": 0.7559, "step": 36720 }, { "epoch": 0.8358555401315341, "grad_norm": 324.0, "learning_rate": 3.350362251532603e-06, "loss": 0.5995, "step": 36730 }, { "epoch": 0.8360831076621987, "grad_norm": 0.003753662109375, "learning_rate": 3.3457180011146205e-06, "loss": 0.2693, "step": 36740 }, { "epoch": 0.8363106751928635, "grad_norm": 1.9296875, "learning_rate": 3.341073750696638e-06, "loss": 1.778, "step": 36750 }, { "epoch": 0.8365382427235282, "grad_norm": 143.0, "learning_rate": 3.336429500278655e-06, "loss": 0.7647, "step": 36760 }, { "epoch": 0.8367658102541929, "grad_norm": 0.84765625, "learning_rate": 3.331785249860673e-06, "loss": 0.5585, "step": 36770 }, { "epoch": 0.8369933777848576, "grad_norm": 0.0087890625, "learning_rate": 3.32714099944269e-06, "loss": 1.1354, "step": 36780 }, { "epoch": 0.8372209453155224, "grad_norm": 158.0, "learning_rate": 3.322496749024708e-06, "loss": 0.3952, "step": 36790 }, { "epoch": 0.8374485128461872, "grad_norm": 126.0, "learning_rate": 3.317852498606725e-06, "loss": 0.7406, "step": 36800 }, { "epoch": 0.8376760803768518, "grad_norm": 0.267578125, "learning_rate": 3.313208248188743e-06, "loss": 0.2815, "step": 36810 }, { "epoch": 0.8379036479075166, "grad_norm": 96.0, "learning_rate": 3.3085639977707603e-06, "loss": 0.8999, "step": 36820 }, { "epoch": 0.8381312154381813, "grad_norm": 151.0, "learning_rate": 3.3039197473527774e-06, "loss": 0.4653, "step": 36830 }, { "epoch": 0.838358782968846, "grad_norm": 552.0, "learning_rate": 3.2992754969347953e-06, "loss": 0.233, "step": 36840 }, { "epoch": 0.8385863504995107, "grad_norm": 0.00103759765625, "learning_rate": 3.2946312465168123e-06, "loss": 0.6373, "step": 36850 }, { "epoch": 0.8388139180301755, "grad_norm": 193.0, "learning_rate": 3.2899869960988302e-06, "loss": 1.2057, "step": 36860 }, { "epoch": 0.8390414855608401, "grad_norm": 7.53125, "learning_rate": 3.2853427456808473e-06, "loss": 0.3808, "step": 36870 }, { "epoch": 0.8392690530915049, "grad_norm": 0.0027313232421875, "learning_rate": 3.280698495262865e-06, "loss": 0.6334, "step": 36880 }, { "epoch": 0.8394966206221697, "grad_norm": 0.0732421875, "learning_rate": 3.2760542448448822e-06, "loss": 0.4672, "step": 36890 }, { "epoch": 0.8397241881528343, "grad_norm": 368.0, "learning_rate": 3.2714099944268997e-06, "loss": 1.2501, "step": 36900 }, { "epoch": 0.8399517556834991, "grad_norm": 86.5, "learning_rate": 3.266765744008917e-06, "loss": 0.6497, "step": 36910 }, { "epoch": 0.8401793232141638, "grad_norm": 21.75, "learning_rate": 3.2621214935909347e-06, "loss": 0.3415, "step": 36920 }, { "epoch": 0.8404068907448285, "grad_norm": 234.0, "learning_rate": 3.257477243172952e-06, "loss": 1.5189, "step": 36930 }, { "epoch": 0.8406344582754932, "grad_norm": 92.0, "learning_rate": 3.2528329927549696e-06, "loss": 0.5246, "step": 36940 }, { "epoch": 0.840862025806158, "grad_norm": 101.0, "learning_rate": 3.2481887423369875e-06, "loss": 1.515, "step": 36950 }, { "epoch": 0.8410895933368226, "grad_norm": 152.0, "learning_rate": 3.2435444919190046e-06, "loss": 0.4679, "step": 36960 }, { "epoch": 0.8413171608674874, "grad_norm": 77.5, "learning_rate": 3.2389002415010216e-06, "loss": 0.8459, "step": 36970 }, { "epoch": 0.8415447283981522, "grad_norm": 420.0, "learning_rate": 3.2342559910830395e-06, "loss": 0.7959, "step": 36980 }, { "epoch": 0.8417722959288169, "grad_norm": 390.0, "learning_rate": 3.2296117406650566e-06, "loss": 0.7391, "step": 36990 }, { "epoch": 0.8419998634594816, "grad_norm": 47.75, "learning_rate": 3.2249674902470745e-06, "loss": 1.3744, "step": 37000 }, { "epoch": 0.8422274309901463, "grad_norm": 0.043212890625, "learning_rate": 3.220323239829092e-06, "loss": 0.2153, "step": 37010 }, { "epoch": 0.8424549985208111, "grad_norm": 486.0, "learning_rate": 3.2156789894111094e-06, "loss": 1.0625, "step": 37020 }, { "epoch": 0.8426825660514757, "grad_norm": 0.00927734375, "learning_rate": 3.211034738993127e-06, "loss": 0.4297, "step": 37030 }, { "epoch": 0.8429101335821405, "grad_norm": 0.001312255859375, "learning_rate": 3.206390488575144e-06, "loss": 1.2573, "step": 37040 }, { "epoch": 0.8431377011128052, "grad_norm": 0.052978515625, "learning_rate": 3.201746238157162e-06, "loss": 1.4472, "step": 37050 }, { "epoch": 0.84336526864347, "grad_norm": 2.953125, "learning_rate": 3.197101987739179e-06, "loss": 0.861, "step": 37060 }, { "epoch": 0.8435928361741347, "grad_norm": 164.0, "learning_rate": 3.192457737321197e-06, "loss": 1.1493, "step": 37070 }, { "epoch": 0.8438204037047994, "grad_norm": 5.34375, "learning_rate": 3.187813486903214e-06, "loss": 0.6872, "step": 37080 }, { "epoch": 0.8440479712354642, "grad_norm": 204.0, "learning_rate": 3.183169236485232e-06, "loss": 0.7484, "step": 37090 }, { "epoch": 0.8442755387661288, "grad_norm": 3.9375, "learning_rate": 3.178524986067249e-06, "loss": 1.6068, "step": 37100 }, { "epoch": 0.8445031062967936, "grad_norm": 1.015625, "learning_rate": 3.1738807356492663e-06, "loss": 1.2351, "step": 37110 }, { "epoch": 0.8447306738274583, "grad_norm": 57.25, "learning_rate": 3.1692364852312842e-06, "loss": 0.5311, "step": 37120 }, { "epoch": 0.844958241358123, "grad_norm": 0.373046875, "learning_rate": 3.1645922348133013e-06, "loss": 0.3901, "step": 37130 }, { "epoch": 0.8451858088887877, "grad_norm": 0.028564453125, "learning_rate": 3.159947984395319e-06, "loss": 0.2723, "step": 37140 }, { "epoch": 0.8454133764194525, "grad_norm": 6.0, "learning_rate": 3.1553037339773362e-06, "loss": 0.8138, "step": 37150 }, { "epoch": 0.8456409439501172, "grad_norm": 47.25, "learning_rate": 3.150659483559354e-06, "loss": 0.4094, "step": 37160 }, { "epoch": 0.8458685114807819, "grad_norm": 2.53125, "learning_rate": 3.146015233141371e-06, "loss": 0.6714, "step": 37170 }, { "epoch": 0.8460960790114467, "grad_norm": 0.007049560546875, "learning_rate": 3.1413709827233887e-06, "loss": 0.4559, "step": 37180 }, { "epoch": 0.8463236465421113, "grad_norm": 128.0, "learning_rate": 3.136726732305406e-06, "loss": 0.3019, "step": 37190 }, { "epoch": 0.8465512140727761, "grad_norm": 1624.0, "learning_rate": 3.1320824818874236e-06, "loss": 0.9804, "step": 37200 }, { "epoch": 0.8467787816034408, "grad_norm": 22.75, "learning_rate": 3.127438231469441e-06, "loss": 0.6202, "step": 37210 }, { "epoch": 0.8470063491341056, "grad_norm": 0.0016021728515625, "learning_rate": 3.1227939810514586e-06, "loss": 0.9937, "step": 37220 }, { "epoch": 0.8472339166647703, "grad_norm": 4.5625, "learning_rate": 3.118149730633476e-06, "loss": 0.2964, "step": 37230 }, { "epoch": 0.847461484195435, "grad_norm": 0.00101470947265625, "learning_rate": 3.1135054802154935e-06, "loss": 0.6394, "step": 37240 }, { "epoch": 0.8476890517260998, "grad_norm": 260.0, "learning_rate": 3.1088612297975106e-06, "loss": 0.958, "step": 37250 }, { "epoch": 0.8479166192567644, "grad_norm": 107.5, "learning_rate": 3.1042169793795285e-06, "loss": 0.7758, "step": 37260 }, { "epoch": 0.8481441867874292, "grad_norm": 79.0, "learning_rate": 3.0995727289615456e-06, "loss": 1.1645, "step": 37270 }, { "epoch": 0.8483717543180939, "grad_norm": 0.515625, "learning_rate": 3.0949284785435635e-06, "loss": 0.9962, "step": 37280 }, { "epoch": 0.8485993218487586, "grad_norm": 580.0, "learning_rate": 3.0902842281255805e-06, "loss": 0.9814, "step": 37290 }, { "epoch": 0.8488268893794233, "grad_norm": 0.28125, "learning_rate": 3.0856399777075984e-06, "loss": 0.5737, "step": 37300 }, { "epoch": 0.8490544569100881, "grad_norm": 3.609375, "learning_rate": 3.080995727289616e-06, "loss": 0.9064, "step": 37310 }, { "epoch": 0.8492820244407528, "grad_norm": 68.0, "learning_rate": 3.076351476871633e-06, "loss": 0.6521, "step": 37320 }, { "epoch": 0.8495095919714175, "grad_norm": 79.5, "learning_rate": 3.071707226453651e-06, "loss": 1.1957, "step": 37330 }, { "epoch": 0.8497371595020823, "grad_norm": 0.322265625, "learning_rate": 3.067062976035668e-06, "loss": 0.964, "step": 37340 }, { "epoch": 0.849964727032747, "grad_norm": 0.037353515625, "learning_rate": 3.062418725617686e-06, "loss": 1.2012, "step": 37350 }, { "epoch": 0.8501922945634117, "grad_norm": 46.75, "learning_rate": 3.057774475199703e-06, "loss": 0.6458, "step": 37360 }, { "epoch": 0.8504198620940764, "grad_norm": 0.0341796875, "learning_rate": 3.0531302247817208e-06, "loss": 0.4323, "step": 37370 }, { "epoch": 0.8506474296247412, "grad_norm": 0.0013275146484375, "learning_rate": 3.048485974363738e-06, "loss": 0.5469, "step": 37380 }, { "epoch": 0.8508749971554058, "grad_norm": 0.017578125, "learning_rate": 3.0438417239457553e-06, "loss": 0.709, "step": 37390 }, { "epoch": 0.8511025646860706, "grad_norm": 0.000152587890625, "learning_rate": 3.0391974735277728e-06, "loss": 0.5888, "step": 37400 }, { "epoch": 0.8513301322167354, "grad_norm": 17.0, "learning_rate": 3.0345532231097903e-06, "loss": 0.9634, "step": 37410 }, { "epoch": 0.8515576997474, "grad_norm": 207.0, "learning_rate": 3.029908972691808e-06, "loss": 0.9425, "step": 37420 }, { "epoch": 0.8517852672780648, "grad_norm": 181.0, "learning_rate": 3.0252647222738252e-06, "loss": 0.6937, "step": 37430 }, { "epoch": 0.8520128348087295, "grad_norm": 720.0, "learning_rate": 3.020620471855843e-06, "loss": 0.6997, "step": 37440 }, { "epoch": 0.8522404023393942, "grad_norm": 148.0, "learning_rate": 3.01597622143786e-06, "loss": 0.5342, "step": 37450 }, { "epoch": 0.8524679698700589, "grad_norm": 128.0, "learning_rate": 3.0113319710198772e-06, "loss": 0.8169, "step": 37460 }, { "epoch": 0.8526955374007237, "grad_norm": 386.0, "learning_rate": 3.006687720601895e-06, "loss": 1.1508, "step": 37470 }, { "epoch": 0.8529231049313883, "grad_norm": 199.0, "learning_rate": 3.0020434701839126e-06, "loss": 0.6555, "step": 37480 }, { "epoch": 0.8531506724620531, "grad_norm": 0.45703125, "learning_rate": 2.99739921976593e-06, "loss": 0.3029, "step": 37490 }, { "epoch": 0.8533782399927179, "grad_norm": 490.0, "learning_rate": 2.9927549693479476e-06, "loss": 0.6338, "step": 37500 }, { "epoch": 0.8536058075233826, "grad_norm": 0.0017547607421875, "learning_rate": 2.988110718929965e-06, "loss": 0.6076, "step": 37510 }, { "epoch": 0.8538333750540473, "grad_norm": 125.0, "learning_rate": 2.9834664685119825e-06, "loss": 0.7814, "step": 37520 }, { "epoch": 0.854060942584712, "grad_norm": 156.0, "learning_rate": 2.9788222180939996e-06, "loss": 0.2773, "step": 37530 }, { "epoch": 0.8542885101153768, "grad_norm": 236.0, "learning_rate": 2.9741779676760175e-06, "loss": 0.9726, "step": 37540 }, { "epoch": 0.8545160776460414, "grad_norm": 31.5, "learning_rate": 2.9695337172580345e-06, "loss": 0.3637, "step": 37550 }, { "epoch": 0.8547436451767062, "grad_norm": 0.71875, "learning_rate": 2.9648894668400524e-06, "loss": 1.4841, "step": 37560 }, { "epoch": 0.8549712127073709, "grad_norm": 221.0, "learning_rate": 2.9602452164220695e-06, "loss": 0.5392, "step": 37570 }, { "epoch": 0.8551987802380356, "grad_norm": 23.875, "learning_rate": 2.9556009660040874e-06, "loss": 0.412, "step": 37580 }, { "epoch": 0.8554263477687004, "grad_norm": 142.0, "learning_rate": 2.9509567155861044e-06, "loss": 0.7894, "step": 37590 }, { "epoch": 0.8556539152993651, "grad_norm": 9.918212890625e-05, "learning_rate": 2.946312465168122e-06, "loss": 0.3653, "step": 37600 }, { "epoch": 0.8558814828300298, "grad_norm": 94.0, "learning_rate": 2.94166821475014e-06, "loss": 0.879, "step": 37610 }, { "epoch": 0.8561090503606945, "grad_norm": 0.0269775390625, "learning_rate": 2.937023964332157e-06, "loss": 0.4148, "step": 37620 }, { "epoch": 0.8563366178913593, "grad_norm": 130.0, "learning_rate": 2.9323797139141748e-06, "loss": 0.397, "step": 37630 }, { "epoch": 0.856564185422024, "grad_norm": 0.00164031982421875, "learning_rate": 2.927735463496192e-06, "loss": 0.5978, "step": 37640 }, { "epoch": 0.8567917529526887, "grad_norm": 0.004150390625, "learning_rate": 2.9230912130782097e-06, "loss": 0.2544, "step": 37650 }, { "epoch": 0.8570193204833534, "grad_norm": 223.0, "learning_rate": 2.918446962660227e-06, "loss": 0.6235, "step": 37660 }, { "epoch": 0.8572468880140182, "grad_norm": 28.375, "learning_rate": 2.9138027122422443e-06, "loss": 1.0042, "step": 37670 }, { "epoch": 0.8574744555446829, "grad_norm": 83.5, "learning_rate": 2.9091584618242618e-06, "loss": 0.7284, "step": 37680 }, { "epoch": 0.8577020230753476, "grad_norm": 296.0, "learning_rate": 2.9045142114062792e-06, "loss": 0.6937, "step": 37690 }, { "epoch": 0.8579295906060124, "grad_norm": 0.48828125, "learning_rate": 2.8998699609882967e-06, "loss": 0.5004, "step": 37700 }, { "epoch": 0.858157158136677, "grad_norm": 250.0, "learning_rate": 2.895225710570314e-06, "loss": 0.6805, "step": 37710 }, { "epoch": 0.8583847256673418, "grad_norm": 262.0, "learning_rate": 2.8905814601523317e-06, "loss": 1.0976, "step": 37720 }, { "epoch": 0.8586122931980065, "grad_norm": 0.431640625, "learning_rate": 2.885937209734349e-06, "loss": 0.7135, "step": 37730 }, { "epoch": 0.8588398607286712, "grad_norm": 328.0, "learning_rate": 2.881292959316366e-06, "loss": 0.9131, "step": 37740 }, { "epoch": 0.8590674282593359, "grad_norm": 98.0, "learning_rate": 2.876648708898384e-06, "loss": 1.331, "step": 37750 }, { "epoch": 0.8592949957900007, "grad_norm": 0.046630859375, "learning_rate": 2.872004458480401e-06, "loss": 0.9098, "step": 37760 }, { "epoch": 0.8595225633206655, "grad_norm": 144.0, "learning_rate": 2.867360208062419e-06, "loss": 0.9397, "step": 37770 }, { "epoch": 0.8597501308513301, "grad_norm": 43.75, "learning_rate": 2.8627159576444365e-06, "loss": 1.167, "step": 37780 }, { "epoch": 0.8599776983819949, "grad_norm": 0.0074462890625, "learning_rate": 2.858071707226454e-06, "loss": 0.5232, "step": 37790 }, { "epoch": 0.8602052659126596, "grad_norm": 0.03466796875, "learning_rate": 2.8534274568084715e-06, "loss": 0.6691, "step": 37800 }, { "epoch": 0.8604328334433243, "grad_norm": 168.0, "learning_rate": 2.8487832063904885e-06, "loss": 0.5781, "step": 37810 }, { "epoch": 0.860660400973989, "grad_norm": 149.0, "learning_rate": 2.8441389559725064e-06, "loss": 0.8373, "step": 37820 }, { "epoch": 0.8608879685046538, "grad_norm": 65.0, "learning_rate": 2.8394947055545235e-06, "loss": 0.3185, "step": 37830 }, { "epoch": 0.8611155360353184, "grad_norm": 0.031494140625, "learning_rate": 2.8348504551365414e-06, "loss": 1.0476, "step": 37840 }, { "epoch": 0.8613431035659832, "grad_norm": 284.0, "learning_rate": 2.8302062047185585e-06, "loss": 0.3016, "step": 37850 }, { "epoch": 0.861570671096648, "grad_norm": 115.5, "learning_rate": 2.8255619543005764e-06, "loss": 1.1202, "step": 37860 }, { "epoch": 0.8617982386273126, "grad_norm": 450.0, "learning_rate": 2.8209177038825934e-06, "loss": 0.9652, "step": 37870 }, { "epoch": 0.8620258061579774, "grad_norm": 109.5, "learning_rate": 2.816273453464611e-06, "loss": 0.2732, "step": 37880 }, { "epoch": 0.8622533736886421, "grad_norm": 0.008056640625, "learning_rate": 2.8116292030466284e-06, "loss": 1.2132, "step": 37890 }, { "epoch": 0.8624809412193069, "grad_norm": 141.0, "learning_rate": 2.806984952628646e-06, "loss": 0.7935, "step": 37900 }, { "epoch": 0.8627085087499715, "grad_norm": 117.0, "learning_rate": 2.8023407022106638e-06, "loss": 0.6853, "step": 37910 }, { "epoch": 0.8629360762806363, "grad_norm": 134.0, "learning_rate": 2.797696451792681e-06, "loss": 0.6329, "step": 37920 }, { "epoch": 0.863163643811301, "grad_norm": 0.045654296875, "learning_rate": 2.7930522013746987e-06, "loss": 0.3806, "step": 37930 }, { "epoch": 0.8633912113419657, "grad_norm": 181.0, "learning_rate": 2.7884079509567158e-06, "loss": 0.3467, "step": 37940 }, { "epoch": 0.8636187788726305, "grad_norm": 78.0, "learning_rate": 2.783763700538733e-06, "loss": 0.794, "step": 37950 }, { "epoch": 0.8638463464032952, "grad_norm": 14.9375, "learning_rate": 2.7791194501207507e-06, "loss": 0.4774, "step": 37960 }, { "epoch": 0.8640739139339599, "grad_norm": 288.0, "learning_rate": 2.774475199702768e-06, "loss": 1.1021, "step": 37970 }, { "epoch": 0.8643014814646246, "grad_norm": 41.75, "learning_rate": 2.7698309492847857e-06, "loss": 0.8135, "step": 37980 }, { "epoch": 0.8645290489952894, "grad_norm": 15.375, "learning_rate": 2.765186698866803e-06, "loss": 1.4092, "step": 37990 }, { "epoch": 0.864756616525954, "grad_norm": 1.9921875, "learning_rate": 2.7605424484488206e-06, "loss": 0.7986, "step": 38000 }, { "epoch": 0.8649841840566188, "grad_norm": 9.5367431640625e-05, "learning_rate": 2.755898198030838e-06, "loss": 0.5902, "step": 38010 }, { "epoch": 0.8652117515872835, "grad_norm": 0.0015716552734375, "learning_rate": 2.751253947612855e-06, "loss": 0.6114, "step": 38020 }, { "epoch": 0.8654393191179482, "grad_norm": 104.5, "learning_rate": 2.746609697194873e-06, "loss": 1.7117, "step": 38030 }, { "epoch": 0.865666886648613, "grad_norm": 131.0, "learning_rate": 2.74196544677689e-06, "loss": 1.4261, "step": 38040 }, { "epoch": 0.8658944541792777, "grad_norm": 108.5, "learning_rate": 2.737321196358908e-06, "loss": 1.1102, "step": 38050 }, { "epoch": 0.8661220217099425, "grad_norm": 165.0, "learning_rate": 2.732676945940925e-06, "loss": 1.4162, "step": 38060 }, { "epoch": 0.8663495892406071, "grad_norm": 0.0478515625, "learning_rate": 2.728032695522943e-06, "loss": 0.8285, "step": 38070 }, { "epoch": 0.8665771567712719, "grad_norm": 528.0, "learning_rate": 2.7233884451049605e-06, "loss": 0.6101, "step": 38080 }, { "epoch": 0.8668047243019366, "grad_norm": 366.0, "learning_rate": 2.7187441946869775e-06, "loss": 0.7081, "step": 38090 }, { "epoch": 0.8670322918326013, "grad_norm": 92.0, "learning_rate": 2.7140999442689954e-06, "loss": 0.6953, "step": 38100 }, { "epoch": 0.867259859363266, "grad_norm": 52.25, "learning_rate": 2.7094556938510125e-06, "loss": 1.0633, "step": 38110 }, { "epoch": 0.8674874268939308, "grad_norm": 39.0, "learning_rate": 2.7048114434330304e-06, "loss": 0.5285, "step": 38120 }, { "epoch": 0.8677149944245955, "grad_norm": 0.0301513671875, "learning_rate": 2.7001671930150474e-06, "loss": 0.0967, "step": 38130 }, { "epoch": 0.8679425619552602, "grad_norm": 648.0, "learning_rate": 2.6955229425970653e-06, "loss": 1.3772, "step": 38140 }, { "epoch": 0.868170129485925, "grad_norm": 0.0030517578125, "learning_rate": 2.6908786921790824e-06, "loss": 0.6069, "step": 38150 }, { "epoch": 0.8683976970165896, "grad_norm": 0.009033203125, "learning_rate": 2.6862344417611e-06, "loss": 0.874, "step": 38160 }, { "epoch": 0.8686252645472544, "grad_norm": 0.0014190673828125, "learning_rate": 2.6815901913431173e-06, "loss": 0.5441, "step": 38170 }, { "epoch": 0.8688528320779191, "grad_norm": 213.0, "learning_rate": 2.676945940925135e-06, "loss": 0.3126, "step": 38180 }, { "epoch": 0.8690803996085839, "grad_norm": 13.375, "learning_rate": 2.6723016905071523e-06, "loss": 0.3812, "step": 38190 }, { "epoch": 0.8693079671392486, "grad_norm": 172.0, "learning_rate": 2.6676574400891698e-06, "loss": 1.0021, "step": 38200 }, { "epoch": 0.8695355346699133, "grad_norm": 33.5, "learning_rate": 2.6630131896711877e-06, "loss": 0.0799, "step": 38210 }, { "epoch": 0.8697631022005781, "grad_norm": 117.0, "learning_rate": 2.6583689392532047e-06, "loss": 0.4817, "step": 38220 }, { "epoch": 0.8699906697312427, "grad_norm": 153.0, "learning_rate": 2.653724688835222e-06, "loss": 0.8601, "step": 38230 }, { "epoch": 0.8702182372619075, "grad_norm": 0.00067138671875, "learning_rate": 2.6490804384172397e-06, "loss": 0.6808, "step": 38240 }, { "epoch": 0.8704458047925722, "grad_norm": 174.0, "learning_rate": 2.6444361879992568e-06, "loss": 0.563, "step": 38250 }, { "epoch": 0.8706733723232369, "grad_norm": 268.0, "learning_rate": 2.6397919375812747e-06, "loss": 0.3454, "step": 38260 }, { "epoch": 0.8709009398539016, "grad_norm": 93.0, "learning_rate": 2.635147687163292e-06, "loss": 0.4794, "step": 38270 }, { "epoch": 0.8711285073845664, "grad_norm": 252.0, "learning_rate": 2.6305034367453096e-06, "loss": 0.7577, "step": 38280 }, { "epoch": 0.8713560749152311, "grad_norm": 0.001922607421875, "learning_rate": 2.625859186327327e-06, "loss": 1.0244, "step": 38290 }, { "epoch": 0.8715836424458958, "grad_norm": 37.25, "learning_rate": 2.621214935909344e-06, "loss": 0.8107, "step": 38300 }, { "epoch": 0.8718112099765606, "grad_norm": 88.0, "learning_rate": 2.616570685491362e-06, "loss": 1.0488, "step": 38310 }, { "epoch": 0.8720387775072252, "grad_norm": 624.0, "learning_rate": 2.611926435073379e-06, "loss": 1.5232, "step": 38320 }, { "epoch": 0.87226634503789, "grad_norm": 198.0, "learning_rate": 2.607282184655397e-06, "loss": 0.827, "step": 38330 }, { "epoch": 0.8724939125685547, "grad_norm": 58.0, "learning_rate": 2.602637934237414e-06, "loss": 1.3417, "step": 38340 }, { "epoch": 0.8727214800992195, "grad_norm": 0.043701171875, "learning_rate": 2.597993683819432e-06, "loss": 0.5065, "step": 38350 }, { "epoch": 0.8729490476298841, "grad_norm": 12.0625, "learning_rate": 2.593349433401449e-06, "loss": 0.4259, "step": 38360 }, { "epoch": 0.8731766151605489, "grad_norm": 152.0, "learning_rate": 2.588705182983467e-06, "loss": 0.7806, "step": 38370 }, { "epoch": 0.8734041826912137, "grad_norm": 38.75, "learning_rate": 2.584060932565484e-06, "loss": 0.8262, "step": 38380 }, { "epoch": 0.8736317502218783, "grad_norm": 197.0, "learning_rate": 2.5794166821475015e-06, "loss": 1.4511, "step": 38390 }, { "epoch": 0.8738593177525431, "grad_norm": 8.5, "learning_rate": 2.5747724317295194e-06, "loss": 0.9592, "step": 38400 }, { "epoch": 0.8740868852832078, "grad_norm": 0.00439453125, "learning_rate": 2.5701281813115364e-06, "loss": 1.0694, "step": 38410 }, { "epoch": 0.8743144528138725, "grad_norm": 0.0045166015625, "learning_rate": 2.5654839308935543e-06, "loss": 0.711, "step": 38420 }, { "epoch": 0.8745420203445372, "grad_norm": 176.0, "learning_rate": 2.5608396804755714e-06, "loss": 1.7328, "step": 38430 }, { "epoch": 0.874769587875202, "grad_norm": 398.0, "learning_rate": 2.5561954300575893e-06, "loss": 1.7606, "step": 38440 }, { "epoch": 0.8749971554058666, "grad_norm": 0.00323486328125, "learning_rate": 2.5515511796396063e-06, "loss": 0.4511, "step": 38450 }, { "epoch": 0.8752247229365314, "grad_norm": 107.5, "learning_rate": 2.546906929221624e-06, "loss": 0.7491, "step": 38460 }, { "epoch": 0.8754522904671962, "grad_norm": 0.006683349609375, "learning_rate": 2.5422626788036413e-06, "loss": 0.1302, "step": 38470 }, { "epoch": 0.8756798579978609, "grad_norm": 0.0019683837890625, "learning_rate": 2.5376184283856588e-06, "loss": 0.123, "step": 38480 }, { "epoch": 0.8759074255285256, "grad_norm": 1.3515625, "learning_rate": 2.5329741779676762e-06, "loss": 0.4095, "step": 38490 }, { "epoch": 0.8761349930591903, "grad_norm": 0.001007080078125, "learning_rate": 2.5283299275496937e-06, "loss": 0.6416, "step": 38500 }, { "epoch": 0.8763625605898551, "grad_norm": 158.0, "learning_rate": 2.5236856771317116e-06, "loss": 0.6458, "step": 38510 }, { "epoch": 0.8765901281205197, "grad_norm": 74.0, "learning_rate": 2.5190414267137287e-06, "loss": 0.5827, "step": 38520 }, { "epoch": 0.8768176956511845, "grad_norm": 0.000705718994140625, "learning_rate": 2.5143971762957457e-06, "loss": 0.3351, "step": 38530 }, { "epoch": 0.8770452631818492, "grad_norm": 692.0, "learning_rate": 2.5097529258777636e-06, "loss": 0.6072, "step": 38540 }, { "epoch": 0.8772728307125139, "grad_norm": 140.0, "learning_rate": 2.5051086754597807e-06, "loss": 1.0702, "step": 38550 }, { "epoch": 0.8775003982431787, "grad_norm": 178.0, "learning_rate": 2.5004644250417986e-06, "loss": 0.5, "step": 38560 }, { "epoch": 0.8777279657738434, "grad_norm": 0.006683349609375, "learning_rate": 2.495820174623816e-06, "loss": 0.4341, "step": 38570 }, { "epoch": 0.8779555333045082, "grad_norm": 0.06982421875, "learning_rate": 2.4911759242058335e-06, "loss": 0.414, "step": 38580 }, { "epoch": 0.8781831008351728, "grad_norm": 0.0625, "learning_rate": 2.486531673787851e-06, "loss": 1.1987, "step": 38590 }, { "epoch": 0.8784106683658376, "grad_norm": 0.1171875, "learning_rate": 2.4818874233698685e-06, "loss": 0.0302, "step": 38600 }, { "epoch": 0.8786382358965023, "grad_norm": 30.5, "learning_rate": 2.477243172951886e-06, "loss": 0.5378, "step": 38610 }, { "epoch": 0.878865803427167, "grad_norm": 9.4375, "learning_rate": 2.472598922533903e-06, "loss": 0.4307, "step": 38620 }, { "epoch": 0.8790933709578317, "grad_norm": 185.0, "learning_rate": 2.4679546721159205e-06, "loss": 0.5764, "step": 38630 }, { "epoch": 0.8793209384884965, "grad_norm": 462.0, "learning_rate": 2.463310421697938e-06, "loss": 0.8343, "step": 38640 }, { "epoch": 0.8795485060191612, "grad_norm": 199.0, "learning_rate": 2.4586661712799555e-06, "loss": 1.9169, "step": 38650 }, { "epoch": 0.8797760735498259, "grad_norm": 0.08740234375, "learning_rate": 2.454021920861973e-06, "loss": 0.2283, "step": 38660 }, { "epoch": 0.8800036410804907, "grad_norm": 0.72265625, "learning_rate": 2.4493776704439904e-06, "loss": 0.57, "step": 38670 }, { "epoch": 0.8802312086111553, "grad_norm": 106.0, "learning_rate": 2.444733420026008e-06, "loss": 1.2208, "step": 38680 }, { "epoch": 0.8804587761418201, "grad_norm": 121.5, "learning_rate": 2.4400891696080254e-06, "loss": 0.7081, "step": 38690 }, { "epoch": 0.8806863436724848, "grad_norm": 106.0, "learning_rate": 2.435444919190043e-06, "loss": 0.6047, "step": 38700 }, { "epoch": 0.8809139112031495, "grad_norm": 60.75, "learning_rate": 2.4308006687720603e-06, "loss": 0.5824, "step": 38710 }, { "epoch": 0.8811414787338142, "grad_norm": 139.0, "learning_rate": 2.426156418354078e-06, "loss": 0.7305, "step": 38720 }, { "epoch": 0.881369046264479, "grad_norm": 108.5, "learning_rate": 2.4215121679360953e-06, "loss": 1.3065, "step": 38730 }, { "epoch": 0.8815966137951438, "grad_norm": 27.5, "learning_rate": 2.4168679175181128e-06, "loss": 0.6733, "step": 38740 }, { "epoch": 0.8818241813258084, "grad_norm": 266.0, "learning_rate": 2.4122236671001303e-06, "loss": 0.7179, "step": 38750 }, { "epoch": 0.8820517488564732, "grad_norm": 122.5, "learning_rate": 2.4075794166821477e-06, "loss": 1.0378, "step": 38760 }, { "epoch": 0.8822793163871379, "grad_norm": 26.75, "learning_rate": 2.402935166264165e-06, "loss": 1.1369, "step": 38770 }, { "epoch": 0.8825068839178026, "grad_norm": 153.0, "learning_rate": 2.3982909158461827e-06, "loss": 1.0131, "step": 38780 }, { "epoch": 0.8827344514484673, "grad_norm": 328.0, "learning_rate": 2.3936466654282e-06, "loss": 0.7188, "step": 38790 }, { "epoch": 0.8829620189791321, "grad_norm": 87.5, "learning_rate": 2.3890024150102176e-06, "loss": 0.9656, "step": 38800 }, { "epoch": 0.8831895865097967, "grad_norm": 0.047607421875, "learning_rate": 2.384358164592235e-06, "loss": 0.2546, "step": 38810 }, { "epoch": 0.8834171540404615, "grad_norm": 232.0, "learning_rate": 2.3797139141742526e-06, "loss": 1.4063, "step": 38820 }, { "epoch": 0.8836447215711263, "grad_norm": 143.0, "learning_rate": 2.3750696637562697e-06, "loss": 1.1688, "step": 38830 }, { "epoch": 0.8838722891017909, "grad_norm": 19.625, "learning_rate": 2.370425413338287e-06, "loss": 1.2766, "step": 38840 }, { "epoch": 0.8840998566324557, "grad_norm": 876.0, "learning_rate": 2.3657811629203046e-06, "loss": 1.361, "step": 38850 }, { "epoch": 0.8843274241631204, "grad_norm": 202.0, "learning_rate": 2.361136912502322e-06, "loss": 1.0456, "step": 38860 }, { "epoch": 0.8845549916937852, "grad_norm": 53.0, "learning_rate": 2.35649266208434e-06, "loss": 0.3813, "step": 38870 }, { "epoch": 0.8847825592244498, "grad_norm": 45.0, "learning_rate": 2.3518484116663575e-06, "loss": 0.7371, "step": 38880 }, { "epoch": 0.8850101267551146, "grad_norm": 100.5, "learning_rate": 2.347204161248375e-06, "loss": 0.2555, "step": 38890 }, { "epoch": 0.8852376942857793, "grad_norm": 98.0, "learning_rate": 2.3425599108303924e-06, "loss": 0.7791, "step": 38900 }, { "epoch": 0.885465261816444, "grad_norm": 308.0, "learning_rate": 2.3379156604124095e-06, "loss": 0.5103, "step": 38910 }, { "epoch": 0.8856928293471088, "grad_norm": 153.0, "learning_rate": 2.333271409994427e-06, "loss": 0.8535, "step": 38920 }, { "epoch": 0.8859203968777735, "grad_norm": 102.5, "learning_rate": 2.3286271595764444e-06, "loss": 0.6356, "step": 38930 }, { "epoch": 0.8861479644084382, "grad_norm": 0.00148773193359375, "learning_rate": 2.323982909158462e-06, "loss": 1.1517, "step": 38940 }, { "epoch": 0.8863755319391029, "grad_norm": 193.0, "learning_rate": 2.3193386587404794e-06, "loss": 0.4882, "step": 38950 }, { "epoch": 0.8866030994697677, "grad_norm": 106.0, "learning_rate": 2.314694408322497e-06, "loss": 0.8464, "step": 38960 }, { "epoch": 0.8868306670004323, "grad_norm": 0.00191497802734375, "learning_rate": 2.3100501579045144e-06, "loss": 1.453, "step": 38970 }, { "epoch": 0.8870582345310971, "grad_norm": 1.515625, "learning_rate": 2.305405907486532e-06, "loss": 0.4114, "step": 38980 }, { "epoch": 0.8872858020617618, "grad_norm": 0.43359375, "learning_rate": 2.3007616570685493e-06, "loss": 0.6132, "step": 38990 }, { "epoch": 0.8875133695924265, "grad_norm": 0.875, "learning_rate": 2.296117406650567e-06, "loss": 0.7628, "step": 39000 }, { "epoch": 0.8877409371230913, "grad_norm": 146.0, "learning_rate": 2.2914731562325843e-06, "loss": 0.8817, "step": 39010 }, { "epoch": 0.887968504653756, "grad_norm": 87.5, "learning_rate": 2.2868289058146017e-06, "loss": 0.4443, "step": 39020 }, { "epoch": 0.8881960721844208, "grad_norm": 0.04736328125, "learning_rate": 2.2821846553966192e-06, "loss": 0.2328, "step": 39030 }, { "epoch": 0.8884236397150854, "grad_norm": 124.5, "learning_rate": 2.2775404049786367e-06, "loss": 0.4294, "step": 39040 }, { "epoch": 0.8886512072457502, "grad_norm": 25.875, "learning_rate": 2.272896154560654e-06, "loss": 0.7789, "step": 39050 }, { "epoch": 0.8888787747764149, "grad_norm": 0.275390625, "learning_rate": 2.2682519041426717e-06, "loss": 1.4648, "step": 39060 }, { "epoch": 0.8891063423070796, "grad_norm": 0.0206298828125, "learning_rate": 2.263607653724689e-06, "loss": 0.3648, "step": 39070 }, { "epoch": 0.8893339098377443, "grad_norm": 0.1220703125, "learning_rate": 2.2589634033067066e-06, "loss": 0.4381, "step": 39080 }, { "epoch": 0.8895614773684091, "grad_norm": 0.026611328125, "learning_rate": 2.254319152888724e-06, "loss": 1.4355, "step": 39090 }, { "epoch": 0.8897890448990738, "grad_norm": 0.04345703125, "learning_rate": 2.2496749024707416e-06, "loss": 1.5589, "step": 39100 }, { "epoch": 0.8900166124297385, "grad_norm": 0.00830078125, "learning_rate": 2.245030652052759e-06, "loss": 0.4328, "step": 39110 }, { "epoch": 0.8902441799604033, "grad_norm": 141.0, "learning_rate": 2.240386401634776e-06, "loss": 1.0442, "step": 39120 }, { "epoch": 0.8904717474910679, "grad_norm": 129.0, "learning_rate": 2.2357421512167936e-06, "loss": 0.6684, "step": 39130 }, { "epoch": 0.8906993150217327, "grad_norm": 0.482421875, "learning_rate": 2.231097900798811e-06, "loss": 0.9156, "step": 39140 }, { "epoch": 0.8909268825523974, "grad_norm": 50.5, "learning_rate": 2.2264536503808285e-06, "loss": 0.6132, "step": 39150 }, { "epoch": 0.8911544500830622, "grad_norm": 1.59375, "learning_rate": 2.221809399962846e-06, "loss": 0.6362, "step": 39160 }, { "epoch": 0.8913820176137268, "grad_norm": 175.0, "learning_rate": 2.217165149544864e-06, "loss": 0.834, "step": 39170 }, { "epoch": 0.8916095851443916, "grad_norm": 115.5, "learning_rate": 2.2125208991268814e-06, "loss": 0.411, "step": 39180 }, { "epoch": 0.8918371526750564, "grad_norm": 197.0, "learning_rate": 2.2078766487088985e-06, "loss": 0.7732, "step": 39190 }, { "epoch": 0.892064720205721, "grad_norm": 243.0, "learning_rate": 2.203232398290916e-06, "loss": 0.994, "step": 39200 }, { "epoch": 0.8922922877363858, "grad_norm": 27.875, "learning_rate": 2.1985881478729334e-06, "loss": 0.3322, "step": 39210 }, { "epoch": 0.8925198552670505, "grad_norm": 26.0, "learning_rate": 2.193943897454951e-06, "loss": 1.1581, "step": 39220 }, { "epoch": 0.8927474227977152, "grad_norm": 172.0, "learning_rate": 2.1892996470369684e-06, "loss": 0.8136, "step": 39230 }, { "epoch": 0.8929749903283799, "grad_norm": 1.1484375, "learning_rate": 2.184655396618986e-06, "loss": 0.8088, "step": 39240 }, { "epoch": 0.8932025578590447, "grad_norm": 94.0, "learning_rate": 2.1800111462010033e-06, "loss": 0.5515, "step": 39250 }, { "epoch": 0.8934301253897095, "grad_norm": 260.0, "learning_rate": 2.175366895783021e-06, "loss": 1.0181, "step": 39260 }, { "epoch": 0.8936576929203741, "grad_norm": 45.25, "learning_rate": 2.1707226453650383e-06, "loss": 1.3966, "step": 39270 }, { "epoch": 0.8938852604510389, "grad_norm": 146.0, "learning_rate": 2.1660783949470558e-06, "loss": 1.2331, "step": 39280 }, { "epoch": 0.8941128279817036, "grad_norm": 215.0, "learning_rate": 2.1614341445290732e-06, "loss": 0.3853, "step": 39290 }, { "epoch": 0.8943403955123683, "grad_norm": 0.0498046875, "learning_rate": 2.1567898941110907e-06, "loss": 0.5673, "step": 39300 }, { "epoch": 0.894567963043033, "grad_norm": 0.00118255615234375, "learning_rate": 2.152145643693108e-06, "loss": 0.9072, "step": 39310 }, { "epoch": 0.8947955305736978, "grad_norm": 93.5, "learning_rate": 2.1475013932751257e-06, "loss": 0.3777, "step": 39320 }, { "epoch": 0.8950230981043624, "grad_norm": 100.0, "learning_rate": 2.1428571428571427e-06, "loss": 0.5091, "step": 39330 }, { "epoch": 0.8952506656350272, "grad_norm": 6.818771362304688e-05, "learning_rate": 2.13821289243916e-06, "loss": 0.3332, "step": 39340 }, { "epoch": 0.895478233165692, "grad_norm": 0.70703125, "learning_rate": 2.133568642021178e-06, "loss": 0.2085, "step": 39350 }, { "epoch": 0.8957058006963566, "grad_norm": 0.00506591796875, "learning_rate": 2.1289243916031956e-06, "loss": 0.3323, "step": 39360 }, { "epoch": 0.8959333682270214, "grad_norm": 81.0, "learning_rate": 2.124280141185213e-06, "loss": 0.3852, "step": 39370 }, { "epoch": 0.8961609357576861, "grad_norm": 0.0311279296875, "learning_rate": 2.1196358907672305e-06, "loss": 0.9096, "step": 39380 }, { "epoch": 0.8963885032883508, "grad_norm": 4.9375, "learning_rate": 2.114991640349248e-06, "loss": 1.2164, "step": 39390 }, { "epoch": 0.8966160708190155, "grad_norm": 0.1884765625, "learning_rate": 2.110347389931265e-06, "loss": 0.7443, "step": 39400 }, { "epoch": 0.8968436383496803, "grad_norm": 106.5, "learning_rate": 2.1057031395132826e-06, "loss": 0.422, "step": 39410 }, { "epoch": 0.897071205880345, "grad_norm": 0.0115966796875, "learning_rate": 2.1010588890953e-06, "loss": 0.9382, "step": 39420 }, { "epoch": 0.8972987734110097, "grad_norm": 64.0, "learning_rate": 2.0964146386773175e-06, "loss": 0.5984, "step": 39430 }, { "epoch": 0.8975263409416745, "grad_norm": 151.0, "learning_rate": 2.091770388259335e-06, "loss": 0.8815, "step": 39440 }, { "epoch": 0.8977539084723392, "grad_norm": 348.0, "learning_rate": 2.0871261378413525e-06, "loss": 1.0328, "step": 39450 }, { "epoch": 0.8979814760030039, "grad_norm": 0.0027923583984375, "learning_rate": 2.08248188742337e-06, "loss": 0.6886, "step": 39460 }, { "epoch": 0.8982090435336686, "grad_norm": 116.0, "learning_rate": 2.0778376370053874e-06, "loss": 0.7691, "step": 39470 }, { "epoch": 0.8984366110643334, "grad_norm": 48.0, "learning_rate": 2.073193386587405e-06, "loss": 0.3808, "step": 39480 }, { "epoch": 0.898664178594998, "grad_norm": 0.01025390625, "learning_rate": 2.0685491361694224e-06, "loss": 0.3899, "step": 39490 }, { "epoch": 0.8988917461256628, "grad_norm": 0.015869140625, "learning_rate": 2.06390488575144e-06, "loss": 0.2678, "step": 39500 }, { "epoch": 0.8991193136563275, "grad_norm": 31.125, "learning_rate": 2.0592606353334573e-06, "loss": 0.2722, "step": 39510 }, { "epoch": 0.8993468811869922, "grad_norm": 5.25, "learning_rate": 2.054616384915475e-06, "loss": 0.6616, "step": 39520 }, { "epoch": 0.899574448717657, "grad_norm": 179.0, "learning_rate": 2.0499721344974923e-06, "loss": 1.2084, "step": 39530 }, { "epoch": 0.8998020162483217, "grad_norm": 102.5, "learning_rate": 2.0453278840795098e-06, "loss": 0.8453, "step": 39540 }, { "epoch": 0.9000295837789865, "grad_norm": 52.0, "learning_rate": 2.0406836336615273e-06, "loss": 0.5723, "step": 39550 }, { "epoch": 0.9002571513096511, "grad_norm": 251.0, "learning_rate": 2.0360393832435447e-06, "loss": 1.3839, "step": 39560 }, { "epoch": 0.9004847188403159, "grad_norm": 31.875, "learning_rate": 2.0313951328255622e-06, "loss": 0.6992, "step": 39570 }, { "epoch": 0.9007122863709806, "grad_norm": 112.5, "learning_rate": 2.0267508824075797e-06, "loss": 1.8413, "step": 39580 }, { "epoch": 0.9009398539016453, "grad_norm": 105.0, "learning_rate": 2.022106631989597e-06, "loss": 0.617, "step": 39590 }, { "epoch": 0.90116742143231, "grad_norm": 38.75, "learning_rate": 2.0174623815716146e-06, "loss": 1.247, "step": 39600 }, { "epoch": 0.9013949889629748, "grad_norm": 86.0, "learning_rate": 2.0128181311536317e-06, "loss": 0.4337, "step": 39610 }, { "epoch": 0.9016225564936395, "grad_norm": 148.0, "learning_rate": 2.008173880735649e-06, "loss": 0.685, "step": 39620 }, { "epoch": 0.9018501240243042, "grad_norm": 2.171875, "learning_rate": 2.0035296303176667e-06, "loss": 0.4842, "step": 39630 }, { "epoch": 0.902077691554969, "grad_norm": 260.0, "learning_rate": 1.998885379899684e-06, "loss": 1.1652, "step": 39640 }, { "epoch": 0.9023052590856336, "grad_norm": 37.5, "learning_rate": 1.994241129481702e-06, "loss": 0.6106, "step": 39650 }, { "epoch": 0.9025328266162984, "grad_norm": 102.0, "learning_rate": 1.9895968790637195e-06, "loss": 0.7818, "step": 39660 }, { "epoch": 0.9027603941469631, "grad_norm": 85.0, "learning_rate": 1.984952628645737e-06, "loss": 1.0692, "step": 39670 }, { "epoch": 0.9029879616776278, "grad_norm": 42.0, "learning_rate": 1.980308378227754e-06, "loss": 0.8222, "step": 39680 }, { "epoch": 0.9032155292082925, "grad_norm": 74.0, "learning_rate": 1.9756641278097715e-06, "loss": 0.3199, "step": 39690 }, { "epoch": 0.9034430967389573, "grad_norm": 0.000537872314453125, "learning_rate": 1.971019877391789e-06, "loss": 0.184, "step": 39700 }, { "epoch": 0.9036706642696221, "grad_norm": 165.0, "learning_rate": 1.9663756269738065e-06, "loss": 1.0572, "step": 39710 }, { "epoch": 0.9038982318002867, "grad_norm": 59.5, "learning_rate": 1.961731376555824e-06, "loss": 0.4825, "step": 39720 }, { "epoch": 0.9041257993309515, "grad_norm": 6.6875, "learning_rate": 1.9570871261378414e-06, "loss": 1.1153, "step": 39730 }, { "epoch": 0.9043533668616162, "grad_norm": 45.75, "learning_rate": 1.952442875719859e-06, "loss": 1.3418, "step": 39740 }, { "epoch": 0.9045809343922809, "grad_norm": 0.0064697265625, "learning_rate": 1.9477986253018764e-06, "loss": 0.6987, "step": 39750 }, { "epoch": 0.9048085019229456, "grad_norm": 370.0, "learning_rate": 1.943154374883894e-06, "loss": 0.4526, "step": 39760 }, { "epoch": 0.9050360694536104, "grad_norm": 0.0201416015625, "learning_rate": 1.9385101244659114e-06, "loss": 0.286, "step": 39770 }, { "epoch": 0.905263636984275, "grad_norm": 123.0, "learning_rate": 1.933865874047929e-06, "loss": 0.5775, "step": 39780 }, { "epoch": 0.9054912045149398, "grad_norm": 0.62109375, "learning_rate": 1.9292216236299463e-06, "loss": 1.9849, "step": 39790 }, { "epoch": 0.9057187720456046, "grad_norm": 41.75, "learning_rate": 1.924577373211964e-06, "loss": 0.4552, "step": 39800 }, { "epoch": 0.9059463395762692, "grad_norm": 101.0, "learning_rate": 1.9199331227939813e-06, "loss": 1.2271, "step": 39810 }, { "epoch": 0.906173907106934, "grad_norm": 5.1875, "learning_rate": 1.9152888723759988e-06, "loss": 0.6729, "step": 39820 }, { "epoch": 0.9064014746375987, "grad_norm": 1.0390625, "learning_rate": 1.9106446219580162e-06, "loss": 1.6097, "step": 39830 }, { "epoch": 0.9066290421682635, "grad_norm": 0.0220947265625, "learning_rate": 1.9060003715400335e-06, "loss": 0.8023, "step": 39840 }, { "epoch": 0.9068566096989281, "grad_norm": 2496.0, "learning_rate": 1.901356121122051e-06, "loss": 0.7743, "step": 39850 }, { "epoch": 0.9070841772295929, "grad_norm": 8.5, "learning_rate": 1.8967118707040685e-06, "loss": 0.9532, "step": 39860 }, { "epoch": 0.9073117447602576, "grad_norm": 0.2890625, "learning_rate": 1.8920676202860861e-06, "loss": 1.4144, "step": 39870 }, { "epoch": 0.9075393122909223, "grad_norm": 87.0, "learning_rate": 1.8874233698681036e-06, "loss": 1.1352, "step": 39880 }, { "epoch": 0.9077668798215871, "grad_norm": 0.00994873046875, "learning_rate": 1.882779119450121e-06, "loss": 0.3861, "step": 39890 }, { "epoch": 0.9079944473522518, "grad_norm": 0.005340576171875, "learning_rate": 1.8781348690321384e-06, "loss": 0.3776, "step": 39900 }, { "epoch": 0.9082220148829165, "grad_norm": 354.0, "learning_rate": 1.8734906186141558e-06, "loss": 1.0079, "step": 39910 }, { "epoch": 0.9084495824135812, "grad_norm": 249.0, "learning_rate": 1.8688463681961733e-06, "loss": 0.2589, "step": 39920 }, { "epoch": 0.908677149944246, "grad_norm": 203.0, "learning_rate": 1.8642021177781908e-06, "loss": 0.7516, "step": 39930 }, { "epoch": 0.9089047174749106, "grad_norm": 138.0, "learning_rate": 1.8595578673602083e-06, "loss": 0.5554, "step": 39940 }, { "epoch": 0.9091322850055754, "grad_norm": 82.0, "learning_rate": 1.8549136169422258e-06, "loss": 1.1278, "step": 39950 }, { "epoch": 0.9093598525362401, "grad_norm": 1.0390625, "learning_rate": 1.8502693665242432e-06, "loss": 0.6187, "step": 39960 }, { "epoch": 0.9095874200669049, "grad_norm": 0.01068115234375, "learning_rate": 1.8456251161062605e-06, "loss": 0.7118, "step": 39970 }, { "epoch": 0.9098149875975696, "grad_norm": 79.0, "learning_rate": 1.840980865688278e-06, "loss": 1.0158, "step": 39980 }, { "epoch": 0.9100425551282343, "grad_norm": 0.001312255859375, "learning_rate": 1.8363366152702955e-06, "loss": 0.4229, "step": 39990 }, { "epoch": 0.9102701226588991, "grad_norm": 0.0027618408203125, "learning_rate": 1.831692364852313e-06, "loss": 0.2937, "step": 40000 }, { "epoch": 0.9104976901895637, "grad_norm": 25.875, "learning_rate": 1.8270481144343304e-06, "loss": 0.1502, "step": 40010 }, { "epoch": 0.9107252577202285, "grad_norm": 220.0, "learning_rate": 1.822403864016348e-06, "loss": 0.9015, "step": 40020 }, { "epoch": 0.9109528252508932, "grad_norm": 4.6875, "learning_rate": 1.8177596135983656e-06, "loss": 0.3266, "step": 40030 }, { "epoch": 0.9111803927815579, "grad_norm": 192.0, "learning_rate": 1.8131153631803826e-06, "loss": 1.4997, "step": 40040 }, { "epoch": 0.9114079603122226, "grad_norm": 51.5, "learning_rate": 1.8084711127624003e-06, "loss": 0.5175, "step": 40050 }, { "epoch": 0.9116355278428874, "grad_norm": 0.04052734375, "learning_rate": 1.8038268623444178e-06, "loss": 0.2849, "step": 40060 }, { "epoch": 0.9118630953735521, "grad_norm": 6.4375, "learning_rate": 1.7991826119264353e-06, "loss": 2.3122, "step": 40070 }, { "epoch": 0.9120906629042168, "grad_norm": 178.0, "learning_rate": 1.7945383615084528e-06, "loss": 0.773, "step": 40080 }, { "epoch": 0.9123182304348816, "grad_norm": 0.443359375, "learning_rate": 1.7898941110904702e-06, "loss": 0.5007, "step": 40090 }, { "epoch": 0.9125457979655462, "grad_norm": 0.92578125, "learning_rate": 1.7852498606724877e-06, "loss": 0.5697, "step": 40100 }, { "epoch": 0.912773365496211, "grad_norm": 54.0, "learning_rate": 1.780605610254505e-06, "loss": 0.6472, "step": 40110 }, { "epoch": 0.9130009330268757, "grad_norm": 139.0, "learning_rate": 1.7759613598365225e-06, "loss": 1.1982, "step": 40120 }, { "epoch": 0.9132285005575405, "grad_norm": 0.004730224609375, "learning_rate": 1.77131710941854e-06, "loss": 0.2094, "step": 40130 }, { "epoch": 0.9134560680882051, "grad_norm": 75.5, "learning_rate": 1.7666728590005574e-06, "loss": 0.3811, "step": 40140 }, { "epoch": 0.9136836356188699, "grad_norm": 186.0, "learning_rate": 1.762028608582575e-06, "loss": 1.2799, "step": 40150 }, { "epoch": 0.9139112031495347, "grad_norm": 0.6484375, "learning_rate": 1.7573843581645924e-06, "loss": 0.7681, "step": 40160 }, { "epoch": 0.9141387706801993, "grad_norm": 0.0038604736328125, "learning_rate": 1.7527401077466099e-06, "loss": 2.2484, "step": 40170 }, { "epoch": 0.9143663382108641, "grad_norm": 14.3125, "learning_rate": 1.7480958573286271e-06, "loss": 0.6297, "step": 40180 }, { "epoch": 0.9145939057415288, "grad_norm": 3.9375, "learning_rate": 1.7434516069106446e-06, "loss": 0.3909, "step": 40190 }, { "epoch": 0.9148214732721935, "grad_norm": 0.5859375, "learning_rate": 1.7388073564926623e-06, "loss": 1.346, "step": 40200 }, { "epoch": 0.9150490408028582, "grad_norm": 186.0, "learning_rate": 1.7341631060746798e-06, "loss": 0.6037, "step": 40210 }, { "epoch": 0.915276608333523, "grad_norm": 1.4375, "learning_rate": 1.7295188556566973e-06, "loss": 0.4518, "step": 40220 }, { "epoch": 0.9155041758641878, "grad_norm": 0.0703125, "learning_rate": 1.7248746052387147e-06, "loss": 0.7153, "step": 40230 }, { "epoch": 0.9157317433948524, "grad_norm": 0.026611328125, "learning_rate": 1.7202303548207322e-06, "loss": 0.6838, "step": 40240 }, { "epoch": 0.9159593109255172, "grad_norm": 195.0, "learning_rate": 1.7155861044027495e-06, "loss": 0.5748, "step": 40250 }, { "epoch": 0.9161868784561819, "grad_norm": 125.5, "learning_rate": 1.710941853984767e-06, "loss": 1.8679, "step": 40260 }, { "epoch": 0.9164144459868466, "grad_norm": 126.5, "learning_rate": 1.7062976035667844e-06, "loss": 1.228, "step": 40270 }, { "epoch": 0.9166420135175113, "grad_norm": 0.0145263671875, "learning_rate": 1.701653353148802e-06, "loss": 0.1398, "step": 40280 }, { "epoch": 0.9168695810481761, "grad_norm": 154.0, "learning_rate": 1.6970091027308194e-06, "loss": 1.3622, "step": 40290 }, { "epoch": 0.9170971485788407, "grad_norm": 0.01031494140625, "learning_rate": 1.6923648523128369e-06, "loss": 0.6799, "step": 40300 }, { "epoch": 0.9173247161095055, "grad_norm": 202.0, "learning_rate": 1.6877206018948543e-06, "loss": 1.6579, "step": 40310 }, { "epoch": 0.9175522836401703, "grad_norm": 528.0, "learning_rate": 1.6830763514768716e-06, "loss": 0.9635, "step": 40320 }, { "epoch": 0.9177798511708349, "grad_norm": 77.0, "learning_rate": 1.678432101058889e-06, "loss": 0.7486, "step": 40330 }, { "epoch": 0.9180074187014997, "grad_norm": 490.0, "learning_rate": 1.6737878506409066e-06, "loss": 0.9566, "step": 40340 }, { "epoch": 0.9182349862321644, "grad_norm": 83.5, "learning_rate": 1.669143600222924e-06, "loss": 0.8304, "step": 40350 }, { "epoch": 0.9184625537628291, "grad_norm": 360.0, "learning_rate": 1.6644993498049417e-06, "loss": 0.6268, "step": 40360 }, { "epoch": 0.9186901212934938, "grad_norm": 0.02587890625, "learning_rate": 1.6598550993869592e-06, "loss": 0.6046, "step": 40370 }, { "epoch": 0.9189176888241586, "grad_norm": 239.0, "learning_rate": 1.6552108489689767e-06, "loss": 0.5684, "step": 40380 }, { "epoch": 0.9191452563548232, "grad_norm": 97.5, "learning_rate": 1.650566598550994e-06, "loss": 0.9473, "step": 40390 }, { "epoch": 0.919372823885488, "grad_norm": 9.0, "learning_rate": 1.6459223481330114e-06, "loss": 0.4115, "step": 40400 }, { "epoch": 0.9196003914161528, "grad_norm": 52.25, "learning_rate": 1.641278097715029e-06, "loss": 0.6657, "step": 40410 }, { "epoch": 0.9198279589468175, "grad_norm": 46.0, "learning_rate": 1.6366338472970464e-06, "loss": 1.3878, "step": 40420 }, { "epoch": 0.9200555264774822, "grad_norm": 0.04248046875, "learning_rate": 1.6319895968790639e-06, "loss": 1.0309, "step": 40430 }, { "epoch": 0.9202830940081469, "grad_norm": 0.06201171875, "learning_rate": 1.6273453464610814e-06, "loss": 1.096, "step": 40440 }, { "epoch": 0.9205106615388117, "grad_norm": 1.0859375, "learning_rate": 1.6227010960430988e-06, "loss": 0.6528, "step": 40450 }, { "epoch": 0.9207382290694763, "grad_norm": 0.0023345947265625, "learning_rate": 1.618056845625116e-06, "loss": 1.2704, "step": 40460 }, { "epoch": 0.9209657966001411, "grad_norm": 0.46875, "learning_rate": 1.6134125952071336e-06, "loss": 0.6184, "step": 40470 }, { "epoch": 0.9211933641308058, "grad_norm": 36.25, "learning_rate": 1.608768344789151e-06, "loss": 0.9206, "step": 40480 }, { "epoch": 0.9214209316614705, "grad_norm": 0.056640625, "learning_rate": 1.6041240943711685e-06, "loss": 0.8128, "step": 40490 }, { "epoch": 0.9216484991921353, "grad_norm": 239.0, "learning_rate": 1.599479843953186e-06, "loss": 0.9394, "step": 40500 }, { "epoch": 0.9218760667228, "grad_norm": 193.0, "learning_rate": 1.5948355935352037e-06, "loss": 0.5975, "step": 40510 }, { "epoch": 0.9221036342534648, "grad_norm": 125.5, "learning_rate": 1.5901913431172212e-06, "loss": 0.9721, "step": 40520 }, { "epoch": 0.9223312017841294, "grad_norm": 0.037353515625, "learning_rate": 1.5855470926992384e-06, "loss": 0.3291, "step": 40530 }, { "epoch": 0.9225587693147942, "grad_norm": 2.0625, "learning_rate": 1.580902842281256e-06, "loss": 0.4145, "step": 40540 }, { "epoch": 0.9227863368454589, "grad_norm": 0.0419921875, "learning_rate": 1.5762585918632734e-06, "loss": 0.5751, "step": 40550 }, { "epoch": 0.9230139043761236, "grad_norm": 140.0, "learning_rate": 1.5716143414452909e-06, "loss": 0.9489, "step": 40560 }, { "epoch": 0.9232414719067883, "grad_norm": 496.0, "learning_rate": 1.5669700910273084e-06, "loss": 1.4961, "step": 40570 }, { "epoch": 0.9234690394374531, "grad_norm": 113.0, "learning_rate": 1.5623258406093258e-06, "loss": 1.2155, "step": 40580 }, { "epoch": 0.9236966069681178, "grad_norm": 0.00360107421875, "learning_rate": 1.5576815901913433e-06, "loss": 0.8227, "step": 40590 }, { "epoch": 0.9239241744987825, "grad_norm": 2.15625, "learning_rate": 1.5530373397733606e-06, "loss": 1.0849, "step": 40600 }, { "epoch": 0.9241517420294473, "grad_norm": 0.06298828125, "learning_rate": 1.548393089355378e-06, "loss": 1.0473, "step": 40610 }, { "epoch": 0.9243793095601119, "grad_norm": 62.5, "learning_rate": 1.5437488389373955e-06, "loss": 1.0451, "step": 40620 }, { "epoch": 0.9246068770907767, "grad_norm": 146.0, "learning_rate": 1.539104588519413e-06, "loss": 0.4225, "step": 40630 }, { "epoch": 0.9248344446214414, "grad_norm": 0.03857421875, "learning_rate": 1.5344603381014305e-06, "loss": 0.3821, "step": 40640 }, { "epoch": 0.9250620121521062, "grad_norm": 0.51171875, "learning_rate": 1.529816087683448e-06, "loss": 1.0289, "step": 40650 }, { "epoch": 0.9252895796827708, "grad_norm": 35.75, "learning_rate": 1.5251718372654657e-06, "loss": 0.4319, "step": 40660 }, { "epoch": 0.9255171472134356, "grad_norm": 1.65625, "learning_rate": 1.5205275868474827e-06, "loss": 1.8188, "step": 40670 }, { "epoch": 0.9257447147441004, "grad_norm": 246.0, "learning_rate": 1.5158833364295002e-06, "loss": 0.4857, "step": 40680 }, { "epoch": 0.925972282274765, "grad_norm": 111.5, "learning_rate": 1.5112390860115179e-06, "loss": 0.4388, "step": 40690 }, { "epoch": 0.9261998498054298, "grad_norm": 149.0, "learning_rate": 1.5065948355935354e-06, "loss": 1.21, "step": 40700 }, { "epoch": 0.9264274173360945, "grad_norm": 25.875, "learning_rate": 1.5019505851755528e-06, "loss": 1.0039, "step": 40710 }, { "epoch": 0.9266549848667592, "grad_norm": 0.0181884765625, "learning_rate": 1.4973063347575703e-06, "loss": 0.6732, "step": 40720 }, { "epoch": 0.9268825523974239, "grad_norm": 0.068359375, "learning_rate": 1.4926620843395878e-06, "loss": 0.6945, "step": 40730 }, { "epoch": 0.9271101199280887, "grad_norm": 38.5, "learning_rate": 1.488017833921605e-06, "loss": 0.6279, "step": 40740 }, { "epoch": 0.9273376874587533, "grad_norm": 57.5, "learning_rate": 1.4833735835036226e-06, "loss": 1.4734, "step": 40750 }, { "epoch": 0.9275652549894181, "grad_norm": 2.421875, "learning_rate": 1.47872933308564e-06, "loss": 0.4427, "step": 40760 }, { "epoch": 0.9277928225200829, "grad_norm": 215.0, "learning_rate": 1.4740850826676575e-06, "loss": 0.6717, "step": 40770 }, { "epoch": 0.9280203900507475, "grad_norm": 246.0, "learning_rate": 1.469440832249675e-06, "loss": 0.6142, "step": 40780 }, { "epoch": 0.9282479575814123, "grad_norm": 440.0, "learning_rate": 1.4647965818316925e-06, "loss": 1.0173, "step": 40790 }, { "epoch": 0.928475525112077, "grad_norm": 58.75, "learning_rate": 1.46015233141371e-06, "loss": 0.802, "step": 40800 }, { "epoch": 0.9287030926427418, "grad_norm": 154.0, "learning_rate": 1.4555080809957276e-06, "loss": 0.5632, "step": 40810 }, { "epoch": 0.9289306601734064, "grad_norm": 22.25, "learning_rate": 1.4508638305777447e-06, "loss": 0.6134, "step": 40820 }, { "epoch": 0.9291582277040712, "grad_norm": 0.005218505859375, "learning_rate": 1.4462195801597622e-06, "loss": 0.8432, "step": 40830 }, { "epoch": 0.9293857952347359, "grad_norm": 416.0, "learning_rate": 1.4415753297417799e-06, "loss": 0.9989, "step": 40840 }, { "epoch": 0.9296133627654006, "grad_norm": 0.002471923828125, "learning_rate": 1.4369310793237973e-06, "loss": 0.9845, "step": 40850 }, { "epoch": 0.9298409302960654, "grad_norm": 0.00154876708984375, "learning_rate": 1.4322868289058148e-06, "loss": 0.8265, "step": 40860 }, { "epoch": 0.9300684978267301, "grad_norm": 85.0, "learning_rate": 1.4276425784878323e-06, "loss": 0.7665, "step": 40870 }, { "epoch": 0.9302960653573948, "grad_norm": 151.0, "learning_rate": 1.4229983280698498e-06, "loss": 1.0652, "step": 40880 }, { "epoch": 0.9305236328880595, "grad_norm": 0.0306396484375, "learning_rate": 1.418354077651867e-06, "loss": 1.1202, "step": 40890 }, { "epoch": 0.9307512004187243, "grad_norm": 0.05712890625, "learning_rate": 1.4137098272338845e-06, "loss": 0.5472, "step": 40900 }, { "epoch": 0.9309787679493889, "grad_norm": 96.5, "learning_rate": 1.409065576815902e-06, "loss": 0.32, "step": 40910 }, { "epoch": 0.9312063354800537, "grad_norm": 0.10400390625, "learning_rate": 1.4044213263979195e-06, "loss": 0.7863, "step": 40920 }, { "epoch": 0.9314339030107184, "grad_norm": 167.0, "learning_rate": 1.399777075979937e-06, "loss": 0.8551, "step": 40930 }, { "epoch": 0.9316614705413832, "grad_norm": 3.515625, "learning_rate": 1.3951328255619544e-06, "loss": 0.2319, "step": 40940 }, { "epoch": 0.9318890380720479, "grad_norm": 99.5, "learning_rate": 1.390488575143972e-06, "loss": 0.7769, "step": 40950 }, { "epoch": 0.9321166056027126, "grad_norm": 0.06103515625, "learning_rate": 1.3858443247259892e-06, "loss": 0.4053, "step": 40960 }, { "epoch": 0.9323441731333774, "grad_norm": 392.0, "learning_rate": 1.3812000743080067e-06, "loss": 0.7167, "step": 40970 }, { "epoch": 0.932571740664042, "grad_norm": 624.0, "learning_rate": 1.3765558238900241e-06, "loss": 0.6587, "step": 40980 }, { "epoch": 0.9327993081947068, "grad_norm": 352.0, "learning_rate": 1.3719115734720418e-06, "loss": 1.5605, "step": 40990 }, { "epoch": 0.9330268757253715, "grad_norm": 0.0002651214599609375, "learning_rate": 1.3672673230540593e-06, "loss": 0.0836, "step": 41000 }, { "epoch": 0.9332544432560362, "grad_norm": 156.0, "learning_rate": 1.3626230726360768e-06, "loss": 1.4145, "step": 41010 }, { "epoch": 0.9334820107867009, "grad_norm": 58.0, "learning_rate": 1.3579788222180943e-06, "loss": 0.9058, "step": 41020 }, { "epoch": 0.9337095783173657, "grad_norm": 39.25, "learning_rate": 1.3533345718001115e-06, "loss": 0.9311, "step": 41030 }, { "epoch": 0.9339371458480304, "grad_norm": 215.0, "learning_rate": 1.348690321382129e-06, "loss": 0.4867, "step": 41040 }, { "epoch": 0.9341647133786951, "grad_norm": 89.5, "learning_rate": 1.3440460709641465e-06, "loss": 0.465, "step": 41050 }, { "epoch": 0.9343922809093599, "grad_norm": 109.5, "learning_rate": 1.339401820546164e-06, "loss": 0.4145, "step": 41060 }, { "epoch": 0.9346198484400245, "grad_norm": 137.0, "learning_rate": 1.3347575701281814e-06, "loss": 0.9795, "step": 41070 }, { "epoch": 0.9348474159706893, "grad_norm": 0.00244140625, "learning_rate": 1.330113319710199e-06, "loss": 0.6129, "step": 41080 }, { "epoch": 0.935074983501354, "grad_norm": 0.01092529296875, "learning_rate": 1.3254690692922164e-06, "loss": 0.6366, "step": 41090 }, { "epoch": 0.9353025510320188, "grad_norm": 0.01300048828125, "learning_rate": 1.3208248188742337e-06, "loss": 0.9284, "step": 41100 }, { "epoch": 0.9355301185626834, "grad_norm": 0.77734375, "learning_rate": 1.3161805684562511e-06, "loss": 0.4144, "step": 41110 }, { "epoch": 0.9357576860933482, "grad_norm": 0.005462646484375, "learning_rate": 1.3115363180382686e-06, "loss": 0.3014, "step": 41120 }, { "epoch": 0.935985253624013, "grad_norm": 0.0057373046875, "learning_rate": 1.306892067620286e-06, "loss": 1.7698, "step": 41130 }, { "epoch": 0.9362128211546776, "grad_norm": 205.0, "learning_rate": 1.3022478172023038e-06, "loss": 0.4364, "step": 41140 }, { "epoch": 0.9364403886853424, "grad_norm": 0.003692626953125, "learning_rate": 1.2976035667843213e-06, "loss": 0.7499, "step": 41150 }, { "epoch": 0.9366679562160071, "grad_norm": 0.000385284423828125, "learning_rate": 1.2929593163663387e-06, "loss": 0.3143, "step": 41160 }, { "epoch": 0.9368955237466718, "grad_norm": 162.0, "learning_rate": 1.288315065948356e-06, "loss": 0.6341, "step": 41170 }, { "epoch": 0.9371230912773365, "grad_norm": 16.5, "learning_rate": 1.2836708155303735e-06, "loss": 0.9434, "step": 41180 }, { "epoch": 0.9373506588080013, "grad_norm": 58.5, "learning_rate": 1.279026565112391e-06, "loss": 1.0873, "step": 41190 }, { "epoch": 0.937578226338666, "grad_norm": 163.0, "learning_rate": 1.2743823146944084e-06, "loss": 1.5616, "step": 41200 }, { "epoch": 0.9378057938693307, "grad_norm": 104.5, "learning_rate": 1.269738064276426e-06, "loss": 0.3836, "step": 41210 }, { "epoch": 0.9380333613999955, "grad_norm": 2.53125, "learning_rate": 1.2650938138584434e-06, "loss": 0.9532, "step": 41220 }, { "epoch": 0.9382609289306602, "grad_norm": 167.0, "learning_rate": 1.2604495634404609e-06, "loss": 0.8201, "step": 41230 }, { "epoch": 0.9384884964613249, "grad_norm": 85.5, "learning_rate": 1.2558053130224781e-06, "loss": 1.0521, "step": 41240 }, { "epoch": 0.9387160639919896, "grad_norm": 38.75, "learning_rate": 1.2511610626044956e-06, "loss": 0.8827, "step": 41250 }, { "epoch": 0.9389436315226544, "grad_norm": 4.59375, "learning_rate": 1.246516812186513e-06, "loss": 0.6915, "step": 41260 }, { "epoch": 0.939171199053319, "grad_norm": 101.5, "learning_rate": 1.2418725617685306e-06, "loss": 0.6162, "step": 41270 }, { "epoch": 0.9393987665839838, "grad_norm": 174.0, "learning_rate": 1.237228311350548e-06, "loss": 0.4506, "step": 41280 }, { "epoch": 0.9396263341146486, "grad_norm": 170.0, "learning_rate": 1.2325840609325655e-06, "loss": 0.4677, "step": 41290 }, { "epoch": 0.9398539016453132, "grad_norm": 91.0, "learning_rate": 1.227939810514583e-06, "loss": 0.8939, "step": 41300 }, { "epoch": 0.940081469175978, "grad_norm": 206.0, "learning_rate": 1.2232955600966005e-06, "loss": 0.9096, "step": 41310 }, { "epoch": 0.9403090367066427, "grad_norm": 1.28125, "learning_rate": 1.218651309678618e-06, "loss": 1.1665, "step": 41320 }, { "epoch": 0.9405366042373074, "grad_norm": 584.0, "learning_rate": 1.2140070592606355e-06, "loss": 0.6406, "step": 41330 }, { "epoch": 0.9407641717679721, "grad_norm": 0.1943359375, "learning_rate": 1.209362808842653e-06, "loss": 0.4356, "step": 41340 }, { "epoch": 0.9409917392986369, "grad_norm": 310.0, "learning_rate": 1.2047185584246704e-06, "loss": 0.7865, "step": 41350 }, { "epoch": 0.9412193068293015, "grad_norm": 70.0, "learning_rate": 1.2000743080066877e-06, "loss": 1.5738, "step": 41360 }, { "epoch": 0.9414468743599663, "grad_norm": 144.0, "learning_rate": 1.1954300575887052e-06, "loss": 1.0219, "step": 41370 }, { "epoch": 0.9416744418906311, "grad_norm": 14.1875, "learning_rate": 1.1907858071707228e-06, "loss": 0.6794, "step": 41380 }, { "epoch": 0.9419020094212958, "grad_norm": 92.0, "learning_rate": 1.1861415567527403e-06, "loss": 0.7143, "step": 41390 }, { "epoch": 0.9421295769519605, "grad_norm": 182.0, "learning_rate": 1.1814973063347576e-06, "loss": 0.7581, "step": 41400 }, { "epoch": 0.9423571444826252, "grad_norm": 0.00323486328125, "learning_rate": 1.176853055916775e-06, "loss": 0.4724, "step": 41410 }, { "epoch": 0.94258471201329, "grad_norm": 0.322265625, "learning_rate": 1.1722088054987925e-06, "loss": 0.2051, "step": 41420 }, { "epoch": 0.9428122795439546, "grad_norm": 0.000553131103515625, "learning_rate": 1.16756455508081e-06, "loss": 1.1938, "step": 41430 }, { "epoch": 0.9430398470746194, "grad_norm": 0.1826171875, "learning_rate": 1.1629203046628275e-06, "loss": 0.4131, "step": 41440 }, { "epoch": 0.9432674146052841, "grad_norm": 1.2578125, "learning_rate": 1.158276054244845e-06, "loss": 0.6401, "step": 41450 }, { "epoch": 0.9434949821359488, "grad_norm": 440.0, "learning_rate": 1.1536318038268625e-06, "loss": 0.5336, "step": 41460 }, { "epoch": 0.9437225496666136, "grad_norm": 0.99609375, "learning_rate": 1.14898755340888e-06, "loss": 1.025, "step": 41470 }, { "epoch": 0.9439501171972783, "grad_norm": 135.0, "learning_rate": 1.1443433029908974e-06, "loss": 0.9362, "step": 41480 }, { "epoch": 0.9441776847279431, "grad_norm": 0.0004825592041015625, "learning_rate": 1.139699052572915e-06, "loss": 0.351, "step": 41490 }, { "epoch": 0.9444052522586077, "grad_norm": 55.75, "learning_rate": 1.1350548021549324e-06, "loss": 0.2591, "step": 41500 }, { "epoch": 0.9446328197892725, "grad_norm": 153.0, "learning_rate": 1.1304105517369496e-06, "loss": 0.8268, "step": 41510 }, { "epoch": 0.9448603873199372, "grad_norm": 732.0, "learning_rate": 1.1257663013189671e-06, "loss": 1.0877, "step": 41520 }, { "epoch": 0.9450879548506019, "grad_norm": 155.0, "learning_rate": 1.1211220509009848e-06, "loss": 1.7367, "step": 41530 }, { "epoch": 0.9453155223812666, "grad_norm": 92.5, "learning_rate": 1.116477800483002e-06, "loss": 0.9412, "step": 41540 }, { "epoch": 0.9455430899119314, "grad_norm": 33.0, "learning_rate": 1.1118335500650196e-06, "loss": 0.4394, "step": 41550 }, { "epoch": 0.9457706574425961, "grad_norm": 0.021484375, "learning_rate": 1.107189299647037e-06, "loss": 0.8693, "step": 41560 }, { "epoch": 0.9459982249732608, "grad_norm": 1464.0, "learning_rate": 1.1025450492290545e-06, "loss": 0.7364, "step": 41570 }, { "epoch": 0.9462257925039256, "grad_norm": 59.75, "learning_rate": 1.097900798811072e-06, "loss": 0.7507, "step": 41580 }, { "epoch": 0.9464533600345902, "grad_norm": 0.057861328125, "learning_rate": 1.0932565483930895e-06, "loss": 1.0329, "step": 41590 }, { "epoch": 0.946680927565255, "grad_norm": 214.0, "learning_rate": 1.088612297975107e-06, "loss": 0.2563, "step": 41600 }, { "epoch": 0.9469084950959197, "grad_norm": 223.0, "learning_rate": 1.0839680475571242e-06, "loss": 1.4936, "step": 41610 }, { "epoch": 0.9471360626265845, "grad_norm": 41.25, "learning_rate": 1.079323797139142e-06, "loss": 0.3199, "step": 41620 }, { "epoch": 0.9473636301572491, "grad_norm": 30.875, "learning_rate": 1.0746795467211594e-06, "loss": 0.7364, "step": 41630 }, { "epoch": 0.9475911976879139, "grad_norm": 0.055908203125, "learning_rate": 1.0700352963031769e-06, "loss": 0.1981, "step": 41640 }, { "epoch": 0.9478187652185787, "grad_norm": 186.0, "learning_rate": 1.0653910458851941e-06, "loss": 0.2064, "step": 41650 }, { "epoch": 0.9480463327492433, "grad_norm": 488.0, "learning_rate": 1.0607467954672116e-06, "loss": 1.3921, "step": 41660 }, { "epoch": 0.9482739002799081, "grad_norm": 198.0, "learning_rate": 1.056102545049229e-06, "loss": 1.7618, "step": 41670 }, { "epoch": 0.9485014678105728, "grad_norm": 79.5, "learning_rate": 1.0514582946312466e-06, "loss": 0.6623, "step": 41680 }, { "epoch": 0.9487290353412375, "grad_norm": 10.3125, "learning_rate": 1.046814044213264e-06, "loss": 0.453, "step": 41690 }, { "epoch": 0.9489566028719022, "grad_norm": 9.5, "learning_rate": 1.0421697937952815e-06, "loss": 0.752, "step": 41700 }, { "epoch": 0.949184170402567, "grad_norm": 0.150390625, "learning_rate": 1.037525543377299e-06, "loss": 0.7399, "step": 41710 }, { "epoch": 0.9494117379332316, "grad_norm": 106.0, "learning_rate": 1.0328812929593165e-06, "loss": 0.9454, "step": 41720 }, { "epoch": 0.9496393054638964, "grad_norm": 0.2890625, "learning_rate": 1.028237042541334e-06, "loss": 0.6121, "step": 41730 }, { "epoch": 0.9498668729945612, "grad_norm": 71.0, "learning_rate": 1.0235927921233514e-06, "loss": 1.1535, "step": 41740 }, { "epoch": 0.9500944405252258, "grad_norm": 6.375, "learning_rate": 1.0189485417053687e-06, "loss": 1.9655, "step": 41750 }, { "epoch": 0.9503220080558906, "grad_norm": 1.6796875, "learning_rate": 1.0143042912873862e-06, "loss": 1.015, "step": 41760 }, { "epoch": 0.9505495755865553, "grad_norm": 13.5625, "learning_rate": 1.0096600408694039e-06, "loss": 0.7088, "step": 41770 }, { "epoch": 0.9507771431172201, "grad_norm": 75.5, "learning_rate": 1.0050157904514213e-06, "loss": 0.3242, "step": 41780 }, { "epoch": 0.9510047106478847, "grad_norm": 154.0, "learning_rate": 1.0003715400334386e-06, "loss": 0.4388, "step": 41790 }, { "epoch": 0.9512322781785495, "grad_norm": 101.5, "learning_rate": 9.95727289615456e-07, "loss": 0.3259, "step": 41800 }, { "epoch": 0.9514598457092142, "grad_norm": 33.75, "learning_rate": 9.910830391974736e-07, "loss": 0.5351, "step": 41810 }, { "epoch": 0.9516874132398789, "grad_norm": 0.12255859375, "learning_rate": 9.86438788779491e-07, "loss": 1.1308, "step": 41820 }, { "epoch": 0.9519149807705437, "grad_norm": 39.25, "learning_rate": 9.817945383615085e-07, "loss": 0.2565, "step": 41830 }, { "epoch": 0.9521425483012084, "grad_norm": 0.0308837890625, "learning_rate": 9.77150287943526e-07, "loss": 0.3183, "step": 41840 }, { "epoch": 0.9523701158318731, "grad_norm": 0.50390625, "learning_rate": 9.725060375255435e-07, "loss": 0.4233, "step": 41850 }, { "epoch": 0.9525976833625378, "grad_norm": 0.06689453125, "learning_rate": 9.67861787107561e-07, "loss": 0.3665, "step": 41860 }, { "epoch": 0.9528252508932026, "grad_norm": 171.0, "learning_rate": 9.632175366895784e-07, "loss": 0.5755, "step": 41870 }, { "epoch": 0.9530528184238672, "grad_norm": 212.0, "learning_rate": 9.58573286271596e-07, "loss": 1.2677, "step": 41880 }, { "epoch": 0.953280385954532, "grad_norm": 157.0, "learning_rate": 9.539290358536132e-07, "loss": 0.8514, "step": 41890 }, { "epoch": 0.9535079534851967, "grad_norm": 34.25, "learning_rate": 9.492847854356308e-07, "loss": 0.9848, "step": 41900 }, { "epoch": 0.9537355210158615, "grad_norm": 3.046875, "learning_rate": 9.446405350176483e-07, "loss": 0.2756, "step": 41910 }, { "epoch": 0.9539630885465262, "grad_norm": 430.0, "learning_rate": 9.399962845996657e-07, "loss": 2.0585, "step": 41920 }, { "epoch": 0.9541906560771909, "grad_norm": 7.15625, "learning_rate": 9.353520341816831e-07, "loss": 0.784, "step": 41930 }, { "epoch": 0.9544182236078557, "grad_norm": 2.96875, "learning_rate": 9.307077837637006e-07, "loss": 0.7663, "step": 41940 }, { "epoch": 0.9546457911385203, "grad_norm": 223.0, "learning_rate": 9.260635333457181e-07, "loss": 1.0669, "step": 41950 }, { "epoch": 0.9548733586691851, "grad_norm": 0.00150299072265625, "learning_rate": 9.214192829277354e-07, "loss": 1.0868, "step": 41960 }, { "epoch": 0.9551009261998498, "grad_norm": 200.0, "learning_rate": 9.16775032509753e-07, "loss": 1.2098, "step": 41970 }, { "epoch": 0.9553284937305145, "grad_norm": 79.0, "learning_rate": 9.121307820917705e-07, "loss": 1.1021, "step": 41980 }, { "epoch": 0.9555560612611792, "grad_norm": 210.0, "learning_rate": 9.07486531673788e-07, "loss": 0.9072, "step": 41990 }, { "epoch": 0.955783628791844, "grad_norm": 254.0, "learning_rate": 9.028422812558053e-07, "loss": 0.6898, "step": 42000 }, { "epoch": 0.9560111963225087, "grad_norm": 203.0, "learning_rate": 8.981980308378228e-07, "loss": 0.6469, "step": 42010 }, { "epoch": 0.9562387638531734, "grad_norm": 0.013671875, "learning_rate": 8.935537804198403e-07, "loss": 0.2524, "step": 42020 }, { "epoch": 0.9564663313838382, "grad_norm": 1.5078125, "learning_rate": 8.889095300018579e-07, "loss": 1.061, "step": 42030 }, { "epoch": 0.9566938989145028, "grad_norm": 247.0, "learning_rate": 8.842652795838752e-07, "loss": 1.2864, "step": 42040 }, { "epoch": 0.9569214664451676, "grad_norm": 73.5, "learning_rate": 8.796210291658927e-07, "loss": 1.1352, "step": 42050 }, { "epoch": 0.9571490339758323, "grad_norm": 64.5, "learning_rate": 8.749767787479102e-07, "loss": 0.5545, "step": 42060 }, { "epoch": 0.9573766015064971, "grad_norm": 6.28125, "learning_rate": 8.703325283299276e-07, "loss": 1.3192, "step": 42070 }, { "epoch": 0.9576041690371617, "grad_norm": 202.0, "learning_rate": 8.656882779119451e-07, "loss": 0.5621, "step": 42080 }, { "epoch": 0.9578317365678265, "grad_norm": 3.078125, "learning_rate": 8.610440274939625e-07, "loss": 0.531, "step": 42090 }, { "epoch": 0.9580593040984913, "grad_norm": 0.00830078125, "learning_rate": 8.5639977707598e-07, "loss": 0.681, "step": 42100 }, { "epoch": 0.9582868716291559, "grad_norm": 66.5, "learning_rate": 8.517555266579974e-07, "loss": 0.3645, "step": 42110 }, { "epoch": 0.9585144391598207, "grad_norm": 0.007720947265625, "learning_rate": 8.47111276240015e-07, "loss": 0.0488, "step": 42120 }, { "epoch": 0.9587420066904854, "grad_norm": 0.056640625, "learning_rate": 8.424670258220325e-07, "loss": 0.5117, "step": 42130 }, { "epoch": 0.9589695742211501, "grad_norm": 5.40625, "learning_rate": 8.378227754040498e-07, "loss": 0.6899, "step": 42140 }, { "epoch": 0.9591971417518148, "grad_norm": 0.0037689208984375, "learning_rate": 8.331785249860673e-07, "loss": 0.6219, "step": 42150 }, { "epoch": 0.9594247092824796, "grad_norm": 0.1796875, "learning_rate": 8.285342745680848e-07, "loss": 0.8517, "step": 42160 }, { "epoch": 0.9596522768131444, "grad_norm": 169.0, "learning_rate": 8.238900241501023e-07, "loss": 0.6809, "step": 42170 }, { "epoch": 0.959879844343809, "grad_norm": 53.75, "learning_rate": 8.192457737321196e-07, "loss": 0.7981, "step": 42180 }, { "epoch": 0.9601074118744738, "grad_norm": 79.5, "learning_rate": 8.146015233141371e-07, "loss": 0.4688, "step": 42190 }, { "epoch": 0.9603349794051385, "grad_norm": 0.006072998046875, "learning_rate": 8.099572728961547e-07, "loss": 0.1908, "step": 42200 }, { "epoch": 0.9605625469358032, "grad_norm": 145.0, "learning_rate": 8.053130224781721e-07, "loss": 0.9836, "step": 42210 }, { "epoch": 0.9607901144664679, "grad_norm": 0.005584716796875, "learning_rate": 8.006687720601896e-07, "loss": 0.6546, "step": 42220 }, { "epoch": 0.9610176819971327, "grad_norm": 0.006683349609375, "learning_rate": 7.96024521642207e-07, "loss": 0.7382, "step": 42230 }, { "epoch": 0.9612452495277973, "grad_norm": 0.003143310546875, "learning_rate": 7.913802712242245e-07, "loss": 0.7653, "step": 42240 }, { "epoch": 0.9614728170584621, "grad_norm": 188.0, "learning_rate": 7.867360208062419e-07, "loss": 1.1141, "step": 42250 }, { "epoch": 0.9617003845891269, "grad_norm": 0.03759765625, "learning_rate": 7.820917703882594e-07, "loss": 0.3988, "step": 42260 }, { "epoch": 0.9619279521197915, "grad_norm": 0.09619140625, "learning_rate": 7.774475199702769e-07, "loss": 0.2041, "step": 42270 }, { "epoch": 0.9621555196504563, "grad_norm": 0.0019378662109375, "learning_rate": 7.728032695522942e-07, "loss": 0.3761, "step": 42280 }, { "epoch": 0.962383087181121, "grad_norm": 20.5, "learning_rate": 7.681590191343118e-07, "loss": 0.3932, "step": 42290 }, { "epoch": 0.9626106547117858, "grad_norm": 336.0, "learning_rate": 7.635147687163293e-07, "loss": 0.9872, "step": 42300 }, { "epoch": 0.9628382222424504, "grad_norm": 159.0, "learning_rate": 7.588705182983468e-07, "loss": 0.4259, "step": 42310 }, { "epoch": 0.9630657897731152, "grad_norm": 99.0, "learning_rate": 7.542262678803641e-07, "loss": 1.3817, "step": 42320 }, { "epoch": 0.9632933573037799, "grad_norm": 1.078125, "learning_rate": 7.495820174623816e-07, "loss": 0.745, "step": 42330 }, { "epoch": 0.9635209248344446, "grad_norm": 0.01336669921875, "learning_rate": 7.449377670443991e-07, "loss": 2.0588, "step": 42340 }, { "epoch": 0.9637484923651094, "grad_norm": 0.0135498046875, "learning_rate": 7.402935166264165e-07, "loss": 0.9216, "step": 42350 }, { "epoch": 0.9639760598957741, "grad_norm": 185.0, "learning_rate": 7.35649266208434e-07, "loss": 0.5483, "step": 42360 }, { "epoch": 0.9642036274264388, "grad_norm": 22.625, "learning_rate": 7.310050157904515e-07, "loss": 1.2616, "step": 42370 }, { "epoch": 0.9644311949571035, "grad_norm": 0.0015716552734375, "learning_rate": 7.26360765372469e-07, "loss": 1.3654, "step": 42380 }, { "epoch": 0.9646587624877683, "grad_norm": 156.0, "learning_rate": 7.217165149544864e-07, "loss": 0.5145, "step": 42390 }, { "epoch": 0.9648863300184329, "grad_norm": 262.0, "learning_rate": 7.170722645365038e-07, "loss": 0.9576, "step": 42400 }, { "epoch": 0.9651138975490977, "grad_norm": 0.003997802734375, "learning_rate": 7.124280141185213e-07, "loss": 1.2964, "step": 42410 }, { "epoch": 0.9653414650797624, "grad_norm": 128.0, "learning_rate": 7.077837637005387e-07, "loss": 0.4682, "step": 42420 }, { "epoch": 0.9655690326104271, "grad_norm": 540.0, "learning_rate": 7.031395132825562e-07, "loss": 1.5978, "step": 42430 }, { "epoch": 0.9657966001410919, "grad_norm": 83.0, "learning_rate": 6.984952628645738e-07, "loss": 1.1033, "step": 42440 }, { "epoch": 0.9660241676717566, "grad_norm": 43.0, "learning_rate": 6.938510124465912e-07, "loss": 1.1169, "step": 42450 }, { "epoch": 0.9662517352024214, "grad_norm": 0.0133056640625, "learning_rate": 6.892067620286086e-07, "loss": 0.4807, "step": 42460 }, { "epoch": 0.966479302733086, "grad_norm": 206.0, "learning_rate": 6.845625116106261e-07, "loss": 1.3346, "step": 42470 }, { "epoch": 0.9667068702637508, "grad_norm": 0.0008544921875, "learning_rate": 6.799182611926436e-07, "loss": 1.2885, "step": 42480 }, { "epoch": 0.9669344377944155, "grad_norm": 140.0, "learning_rate": 6.75274010774661e-07, "loss": 0.9859, "step": 42490 }, { "epoch": 0.9671620053250802, "grad_norm": 40.0, "learning_rate": 6.706297603566784e-07, "loss": 0.4546, "step": 42500 }, { "epoch": 0.9673895728557449, "grad_norm": 84.5, "learning_rate": 6.65985509938696e-07, "loss": 1.7257, "step": 42510 }, { "epoch": 0.9676171403864097, "grad_norm": 82.5, "learning_rate": 6.613412595207135e-07, "loss": 0.6211, "step": 42520 }, { "epoch": 0.9678447079170744, "grad_norm": 229.0, "learning_rate": 6.566970091027309e-07, "loss": 0.5329, "step": 42530 }, { "epoch": 0.9680722754477391, "grad_norm": 0.0010528564453125, "learning_rate": 6.520527586847483e-07, "loss": 1.2191, "step": 42540 }, { "epoch": 0.9682998429784039, "grad_norm": 0.0021209716796875, "learning_rate": 6.474085082667658e-07, "loss": 0.398, "step": 42550 }, { "epoch": 0.9685274105090685, "grad_norm": 60.25, "learning_rate": 6.427642578487833e-07, "loss": 0.7799, "step": 42560 }, { "epoch": 0.9687549780397333, "grad_norm": 656.0, "learning_rate": 6.381200074308007e-07, "loss": 1.6435, "step": 42570 }, { "epoch": 0.968982545570398, "grad_norm": 0.00347900390625, "learning_rate": 6.334757570128181e-07, "loss": 0.5347, "step": 42580 }, { "epoch": 0.9692101131010628, "grad_norm": 12.375, "learning_rate": 6.288315065948357e-07, "loss": 0.4244, "step": 42590 }, { "epoch": 0.9694376806317274, "grad_norm": 0.0205078125, "learning_rate": 6.241872561768531e-07, "loss": 2.2514, "step": 42600 }, { "epoch": 0.9696652481623922, "grad_norm": 0.000682830810546875, "learning_rate": 6.195430057588706e-07, "loss": 0.6696, "step": 42610 }, { "epoch": 0.969892815693057, "grad_norm": 19.375, "learning_rate": 6.148987553408881e-07, "loss": 0.8169, "step": 42620 }, { "epoch": 0.9701203832237216, "grad_norm": 177.0, "learning_rate": 6.102545049229055e-07, "loss": 0.8437, "step": 42630 }, { "epoch": 0.9703479507543864, "grad_norm": 82.0, "learning_rate": 6.05610254504923e-07, "loss": 0.9561, "step": 42640 }, { "epoch": 0.9705755182850511, "grad_norm": 0.017333984375, "learning_rate": 6.009660040869404e-07, "loss": 0.9154, "step": 42650 }, { "epoch": 0.9708030858157158, "grad_norm": 356.0, "learning_rate": 5.963217536689579e-07, "loss": 1.2073, "step": 42660 }, { "epoch": 0.9710306533463805, "grad_norm": 53.5, "learning_rate": 5.916775032509753e-07, "loss": 0.8474, "step": 42670 }, { "epoch": 0.9712582208770453, "grad_norm": 106.5, "learning_rate": 5.870332528329928e-07, "loss": 0.2919, "step": 42680 }, { "epoch": 0.9714857884077099, "grad_norm": 37.25, "learning_rate": 5.823890024150103e-07, "loss": 0.9097, "step": 42690 }, { "epoch": 0.9717133559383747, "grad_norm": 99.5, "learning_rate": 5.777447519970277e-07, "loss": 0.4928, "step": 42700 }, { "epoch": 0.9719409234690395, "grad_norm": 360.0, "learning_rate": 5.731005015790453e-07, "loss": 1.2384, "step": 42710 }, { "epoch": 0.9721684909997041, "grad_norm": 153.0, "learning_rate": 5.684562511610626e-07, "loss": 0.5017, "step": 42720 }, { "epoch": 0.9723960585303689, "grad_norm": 94.0, "learning_rate": 5.638120007430801e-07, "loss": 1.1818, "step": 42730 }, { "epoch": 0.9726236260610336, "grad_norm": 0.008544921875, "learning_rate": 5.591677503250976e-07, "loss": 1.1104, "step": 42740 }, { "epoch": 0.9728511935916984, "grad_norm": 612.0, "learning_rate": 5.545234999071151e-07, "loss": 0.7861, "step": 42750 }, { "epoch": 0.973078761122363, "grad_norm": 0.02783203125, "learning_rate": 5.498792494891325e-07, "loss": 0.6705, "step": 42760 }, { "epoch": 0.9733063286530278, "grad_norm": 0.004058837890625, "learning_rate": 5.452349990711499e-07, "loss": 0.1353, "step": 42770 }, { "epoch": 0.9735338961836925, "grad_norm": 169.0, "learning_rate": 5.405907486531675e-07, "loss": 1.8099, "step": 42780 }, { "epoch": 0.9737614637143572, "grad_norm": 17.75, "learning_rate": 5.359464982351849e-07, "loss": 0.6096, "step": 42790 }, { "epoch": 0.973989031245022, "grad_norm": 400.0, "learning_rate": 5.313022478172024e-07, "loss": 1.3651, "step": 42800 }, { "epoch": 0.9742165987756867, "grad_norm": 45.75, "learning_rate": 5.266579973992198e-07, "loss": 0.9068, "step": 42810 }, { "epoch": 0.9744441663063514, "grad_norm": 0.0035552978515625, "learning_rate": 5.220137469812372e-07, "loss": 0.7249, "step": 42820 }, { "epoch": 0.9746717338370161, "grad_norm": 5.625, "learning_rate": 5.173694965632548e-07, "loss": 0.2589, "step": 42830 }, { "epoch": 0.9748993013676809, "grad_norm": 7.90625, "learning_rate": 5.127252461452722e-07, "loss": 1.095, "step": 42840 }, { "epoch": 0.9751268688983455, "grad_norm": 0.000583648681640625, "learning_rate": 5.080809957272896e-07, "loss": 0.6279, "step": 42850 }, { "epoch": 0.9753544364290103, "grad_norm": 166.0, "learning_rate": 5.034367453093071e-07, "loss": 0.6815, "step": 42860 }, { "epoch": 0.975582003959675, "grad_norm": 130.0, "learning_rate": 4.987924948913246e-07, "loss": 0.7295, "step": 42870 }, { "epoch": 0.9758095714903398, "grad_norm": 0.93359375, "learning_rate": 4.941482444733421e-07, "loss": 0.2115, "step": 42880 }, { "epoch": 0.9760371390210045, "grad_norm": 95.0, "learning_rate": 4.895039940553594e-07, "loss": 2.0283, "step": 42890 }, { "epoch": 0.9762647065516692, "grad_norm": 243.0, "learning_rate": 4.84859743637377e-07, "loss": 0.6729, "step": 42900 }, { "epoch": 0.976492274082334, "grad_norm": 125.0, "learning_rate": 4.802154932193944e-07, "loss": 0.4388, "step": 42910 }, { "epoch": 0.9767198416129986, "grad_norm": 28.0, "learning_rate": 4.7557124280141193e-07, "loss": 0.3676, "step": 42920 }, { "epoch": 0.9769474091436634, "grad_norm": 382.0, "learning_rate": 4.7092699238342936e-07, "loss": 0.659, "step": 42930 }, { "epoch": 0.9771749766743281, "grad_norm": 324.0, "learning_rate": 4.6628274196544684e-07, "loss": 1.2889, "step": 42940 }, { "epoch": 0.9774025442049928, "grad_norm": 170.0, "learning_rate": 4.6163849154746426e-07, "loss": 0.552, "step": 42950 }, { "epoch": 0.9776301117356575, "grad_norm": 205.0, "learning_rate": 4.569942411294817e-07, "loss": 1.1317, "step": 42960 }, { "epoch": 0.9778576792663223, "grad_norm": 182.0, "learning_rate": 4.523499907114992e-07, "loss": 0.6163, "step": 42970 }, { "epoch": 0.978085246796987, "grad_norm": 262.0, "learning_rate": 4.4770574029351664e-07, "loss": 0.9328, "step": 42980 }, { "epoch": 0.9783128143276517, "grad_norm": 45.5, "learning_rate": 4.430614898755341e-07, "loss": 1.3236, "step": 42990 }, { "epoch": 0.9785403818583165, "grad_norm": 276.0, "learning_rate": 4.3841723945755155e-07, "loss": 1.0827, "step": 43000 }, { "epoch": 0.9787679493889812, "grad_norm": 58.25, "learning_rate": 4.337729890395691e-07, "loss": 1.3059, "step": 43010 }, { "epoch": 0.9789955169196459, "grad_norm": 248.0, "learning_rate": 4.291287386215865e-07, "loss": 1.2348, "step": 43020 }, { "epoch": 0.9792230844503106, "grad_norm": 143.0, "learning_rate": 4.2448448820360393e-07, "loss": 0.2925, "step": 43030 }, { "epoch": 0.9794506519809754, "grad_norm": 0.0242919921875, "learning_rate": 4.1984023778562146e-07, "loss": 0.44, "step": 43040 }, { "epoch": 0.97967821951164, "grad_norm": 116.5, "learning_rate": 4.151959873676389e-07, "loss": 0.5218, "step": 43050 }, { "epoch": 0.9799057870423048, "grad_norm": 75.0, "learning_rate": 4.1055173694965637e-07, "loss": 0.5091, "step": 43060 }, { "epoch": 0.9801333545729696, "grad_norm": 233.0, "learning_rate": 4.059074865316738e-07, "loss": 0.6416, "step": 43070 }, { "epoch": 0.9803609221036342, "grad_norm": 123.5, "learning_rate": 4.012632361136913e-07, "loss": 0.7359, "step": 43080 }, { "epoch": 0.980588489634299, "grad_norm": 0.01171875, "learning_rate": 3.9661898569570875e-07, "loss": 0.4958, "step": 43090 }, { "epoch": 0.9808160571649637, "grad_norm": 316.0, "learning_rate": 3.9197473527772623e-07, "loss": 0.5802, "step": 43100 }, { "epoch": 0.9810436246956284, "grad_norm": 206.0, "learning_rate": 3.8733048485974365e-07, "loss": 0.3825, "step": 43110 }, { "epoch": 0.9812711922262931, "grad_norm": 0.3203125, "learning_rate": 3.826862344417611e-07, "loss": 0.4393, "step": 43120 }, { "epoch": 0.9814987597569579, "grad_norm": 0.0140380859375, "learning_rate": 3.780419840237786e-07, "loss": 0.8786, "step": 43130 }, { "epoch": 0.9817263272876225, "grad_norm": 272.0, "learning_rate": 3.7339773360579604e-07, "loss": 0.824, "step": 43140 }, { "epoch": 0.9819538948182873, "grad_norm": 7.152557373046875e-05, "learning_rate": 3.687534831878135e-07, "loss": 1.7853, "step": 43150 }, { "epoch": 0.9821814623489521, "grad_norm": 31.0, "learning_rate": 3.64109232769831e-07, "loss": 1.4823, "step": 43160 }, { "epoch": 0.9824090298796168, "grad_norm": 121.0, "learning_rate": 3.5946498235184847e-07, "loss": 0.1354, "step": 43170 }, { "epoch": 0.9826365974102815, "grad_norm": 0.6640625, "learning_rate": 3.548207319338659e-07, "loss": 0.4764, "step": 43180 }, { "epoch": 0.9828641649409462, "grad_norm": 0.004364013671875, "learning_rate": 3.501764815158833e-07, "loss": 1.0185, "step": 43190 }, { "epoch": 0.983091732471611, "grad_norm": 124.0, "learning_rate": 3.4553223109790085e-07, "loss": 0.5175, "step": 43200 }, { "epoch": 0.9833193000022756, "grad_norm": 36.75, "learning_rate": 3.408879806799183e-07, "loss": 0.275, "step": 43210 }, { "epoch": 0.9835468675329404, "grad_norm": 292.0, "learning_rate": 3.3624373026193576e-07, "loss": 0.7722, "step": 43220 }, { "epoch": 0.9837744350636052, "grad_norm": 0.04052734375, "learning_rate": 3.315994798439532e-07, "loss": 0.7943, "step": 43230 }, { "epoch": 0.9840020025942698, "grad_norm": 282.0, "learning_rate": 3.269552294259707e-07, "loss": 0.6405, "step": 43240 }, { "epoch": 0.9842295701249346, "grad_norm": 0.00830078125, "learning_rate": 3.2231097900798814e-07, "loss": 0.4154, "step": 43250 }, { "epoch": 0.9844571376555993, "grad_norm": 402.0, "learning_rate": 3.1766672859000556e-07, "loss": 0.6708, "step": 43260 }, { "epoch": 0.984684705186264, "grad_norm": 252.0, "learning_rate": 3.1302247817202304e-07, "loss": 0.3828, "step": 43270 }, { "epoch": 0.9849122727169287, "grad_norm": 49.75, "learning_rate": 3.083782277540405e-07, "loss": 0.7748, "step": 43280 }, { "epoch": 0.9851398402475935, "grad_norm": 9.875, "learning_rate": 3.03733977336058e-07, "loss": 0.8022, "step": 43290 }, { "epoch": 0.9853674077782582, "grad_norm": 49.25, "learning_rate": 2.990897269180754e-07, "loss": 0.5513, "step": 43300 }, { "epoch": 0.9855949753089229, "grad_norm": 218.0, "learning_rate": 2.944454765000929e-07, "loss": 1.4295, "step": 43310 }, { "epoch": 0.9858225428395877, "grad_norm": 89.0, "learning_rate": 2.898012260821104e-07, "loss": 0.7273, "step": 43320 }, { "epoch": 0.9860501103702524, "grad_norm": 165.0, "learning_rate": 2.851569756641278e-07, "loss": 0.2631, "step": 43330 }, { "epoch": 0.9862776779009171, "grad_norm": 0.0245361328125, "learning_rate": 2.805127252461453e-07, "loss": 0.8145, "step": 43340 }, { "epoch": 0.9865052454315818, "grad_norm": 0.1669921875, "learning_rate": 2.7586847482816277e-07, "loss": 1.5433, "step": 43350 }, { "epoch": 0.9867328129622466, "grad_norm": 94.0, "learning_rate": 2.7122422441018024e-07, "loss": 0.6862, "step": 43360 }, { "epoch": 0.9869603804929112, "grad_norm": 0.39453125, "learning_rate": 2.665799739921977e-07, "loss": 1.0692, "step": 43370 }, { "epoch": 0.987187948023576, "grad_norm": 166.0, "learning_rate": 2.6193572357421515e-07, "loss": 0.7135, "step": 43380 }, { "epoch": 0.9874155155542407, "grad_norm": 0.035888671875, "learning_rate": 2.5729147315623257e-07, "loss": 1.3545, "step": 43390 }, { "epoch": 0.9876430830849054, "grad_norm": 51.75, "learning_rate": 2.5264722273825005e-07, "loss": 1.6251, "step": 43400 }, { "epoch": 0.9878706506155702, "grad_norm": 137.0, "learning_rate": 2.4800297232026753e-07, "loss": 0.9526, "step": 43410 }, { "epoch": 0.9880982181462349, "grad_norm": 35.5, "learning_rate": 2.43358721902285e-07, "loss": 0.8263, "step": 43420 }, { "epoch": 0.9883257856768997, "grad_norm": 96.0, "learning_rate": 2.387144714843025e-07, "loss": 1.305, "step": 43430 }, { "epoch": 0.9885533532075643, "grad_norm": 205.0, "learning_rate": 2.3407022106631994e-07, "loss": 0.5553, "step": 43440 }, { "epoch": 0.9887809207382291, "grad_norm": 106.5, "learning_rate": 2.2942597064833736e-07, "loss": 0.4371, "step": 43450 }, { "epoch": 0.9890084882688938, "grad_norm": 0.306640625, "learning_rate": 2.2478172023035482e-07, "loss": 1.1876, "step": 43460 }, { "epoch": 0.9892360557995585, "grad_norm": 114.5, "learning_rate": 2.201374698123723e-07, "loss": 0.6701, "step": 43470 }, { "epoch": 0.9894636233302232, "grad_norm": 34.25, "learning_rate": 2.1549321939438977e-07, "loss": 0.6967, "step": 43480 }, { "epoch": 0.989691190860888, "grad_norm": 0.008056640625, "learning_rate": 2.1084896897640723e-07, "loss": 0.9611, "step": 43490 }, { "epoch": 0.9899187583915527, "grad_norm": 400.0, "learning_rate": 2.062047185584247e-07, "loss": 0.7535, "step": 43500 }, { "epoch": 0.9901463259222174, "grad_norm": 154.0, "learning_rate": 2.0156046814044216e-07, "loss": 0.4265, "step": 43510 }, { "epoch": 0.9903738934528822, "grad_norm": 14.3125, "learning_rate": 1.9691621772245963e-07, "loss": 0.5422, "step": 43520 }, { "epoch": 0.9906014609835468, "grad_norm": 1.4765625, "learning_rate": 1.9227196730447706e-07, "loss": 0.5955, "step": 43530 }, { "epoch": 0.9908290285142116, "grad_norm": 58.75, "learning_rate": 1.8762771688649454e-07, "loss": 0.9521, "step": 43540 }, { "epoch": 0.9910565960448763, "grad_norm": 0.01123046875, "learning_rate": 1.82983466468512e-07, "loss": 1.6394, "step": 43550 }, { "epoch": 0.991284163575541, "grad_norm": 0.0017242431640625, "learning_rate": 1.7833921605052947e-07, "loss": 0.5429, "step": 43560 }, { "epoch": 0.9915117311062057, "grad_norm": 0.0047607421875, "learning_rate": 1.7369496563254692e-07, "loss": 1.2209, "step": 43570 }, { "epoch": 0.9917392986368705, "grad_norm": 2.09375, "learning_rate": 1.690507152145644e-07, "loss": 0.4713, "step": 43580 }, { "epoch": 0.9919668661675353, "grad_norm": 0.00543212890625, "learning_rate": 1.6440646479658185e-07, "loss": 1.0367, "step": 43590 }, { "epoch": 0.9921944336981999, "grad_norm": 233.0, "learning_rate": 1.5976221437859933e-07, "loss": 0.2743, "step": 43600 }, { "epoch": 0.9924220012288647, "grad_norm": 210.0, "learning_rate": 1.5511796396061678e-07, "loss": 1.5769, "step": 43610 }, { "epoch": 0.9926495687595294, "grad_norm": 100.0, "learning_rate": 1.5047371354263423e-07, "loss": 1.1105, "step": 43620 }, { "epoch": 0.9928771362901941, "grad_norm": 53.0, "learning_rate": 1.4582946312465169e-07, "loss": 0.5386, "step": 43630 }, { "epoch": 0.9931047038208588, "grad_norm": 67.5, "learning_rate": 1.4118521270666916e-07, "loss": 0.8195, "step": 43640 }, { "epoch": 0.9933322713515236, "grad_norm": 0.46484375, "learning_rate": 1.3654096228868662e-07, "loss": 0.156, "step": 43650 }, { "epoch": 0.9935598388821882, "grad_norm": 336.0, "learning_rate": 1.3189671187070407e-07, "loss": 1.1726, "step": 43660 }, { "epoch": 0.993787406412853, "grad_norm": 159.0, "learning_rate": 1.2725246145272155e-07, "loss": 0.8899, "step": 43670 }, { "epoch": 0.9940149739435178, "grad_norm": 157.0, "learning_rate": 1.22608211034739e-07, "loss": 1.5587, "step": 43680 }, { "epoch": 0.9942425414741825, "grad_norm": 512.0, "learning_rate": 1.1796396061675648e-07, "loss": 1.1514, "step": 43690 }, { "epoch": 0.9944701090048472, "grad_norm": 0.0028839111328125, "learning_rate": 1.1331971019877392e-07, "loss": 1.0759, "step": 43700 }, { "epoch": 0.9946976765355119, "grad_norm": 243.0, "learning_rate": 1.086754597807914e-07, "loss": 1.0491, "step": 43710 }, { "epoch": 0.9949252440661767, "grad_norm": 123.0, "learning_rate": 1.0403120936280886e-07, "loss": 0.841, "step": 43720 }, { "epoch": 0.9951528115968413, "grad_norm": 0.0179443359375, "learning_rate": 9.938695894482632e-08, "loss": 0.4865, "step": 43730 }, { "epoch": 0.9953803791275061, "grad_norm": 182.0, "learning_rate": 9.474270852684376e-08, "loss": 0.4718, "step": 43740 }, { "epoch": 0.9956079466581708, "grad_norm": 85.0, "learning_rate": 9.009845810886124e-08, "loss": 0.2899, "step": 43750 }, { "epoch": 0.9958355141888355, "grad_norm": 33.0, "learning_rate": 8.54542076908787e-08, "loss": 0.1927, "step": 43760 }, { "epoch": 0.9960630817195003, "grad_norm": 0.2890625, "learning_rate": 8.080995727289617e-08, "loss": 0.857, "step": 43770 }, { "epoch": 0.996290649250165, "grad_norm": 0.0033721923828125, "learning_rate": 7.616570685491362e-08, "loss": 0.8845, "step": 43780 }, { "epoch": 0.9965182167808297, "grad_norm": 80.0, "learning_rate": 7.152145643693109e-08, "loss": 0.3362, "step": 43790 }, { "epoch": 0.9967457843114944, "grad_norm": 0.0162353515625, "learning_rate": 6.687720601894855e-08, "loss": 0.3574, "step": 43800 }, { "epoch": 0.9969733518421592, "grad_norm": 112.5, "learning_rate": 6.2232955600966e-08, "loss": 0.4992, "step": 43810 }, { "epoch": 0.9972009193728238, "grad_norm": 0.036865234375, "learning_rate": 5.7588705182983464e-08, "loss": 0.9337, "step": 43820 }, { "epoch": 0.9974284869034886, "grad_norm": 268.0, "learning_rate": 5.2944454765000936e-08, "loss": 0.5788, "step": 43830 }, { "epoch": 0.9976560544341533, "grad_norm": 17.875, "learning_rate": 4.830020434701839e-08, "loss": 0.6825, "step": 43840 }, { "epoch": 0.9978836219648181, "grad_norm": 126.5, "learning_rate": 4.365595392903586e-08, "loss": 1.3572, "step": 43850 }, { "epoch": 0.9981111894954828, "grad_norm": 56.5, "learning_rate": 3.901170351105332e-08, "loss": 0.431, "step": 43860 }, { "epoch": 0.9983387570261475, "grad_norm": 64.5, "learning_rate": 3.4367453093070784e-08, "loss": 0.5223, "step": 43870 }, { "epoch": 0.9985663245568123, "grad_norm": 19.125, "learning_rate": 2.9723202675088246e-08, "loss": 0.4116, "step": 43880 }, { "epoch": 0.9987938920874769, "grad_norm": 2.375, "learning_rate": 2.5078952257105708e-08, "loss": 0.2104, "step": 43890 }, { "epoch": 0.9990214596181417, "grad_norm": 0.001373291015625, "learning_rate": 2.043470183912317e-08, "loss": 0.8277, "step": 43900 }, { "epoch": 0.9992490271488064, "grad_norm": 25.375, "learning_rate": 1.5790451421140628e-08, "loss": 0.6373, "step": 43910 }, { "epoch": 0.9994765946794711, "grad_norm": 346.0, "learning_rate": 1.1146201003158092e-08, "loss": 0.9354, "step": 43920 }, { "epoch": 0.9997041622101358, "grad_norm": 199.0, "learning_rate": 6.501950585175554e-09, "loss": 1.3832, "step": 43930 }, { "epoch": 0.9999317297408006, "grad_norm": 0.0037078857421875, "learning_rate": 1.8577001671930154e-09, "loss": 1.4164, "step": 43940 } ], "logging_steps": 10, "max_steps": 43943, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.407608319990349e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }