| { |
| "best_global_step": 7998, |
| "best_metric": 1.0033386945724487, |
| "best_model_checkpoint": "./../../../models/LedgerBERT-SciBERT-base-v3-News-Class/2025-10-15_00-24-07/market_direction/checkpoint-7998", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 7998, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0, |
| "eval_accuracy": 0.37046413502109704, |
| "eval_f1_macro": 0.31648220525898246, |
| "eval_f1_weighted": 0.3428571794493407, |
| "eval_loss": 1.0956553220748901, |
| "eval_precision_macro": 0.3404493817232522, |
| "eval_precision_weighted": 0.3541600044961222, |
| "eval_recall_macro": 0.3400488233349732, |
| "eval_recall_weighted": 0.37046413502109704, |
| "eval_runtime": 5.1696, |
| "eval_samples_per_second": 458.454, |
| "eval_steps_per_second": 57.452, |
| "step": 0 |
| }, |
| { |
| "epoch": 0.00037509377344336085, |
| "grad_norm": 6.950562000274658, |
| "learning_rate": 0.0, |
| "loss": 1.0664, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0037509377344336083, |
| "grad_norm": 7.2157673835754395, |
| "learning_rate": 3.6e-07, |
| "loss": 1.1053, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.007501875468867217, |
| "grad_norm": 7.101637840270996, |
| "learning_rate": 7.6e-07, |
| "loss": 1.081, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.011252813203300824, |
| "grad_norm": 7.503627777099609, |
| "learning_rate": 1.1600000000000001e-06, |
| "loss": 1.1167, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.015003750937734433, |
| "grad_norm": 6.733654975891113, |
| "learning_rate": 1.56e-06, |
| "loss": 1.1038, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.018754688672168042, |
| "grad_norm": 6.987666130065918, |
| "learning_rate": 1.9600000000000003e-06, |
| "loss": 1.1025, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02250562640660165, |
| "grad_norm": 9.358382225036621, |
| "learning_rate": 2.3600000000000003e-06, |
| "loss": 1.1166, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02625656414103526, |
| "grad_norm": 6.5409040451049805, |
| "learning_rate": 2.7600000000000003e-06, |
| "loss": 1.0841, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.030007501875468866, |
| "grad_norm": 7.030813217163086, |
| "learning_rate": 3.1600000000000002e-06, |
| "loss": 1.0583, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03375843960990248, |
| "grad_norm": 6.986401081085205, |
| "learning_rate": 3.5600000000000002e-06, |
| "loss": 1.1288, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.037509377344336084, |
| "grad_norm": 5.53237247467041, |
| "learning_rate": 3.96e-06, |
| "loss": 1.0573, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04126031507876969, |
| "grad_norm": 14.836161613464355, |
| "learning_rate": 4.360000000000001e-06, |
| "loss": 1.0754, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0450112528132033, |
| "grad_norm": 8.877525329589844, |
| "learning_rate": 4.76e-06, |
| "loss": 1.0753, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04876219054763691, |
| "grad_norm": 10.311164855957031, |
| "learning_rate": 5.1600000000000006e-06, |
| "loss": 1.1246, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05251312828207052, |
| "grad_norm": 5.360109329223633, |
| "learning_rate": 5.560000000000001e-06, |
| "loss": 1.0218, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.056264066016504126, |
| "grad_norm": 10.249133110046387, |
| "learning_rate": 5.9600000000000005e-06, |
| "loss": 1.0723, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06001500375093773, |
| "grad_norm": 7.881443500518799, |
| "learning_rate": 6.360000000000001e-06, |
| "loss": 1.0727, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06376594148537135, |
| "grad_norm": 5.892578601837158, |
| "learning_rate": 6.760000000000001e-06, |
| "loss": 1.0498, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06751687921980495, |
| "grad_norm": 6.164844512939453, |
| "learning_rate": 7.16e-06, |
| "loss": 1.1182, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07126781695423856, |
| "grad_norm": 6.351868629455566, |
| "learning_rate": 7.5600000000000005e-06, |
| "loss": 1.0735, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07501875468867217, |
| "grad_norm": 6.895458698272705, |
| "learning_rate": 7.960000000000002e-06, |
| "loss": 1.0503, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07876969242310577, |
| "grad_norm": 8.486842155456543, |
| "learning_rate": 8.36e-06, |
| "loss": 1.0965, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08252063015753938, |
| "grad_norm": 8.301511764526367, |
| "learning_rate": 8.76e-06, |
| "loss": 1.1157, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08627156789197299, |
| "grad_norm": 11.515487670898438, |
| "learning_rate": 9.16e-06, |
| "loss": 1.0854, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0900225056264066, |
| "grad_norm": 6.189631938934326, |
| "learning_rate": 9.56e-06, |
| "loss": 1.054, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09377344336084022, |
| "grad_norm": 4.8885393142700195, |
| "learning_rate": 9.960000000000001e-06, |
| "loss": 1.0693, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09752438109527382, |
| "grad_norm": 6.190073490142822, |
| "learning_rate": 1.036e-05, |
| "loss": 1.0786, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10127531882970743, |
| "grad_norm": 8.178174018859863, |
| "learning_rate": 1.0760000000000002e-05, |
| "loss": 1.0374, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10502625656414104, |
| "grad_norm": 5.824592113494873, |
| "learning_rate": 1.1160000000000002e-05, |
| "loss": 1.0829, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.10877719429857464, |
| "grad_norm": 7.339807033538818, |
| "learning_rate": 1.156e-05, |
| "loss": 1.1085, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11252813203300825, |
| "grad_norm": 6.39154577255249, |
| "learning_rate": 1.196e-05, |
| "loss": 1.0505, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11627906976744186, |
| "grad_norm": 7.54710054397583, |
| "learning_rate": 1.236e-05, |
| "loss": 1.049, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12003000750187547, |
| "grad_norm": 10.610452651977539, |
| "learning_rate": 1.2760000000000001e-05, |
| "loss": 1.1105, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12378094523630907, |
| "grad_norm": 6.961548328399658, |
| "learning_rate": 1.3160000000000001e-05, |
| "loss": 1.0392, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1275318829707427, |
| "grad_norm": 8.800139427185059, |
| "learning_rate": 1.3560000000000002e-05, |
| "loss": 1.1473, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1312828207051763, |
| "grad_norm": 7.540011405944824, |
| "learning_rate": 1.396e-05, |
| "loss": 1.0891, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1350337584396099, |
| "grad_norm": 11.337075233459473, |
| "learning_rate": 1.4360000000000001e-05, |
| "loss": 1.0715, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.13878469617404351, |
| "grad_norm": 5.6576457023620605, |
| "learning_rate": 1.4760000000000001e-05, |
| "loss": 1.0702, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.14253563390847712, |
| "grad_norm": 8.98009967803955, |
| "learning_rate": 1.516e-05, |
| "loss": 1.0752, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.14628657164291073, |
| "grad_norm": 4.932474613189697, |
| "learning_rate": 1.556e-05, |
| "loss": 1.0641, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15003750937734434, |
| "grad_norm": 6.130215644836426, |
| "learning_rate": 1.5960000000000003e-05, |
| "loss": 1.0133, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15378844711177794, |
| "grad_norm": 16.0273380279541, |
| "learning_rate": 1.636e-05, |
| "loss": 1.0442, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.15753938484621155, |
| "grad_norm": 12.93301010131836, |
| "learning_rate": 1.6760000000000002e-05, |
| "loss": 1.1161, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.16129032258064516, |
| "grad_norm": 9.27346420288086, |
| "learning_rate": 1.7160000000000002e-05, |
| "loss": 1.0539, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.16504126031507876, |
| "grad_norm": 5.5671186447143555, |
| "learning_rate": 1.756e-05, |
| "loss": 0.9452, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.16879219804951237, |
| "grad_norm": 7.939000606536865, |
| "learning_rate": 1.796e-05, |
| "loss": 1.0522, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.17254313578394598, |
| "grad_norm": 9.265899658203125, |
| "learning_rate": 1.8360000000000004e-05, |
| "loss": 1.0866, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.17629407351837958, |
| "grad_norm": 6.934913158416748, |
| "learning_rate": 1.876e-05, |
| "loss": 0.9723, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1800450112528132, |
| "grad_norm": 6.007977485656738, |
| "learning_rate": 1.916e-05, |
| "loss": 0.9742, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.1837959489872468, |
| "grad_norm": 7.842029094696045, |
| "learning_rate": 1.9560000000000002e-05, |
| "loss": 0.9334, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.18754688672168043, |
| "grad_norm": 10.53432559967041, |
| "learning_rate": 1.9960000000000002e-05, |
| "loss": 0.8706, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19129782445611404, |
| "grad_norm": 9.365771293640137, |
| "learning_rate": 1.997599359829288e-05, |
| "loss": 1.0399, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.19504876219054765, |
| "grad_norm": 9.351228713989258, |
| "learning_rate": 1.99493198186183e-05, |
| "loss": 0.9525, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.19879969992498125, |
| "grad_norm": 12.21917724609375, |
| "learning_rate": 1.992264603894372e-05, |
| "loss": 0.9793, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.20255063765941486, |
| "grad_norm": 17.076719284057617, |
| "learning_rate": 1.9895972259269142e-05, |
| "loss": 1.0403, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.20630157539384847, |
| "grad_norm": 6.928652286529541, |
| "learning_rate": 1.9869298479594562e-05, |
| "loss": 0.9047, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21005251312828208, |
| "grad_norm": 6.858879089355469, |
| "learning_rate": 1.984262469991998e-05, |
| "loss": 1.012, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.21380345086271568, |
| "grad_norm": 5.987520217895508, |
| "learning_rate": 1.98159509202454e-05, |
| "loss": 0.9345, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.2175543885971493, |
| "grad_norm": 12.161517143249512, |
| "learning_rate": 1.978927714057082e-05, |
| "loss": 0.9955, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2213053263315829, |
| "grad_norm": 9.229764938354492, |
| "learning_rate": 1.976260336089624e-05, |
| "loss": 0.998, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2250562640660165, |
| "grad_norm": 9.257465362548828, |
| "learning_rate": 1.973592958122166e-05, |
| "loss": 0.9882, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2288072018004501, |
| "grad_norm": 11.260259628295898, |
| "learning_rate": 1.970925580154708e-05, |
| "loss": 0.9727, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.23255813953488372, |
| "grad_norm": 5.7551984786987305, |
| "learning_rate": 1.96825820218725e-05, |
| "loss": 0.9139, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.23630907726931732, |
| "grad_norm": 7.264505863189697, |
| "learning_rate": 1.9655908242197922e-05, |
| "loss": 0.8718, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.24006001500375093, |
| "grad_norm": 13.518917083740234, |
| "learning_rate": 1.9629234462523342e-05, |
| "loss": 1.0478, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.24381095273818454, |
| "grad_norm": 7.133944034576416, |
| "learning_rate": 1.960256068284876e-05, |
| "loss": 0.951, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.24756189047261815, |
| "grad_norm": 10.491629600524902, |
| "learning_rate": 1.957588690317418e-05, |
| "loss": 0.9271, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.25131282820705175, |
| "grad_norm": 6.807431697845459, |
| "learning_rate": 1.95492131234996e-05, |
| "loss": 1.0804, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.2550637659414854, |
| "grad_norm": 9.180730819702148, |
| "learning_rate": 1.9522539343825024e-05, |
| "loss": 0.9079, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.25881470367591897, |
| "grad_norm": 6.459209442138672, |
| "learning_rate": 1.9495865564150443e-05, |
| "loss": 0.9989, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2625656414103526, |
| "grad_norm": 5.8546929359436035, |
| "learning_rate": 1.9469191784475863e-05, |
| "loss": 0.951, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2663165791447862, |
| "grad_norm": 10.301909446716309, |
| "learning_rate": 1.9442518004801282e-05, |
| "loss": 0.8549, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2700675168792198, |
| "grad_norm": 17.759777069091797, |
| "learning_rate": 1.9415844225126702e-05, |
| "loss": 1.1818, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2738184546136534, |
| "grad_norm": 7.105804920196533, |
| "learning_rate": 1.938917044545212e-05, |
| "loss": 1.034, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.27756939234808703, |
| "grad_norm": 8.125602722167969, |
| "learning_rate": 1.936249666577754e-05, |
| "loss": 0.9509, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2813203300825206, |
| "grad_norm": 6.968907833099365, |
| "learning_rate": 1.933582288610296e-05, |
| "loss": 0.9292, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.28507126781695424, |
| "grad_norm": 9.841052055358887, |
| "learning_rate": 1.930914910642838e-05, |
| "loss": 1.0401, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2888222055513878, |
| "grad_norm": 6.7177910804748535, |
| "learning_rate": 1.9282475326753804e-05, |
| "loss": 1.0079, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.29257314328582146, |
| "grad_norm": 8.652711868286133, |
| "learning_rate": 1.9255801547079223e-05, |
| "loss": 0.8986, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.29632408102025504, |
| "grad_norm": 7.266161918640137, |
| "learning_rate": 1.9229127767404643e-05, |
| "loss": 0.9805, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.30007501875468867, |
| "grad_norm": 7.372107982635498, |
| "learning_rate": 1.9202453987730062e-05, |
| "loss": 1.0254, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3038259564891223, |
| "grad_norm": 6.467881202697754, |
| "learning_rate": 1.9175780208055482e-05, |
| "loss": 0.9931, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3075768942235559, |
| "grad_norm": 8.692418098449707, |
| "learning_rate": 1.9149106428380905e-05, |
| "loss": 0.8585, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3113278319579895, |
| "grad_norm": 7.981175422668457, |
| "learning_rate": 1.9122432648706325e-05, |
| "loss": 0.9164, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3150787696924231, |
| "grad_norm": 11.882697105407715, |
| "learning_rate": 1.9095758869031744e-05, |
| "loss": 1.0325, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.31882970742685673, |
| "grad_norm": 10.736306190490723, |
| "learning_rate": 1.9069085089357164e-05, |
| "loss": 0.9888, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3225806451612903, |
| "grad_norm": 5.334744453430176, |
| "learning_rate": 1.9042411309682583e-05, |
| "loss": 0.9364, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.32633158289572395, |
| "grad_norm": 6.579550743103027, |
| "learning_rate": 1.9015737530008003e-05, |
| "loss": 0.9395, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3300825206301575, |
| "grad_norm": 7.336994171142578, |
| "learning_rate": 1.8989063750333423e-05, |
| "loss": 0.9363, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.33383345836459116, |
| "grad_norm": 9.523600578308105, |
| "learning_rate": 1.8962389970658842e-05, |
| "loss": 0.9405, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.33758439609902474, |
| "grad_norm": 9.350625038146973, |
| "learning_rate": 1.8935716190984262e-05, |
| "loss": 1.0351, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3413353338334584, |
| "grad_norm": 9.00391674041748, |
| "learning_rate": 1.8909042411309685e-05, |
| "loss": 0.9721, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.34508627156789196, |
| "grad_norm": 5.69331693649292, |
| "learning_rate": 1.8882368631635105e-05, |
| "loss": 0.8811, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3488372093023256, |
| "grad_norm": 6.127689361572266, |
| "learning_rate": 1.8855694851960524e-05, |
| "loss": 1.0079, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.35258814703675917, |
| "grad_norm": 11.653777122497559, |
| "learning_rate": 1.8829021072285944e-05, |
| "loss": 0.9518, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.3563390847711928, |
| "grad_norm": 7.30828332901001, |
| "learning_rate": 1.8802347292611363e-05, |
| "loss": 0.8464, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3600900225056264, |
| "grad_norm": 9.21927547454834, |
| "learning_rate": 1.8775673512936786e-05, |
| "loss": 1.0584, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.36384096024006, |
| "grad_norm": 6.939789772033691, |
| "learning_rate": 1.8748999733262206e-05, |
| "loss": 0.9, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3675918979744936, |
| "grad_norm": 12.434165954589844, |
| "learning_rate": 1.8722325953587626e-05, |
| "loss": 1.015, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.37134283570892723, |
| "grad_norm": 11.779828071594238, |
| "learning_rate": 1.8695652173913045e-05, |
| "loss": 0.9725, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.37509377344336087, |
| "grad_norm": 12.166790962219238, |
| "learning_rate": 1.8668978394238465e-05, |
| "loss": 1.0591, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.37884471117779445, |
| "grad_norm": 8.87903881072998, |
| "learning_rate": 1.8642304614563884e-05, |
| "loss": 0.9767, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3825956489122281, |
| "grad_norm": 5.176930904388428, |
| "learning_rate": 1.8615630834889304e-05, |
| "loss": 0.8934, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.38634658664666166, |
| "grad_norm": 7.772132396697998, |
| "learning_rate": 1.8588957055214724e-05, |
| "loss": 0.9488, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3900975243810953, |
| "grad_norm": 10.097055435180664, |
| "learning_rate": 1.8562283275540143e-05, |
| "loss": 0.9725, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.3938484621155289, |
| "grad_norm": 10.014994621276855, |
| "learning_rate": 1.8535609495865566e-05, |
| "loss": 0.9432, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.3975993998499625, |
| "grad_norm": 10.885961532592773, |
| "learning_rate": 1.8508935716190986e-05, |
| "loss": 1.0393, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4013503375843961, |
| "grad_norm": 7.621641635894775, |
| "learning_rate": 1.8482261936516406e-05, |
| "loss": 0.9801, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4051012753188297, |
| "grad_norm": 6.268519878387451, |
| "learning_rate": 1.8455588156841825e-05, |
| "loss": 0.9922, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.4088522130532633, |
| "grad_norm": 6.714245796203613, |
| "learning_rate": 1.8428914377167245e-05, |
| "loss": 1.0355, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.41260315078769694, |
| "grad_norm": 11.643074035644531, |
| "learning_rate": 1.8402240597492668e-05, |
| "loss": 1.0575, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4163540885221305, |
| "grad_norm": 6.439828395843506, |
| "learning_rate": 1.8375566817818087e-05, |
| "loss": 0.9101, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.42010502625656415, |
| "grad_norm": 6.833279609680176, |
| "learning_rate": 1.8348893038143507e-05, |
| "loss": 0.935, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.42385596399099773, |
| "grad_norm": 7.262381553649902, |
| "learning_rate": 1.8322219258468927e-05, |
| "loss": 0.977, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.42760690172543137, |
| "grad_norm": 5.480360984802246, |
| "learning_rate": 1.8295545478794346e-05, |
| "loss": 0.8673, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.43135783945986494, |
| "grad_norm": 8.4745454788208, |
| "learning_rate": 1.8268871699119766e-05, |
| "loss": 0.88, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4351087771942986, |
| "grad_norm": 16.769878387451172, |
| "learning_rate": 1.8242197919445185e-05, |
| "loss": 0.9576, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.43885971492873216, |
| "grad_norm": 7.4179582595825195, |
| "learning_rate": 1.8215524139770605e-05, |
| "loss": 0.9263, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.4426106526631658, |
| "grad_norm": 11.899470329284668, |
| "learning_rate": 1.8188850360096028e-05, |
| "loss": 0.9328, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.4463615903975994, |
| "grad_norm": 8.113855361938477, |
| "learning_rate": 1.8162176580421448e-05, |
| "loss": 0.9684, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.450112528132033, |
| "grad_norm": 7.619154453277588, |
| "learning_rate": 1.8135502800746867e-05, |
| "loss": 0.918, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.45386346586646664, |
| "grad_norm": 7.7961602210998535, |
| "learning_rate": 1.8108829021072287e-05, |
| "loss": 0.8574, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4576144036009002, |
| "grad_norm": 8.734787940979004, |
| "learning_rate": 1.8082155241397707e-05, |
| "loss": 0.9009, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.46136534133533386, |
| "grad_norm": 5.773232936859131, |
| "learning_rate": 1.8055481461723126e-05, |
| "loss": 1.0554, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 7.872585773468018, |
| "learning_rate": 1.802880768204855e-05, |
| "loss": 0.8688, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.46886721680420107, |
| "grad_norm": 7.2498602867126465, |
| "learning_rate": 1.800213390237397e-05, |
| "loss": 0.9726, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.47261815453863465, |
| "grad_norm": 11.007004737854004, |
| "learning_rate": 1.797546012269939e-05, |
| "loss": 0.9338, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4763690922730683, |
| "grad_norm": 10.418313980102539, |
| "learning_rate": 1.7948786343024808e-05, |
| "loss": 0.9217, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.48012003000750186, |
| "grad_norm": 11.935880661010742, |
| "learning_rate": 1.7922112563350228e-05, |
| "loss": 0.8656, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4838709677419355, |
| "grad_norm": 10.331807136535645, |
| "learning_rate": 1.789543878367565e-05, |
| "loss": 0.9948, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.4876219054763691, |
| "grad_norm": 7.979977607727051, |
| "learning_rate": 1.7868765004001067e-05, |
| "loss": 0.9068, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4913728432108027, |
| "grad_norm": 7.865904808044434, |
| "learning_rate": 1.7842091224326486e-05, |
| "loss": 0.8362, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.4951237809452363, |
| "grad_norm": 11.6406888961792, |
| "learning_rate": 1.781541744465191e-05, |
| "loss": 1.0061, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.4988747186796699, |
| "grad_norm": 9.274069786071777, |
| "learning_rate": 1.778874366497733e-05, |
| "loss": 0.9448, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5026256564141035, |
| "grad_norm": 9.999556541442871, |
| "learning_rate": 1.776206988530275e-05, |
| "loss": 0.9188, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5063765941485371, |
| "grad_norm": 10.032958984375, |
| "learning_rate": 1.773539610562817e-05, |
| "loss": 0.9794, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5101275318829708, |
| "grad_norm": 5.453114032745361, |
| "learning_rate": 1.7708722325953588e-05, |
| "loss": 1.0102, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5138784696174044, |
| "grad_norm": 13.257373809814453, |
| "learning_rate": 1.7682048546279008e-05, |
| "loss": 0.9801, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.5176294073518379, |
| "grad_norm": 5.355706691741943, |
| "learning_rate": 1.765537476660443e-05, |
| "loss": 0.9126, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5213803450862715, |
| "grad_norm": 9.768399238586426, |
| "learning_rate": 1.762870098692985e-05, |
| "loss": 0.9423, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5251312828207052, |
| "grad_norm": 8.362143516540527, |
| "learning_rate": 1.760202720725527e-05, |
| "loss": 1.0289, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5288822205551388, |
| "grad_norm": 10.58354377746582, |
| "learning_rate": 1.757535342758069e-05, |
| "loss": 0.9593, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5326331582895724, |
| "grad_norm": 8.964977264404297, |
| "learning_rate": 1.754867964790611e-05, |
| "loss": 1.1002, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.536384096024006, |
| "grad_norm": 11.886764526367188, |
| "learning_rate": 1.7522005868231532e-05, |
| "loss": 0.842, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5401350337584396, |
| "grad_norm": 9.155001640319824, |
| "learning_rate": 1.7495332088556948e-05, |
| "loss": 1.0402, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5438859714928732, |
| "grad_norm": 7.865649223327637, |
| "learning_rate": 1.7468658308882368e-05, |
| "loss": 0.9502, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5476369092273068, |
| "grad_norm": 8.232137680053711, |
| "learning_rate": 1.744198452920779e-05, |
| "loss": 0.9042, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5513878469617405, |
| "grad_norm": 7.428460597991943, |
| "learning_rate": 1.741531074953321e-05, |
| "loss": 0.8664, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5551387846961741, |
| "grad_norm": 6.769949913024902, |
| "learning_rate": 1.738863696985863e-05, |
| "loss": 0.9676, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5588897224306076, |
| "grad_norm": 7.262323379516602, |
| "learning_rate": 1.736196319018405e-05, |
| "loss": 0.9461, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5626406601650412, |
| "grad_norm": 7.46332311630249, |
| "learning_rate": 1.733528941050947e-05, |
| "loss": 0.9928, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5663915978994749, |
| "grad_norm": 13.346348762512207, |
| "learning_rate": 1.7308615630834892e-05, |
| "loss": 0.9645, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5701425356339085, |
| "grad_norm": 7.057946681976318, |
| "learning_rate": 1.7281941851160312e-05, |
| "loss": 0.872, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5738934733683421, |
| "grad_norm": 11.920793533325195, |
| "learning_rate": 1.725526807148573e-05, |
| "loss": 0.9084, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5776444111027756, |
| "grad_norm": 4.696298122406006, |
| "learning_rate": 1.722859429181115e-05, |
| "loss": 0.9184, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5813953488372093, |
| "grad_norm": 9.623963356018066, |
| "learning_rate": 1.720192051213657e-05, |
| "loss": 0.8924, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5851462865716429, |
| "grad_norm": 10.262091636657715, |
| "learning_rate": 1.717524673246199e-05, |
| "loss": 0.9476, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5888972243060765, |
| "grad_norm": 10.587578773498535, |
| "learning_rate": 1.7148572952787413e-05, |
| "loss": 0.9443, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5926481620405101, |
| "grad_norm": 8.189558029174805, |
| "learning_rate": 1.7121899173112833e-05, |
| "loss": 0.9245, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5963990997749438, |
| "grad_norm": 7.582670211791992, |
| "learning_rate": 1.709522539343825e-05, |
| "loss": 0.8533, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6001500375093773, |
| "grad_norm": 8.973713874816895, |
| "learning_rate": 1.7068551613763672e-05, |
| "loss": 0.9197, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6039009752438109, |
| "grad_norm": 7.140238285064697, |
| "learning_rate": 1.7041877834089092e-05, |
| "loss": 0.8815, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.6076519129782446, |
| "grad_norm": 7.83927059173584, |
| "learning_rate": 1.701520405441451e-05, |
| "loss": 0.9782, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6114028507126782, |
| "grad_norm": 6.876523494720459, |
| "learning_rate": 1.698853027473993e-05, |
| "loss": 0.9575, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6151537884471118, |
| "grad_norm": 10.362568855285645, |
| "learning_rate": 1.696185649506535e-05, |
| "loss": 0.8977, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6189047261815454, |
| "grad_norm": 9.509383201599121, |
| "learning_rate": 1.6935182715390774e-05, |
| "loss": 0.996, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.622655663915979, |
| "grad_norm": 5.023642539978027, |
| "learning_rate": 1.6908508935716193e-05, |
| "loss": 0.9131, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6264066016504126, |
| "grad_norm": 6.320276260375977, |
| "learning_rate": 1.6881835156041613e-05, |
| "loss": 0.9765, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6301575393848462, |
| "grad_norm": 10.261762619018555, |
| "learning_rate": 1.6855161376367033e-05, |
| "loss": 0.9057, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6339084771192798, |
| "grad_norm": 8.115468978881836, |
| "learning_rate": 1.6828487596692452e-05, |
| "loss": 0.8892, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6376594148537135, |
| "grad_norm": 10.657661437988281, |
| "learning_rate": 1.6801813817017875e-05, |
| "loss": 0.9186, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.641410352588147, |
| "grad_norm": 7.065814018249512, |
| "learning_rate": 1.6775140037343295e-05, |
| "loss": 0.8878, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 8.048439979553223, |
| "learning_rate": 1.6748466257668714e-05, |
| "loss": 0.946, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6489122280570142, |
| "grad_norm": 10.228202819824219, |
| "learning_rate": 1.672179247799413e-05, |
| "loss": 0.838, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6526631657914479, |
| "grad_norm": 10.011300086975098, |
| "learning_rate": 1.6695118698319554e-05, |
| "loss": 1.0565, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6564141035258815, |
| "grad_norm": 8.266985893249512, |
| "learning_rate": 1.6668444918644973e-05, |
| "loss": 0.9523, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.660165041260315, |
| "grad_norm": 7.511131763458252, |
| "learning_rate": 1.6641771138970393e-05, |
| "loss": 1.0325, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6639159789947486, |
| "grad_norm": 7.235232830047607, |
| "learning_rate": 1.6615097359295813e-05, |
| "loss": 0.9197, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6676669167291823, |
| "grad_norm": 8.137916564941406, |
| "learning_rate": 1.6588423579621232e-05, |
| "loss": 0.886, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6714178544636159, |
| "grad_norm": 7.320621013641357, |
| "learning_rate": 1.6561749799946655e-05, |
| "loss": 0.8866, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6751687921980495, |
| "grad_norm": 8.104268074035645, |
| "learning_rate": 1.6535076020272075e-05, |
| "loss": 0.9554, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6789197299324832, |
| "grad_norm": 8.669350624084473, |
| "learning_rate": 1.6508402240597494e-05, |
| "loss": 0.907, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6826706676669168, |
| "grad_norm": 7.718722820281982, |
| "learning_rate": 1.6481728460922914e-05, |
| "loss": 0.9931, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.6864216054013503, |
| "grad_norm": 6.479692459106445, |
| "learning_rate": 1.6455054681248334e-05, |
| "loss": 0.9669, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.6901725431357839, |
| "grad_norm": 5.159636497497559, |
| "learning_rate": 1.6428380901573757e-05, |
| "loss": 1.0003, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.6939234808702176, |
| "grad_norm": 6.043707847595215, |
| "learning_rate": 1.6401707121899176e-05, |
| "loss": 0.894, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6976744186046512, |
| "grad_norm": 8.509610176086426, |
| "learning_rate": 1.6375033342224596e-05, |
| "loss": 1.0656, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.7014253563390848, |
| "grad_norm": 10.496292114257812, |
| "learning_rate": 1.6348359562550015e-05, |
| "loss": 0.9162, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.7051762940735183, |
| "grad_norm": 9.357151985168457, |
| "learning_rate": 1.6321685782875435e-05, |
| "loss": 0.8575, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.708927231807952, |
| "grad_norm": 7.78256368637085, |
| "learning_rate": 1.6295012003200855e-05, |
| "loss": 0.7904, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7126781695423856, |
| "grad_norm": 6.14832067489624, |
| "learning_rate": 1.6268338223526274e-05, |
| "loss": 0.8348, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7164291072768192, |
| "grad_norm": 7.879366874694824, |
| "learning_rate": 1.6241664443851694e-05, |
| "loss": 0.8826, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7201800450112528, |
| "grad_norm": 6.204752445220947, |
| "learning_rate": 1.6214990664177114e-05, |
| "loss": 0.9157, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7239309827456865, |
| "grad_norm": 7.274019241333008, |
| "learning_rate": 1.6188316884502537e-05, |
| "loss": 0.8869, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.72768192048012, |
| "grad_norm": 5.929676055908203, |
| "learning_rate": 1.6161643104827956e-05, |
| "loss": 0.9372, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7314328582145536, |
| "grad_norm": 9.161755561828613, |
| "learning_rate": 1.6134969325153376e-05, |
| "loss": 0.9211, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7351837959489872, |
| "grad_norm": 5.079675674438477, |
| "learning_rate": 1.6108295545478795e-05, |
| "loss": 0.8084, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7389347336834209, |
| "grad_norm": 8.15173053741455, |
| "learning_rate": 1.6081621765804215e-05, |
| "loss": 1.0033, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.7426856714178545, |
| "grad_norm": 6.805727005004883, |
| "learning_rate": 1.6054947986129638e-05, |
| "loss": 1.0074, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.746436609152288, |
| "grad_norm": 8.05391788482666, |
| "learning_rate": 1.6028274206455058e-05, |
| "loss": 0.9942, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7501875468867217, |
| "grad_norm": 6.02817440032959, |
| "learning_rate": 1.6001600426780477e-05, |
| "loss": 1.0494, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7539384846211553, |
| "grad_norm": 9.404801368713379, |
| "learning_rate": 1.5974926647105897e-05, |
| "loss": 0.9451, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7576894223555889, |
| "grad_norm": 5.526783466339111, |
| "learning_rate": 1.5948252867431316e-05, |
| "loss": 0.9378, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7614403600900225, |
| "grad_norm": 8.972588539123535, |
| "learning_rate": 1.5921579087756736e-05, |
| "loss": 0.9808, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7651912978244562, |
| "grad_norm": 4.961981296539307, |
| "learning_rate": 1.5894905308082156e-05, |
| "loss": 0.9078, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.7689422355588897, |
| "grad_norm": 3.8509440422058105, |
| "learning_rate": 1.5868231528407575e-05, |
| "loss": 1.0518, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.7726931732933233, |
| "grad_norm": 7.673577785491943, |
| "learning_rate": 1.5841557748732995e-05, |
| "loss": 0.9075, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7764441110277569, |
| "grad_norm": 8.731016159057617, |
| "learning_rate": 1.5814883969058418e-05, |
| "loss": 0.9208, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.7801950487621906, |
| "grad_norm": 6.979492664337158, |
| "learning_rate": 1.5788210189383838e-05, |
| "loss": 0.8977, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.7839459864966242, |
| "grad_norm": 8.666240692138672, |
| "learning_rate": 1.5761536409709257e-05, |
| "loss": 0.899, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.7876969242310577, |
| "grad_norm": 6.528694152832031, |
| "learning_rate": 1.5734862630034677e-05, |
| "loss": 0.844, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7914478619654913, |
| "grad_norm": 7.253232479095459, |
| "learning_rate": 1.5708188850360096e-05, |
| "loss": 0.7766, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.795198799699925, |
| "grad_norm": 6.888519287109375, |
| "learning_rate": 1.568151507068552e-05, |
| "loss": 0.9393, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.7989497374343586, |
| "grad_norm": 6.408233165740967, |
| "learning_rate": 1.565484129101094e-05, |
| "loss": 1.0171, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.8027006751687922, |
| "grad_norm": 9.36056137084961, |
| "learning_rate": 1.562816751133636e-05, |
| "loss": 0.9127, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "grad_norm": 11.695134162902832, |
| "learning_rate": 1.5601493731661778e-05, |
| "loss": 1.0232, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8102025506376594, |
| "grad_norm": 6.716568470001221, |
| "learning_rate": 1.5574819951987198e-05, |
| "loss": 0.9904, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.813953488372093, |
| "grad_norm": 5.994268417358398, |
| "learning_rate": 1.5548146172312617e-05, |
| "loss": 0.8897, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8177044261065266, |
| "grad_norm": 8.419204711914062, |
| "learning_rate": 1.5521472392638037e-05, |
| "loss": 0.8315, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8214553638409603, |
| "grad_norm": 6.702762603759766, |
| "learning_rate": 1.5494798612963457e-05, |
| "loss": 0.9393, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.8252063015753939, |
| "grad_norm": 9.53264045715332, |
| "learning_rate": 1.5468124833288876e-05, |
| "loss": 1.0074, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8289572393098275, |
| "grad_norm": 5.6720476150512695, |
| "learning_rate": 1.54414510536143e-05, |
| "loss": 0.7935, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.832708177044261, |
| "grad_norm": 7.338003158569336, |
| "learning_rate": 1.541477727393972e-05, |
| "loss": 0.898, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8364591147786947, |
| "grad_norm": 6.529892444610596, |
| "learning_rate": 1.538810349426514e-05, |
| "loss": 0.8197, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8402100525131283, |
| "grad_norm": 9.971487045288086, |
| "learning_rate": 1.5361429714590558e-05, |
| "loss": 0.9551, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8439609902475619, |
| "grad_norm": 5.594128608703613, |
| "learning_rate": 1.5334755934915978e-05, |
| "loss": 1.1114, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8477119279819955, |
| "grad_norm": 5.723794460296631, |
| "learning_rate": 1.53080821552414e-05, |
| "loss": 0.9341, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8514628657164292, |
| "grad_norm": 5.728211879730225, |
| "learning_rate": 1.528140837556682e-05, |
| "loss": 0.9961, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.8552138034508627, |
| "grad_norm": 7.517919063568115, |
| "learning_rate": 1.525473459589224e-05, |
| "loss": 0.8542, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8589647411852963, |
| "grad_norm": 4.70159387588501, |
| "learning_rate": 1.522806081621766e-05, |
| "loss": 1.0348, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.8627156789197299, |
| "grad_norm": 5.308437347412109, |
| "learning_rate": 1.5201387036543081e-05, |
| "loss": 0.9645, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8664666166541636, |
| "grad_norm": 5.659054756164551, |
| "learning_rate": 1.5174713256868499e-05, |
| "loss": 0.8317, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.8702175543885972, |
| "grad_norm": 5.970462799072266, |
| "learning_rate": 1.5148039477193918e-05, |
| "loss": 0.9889, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.8739684921230307, |
| "grad_norm": 5.605343818664551, |
| "learning_rate": 1.512136569751934e-05, |
| "loss": 0.8545, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.8777194298574643, |
| "grad_norm": 9.641878128051758, |
| "learning_rate": 1.509469191784476e-05, |
| "loss": 1.0026, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.881470367591898, |
| "grad_norm": 9.36474323272705, |
| "learning_rate": 1.5068018138170179e-05, |
| "loss": 0.927, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.8852213053263316, |
| "grad_norm": 8.28822135925293, |
| "learning_rate": 1.50413443584956e-05, |
| "loss": 0.9955, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.8889722430607652, |
| "grad_norm": 7.714781284332275, |
| "learning_rate": 1.501467057882102e-05, |
| "loss": 0.9366, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.8927231807951987, |
| "grad_norm": 3.879307508468628, |
| "learning_rate": 1.498799679914644e-05, |
| "loss": 0.9002, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.8964741185296324, |
| "grad_norm": 5.898133754730225, |
| "learning_rate": 1.4961323019471861e-05, |
| "loss": 0.8564, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.900225056264066, |
| "grad_norm": 6.275933265686035, |
| "learning_rate": 1.493464923979728e-05, |
| "loss": 0.9471, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9039759939984996, |
| "grad_norm": 6.680263519287109, |
| "learning_rate": 1.4907975460122702e-05, |
| "loss": 0.8609, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.9077269317329333, |
| "grad_norm": 7.0698676109313965, |
| "learning_rate": 1.4881301680448121e-05, |
| "loss": 0.7758, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.9114778694673669, |
| "grad_norm": 10.66848373413086, |
| "learning_rate": 1.4854627900773541e-05, |
| "loss": 0.8225, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.9152288072018004, |
| "grad_norm": 8.714693069458008, |
| "learning_rate": 1.4827954121098962e-05, |
| "loss": 0.8777, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.918979744936234, |
| "grad_norm": 31.062232971191406, |
| "learning_rate": 1.480128034142438e-05, |
| "loss": 1.0204, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9227306826706677, |
| "grad_norm": 11.140453338623047, |
| "learning_rate": 1.47746065617498e-05, |
| "loss": 0.9509, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9264816204051013, |
| "grad_norm": 6.338695526123047, |
| "learning_rate": 1.4747932782075221e-05, |
| "loss": 0.8125, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 8.720800399780273, |
| "learning_rate": 1.472125900240064e-05, |
| "loss": 0.8114, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.9339834958739685, |
| "grad_norm": 11.407164573669434, |
| "learning_rate": 1.469458522272606e-05, |
| "loss": 1.0623, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.9377344336084021, |
| "grad_norm": 6.310417652130127, |
| "learning_rate": 1.4667911443051482e-05, |
| "loss": 0.9014, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9414853713428357, |
| "grad_norm": 5.94149923324585, |
| "learning_rate": 1.4641237663376901e-05, |
| "loss": 0.7657, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.9452363090772693, |
| "grad_norm": 9.478999137878418, |
| "learning_rate": 1.4614563883702323e-05, |
| "loss": 0.8412, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.9489872468117029, |
| "grad_norm": 8.735868453979492, |
| "learning_rate": 1.4587890104027742e-05, |
| "loss": 0.9043, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.9527381845461366, |
| "grad_norm": 6.766534328460693, |
| "learning_rate": 1.4561216324353162e-05, |
| "loss": 0.9538, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.9564891222805701, |
| "grad_norm": 18.577468872070312, |
| "learning_rate": 1.4534542544678583e-05, |
| "loss": 0.9458, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.9602400600150037, |
| "grad_norm": 9.248088836669922, |
| "learning_rate": 1.4507868765004003e-05, |
| "loss": 0.913, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.9639909977494373, |
| "grad_norm": 7.771203994750977, |
| "learning_rate": 1.4481194985329422e-05, |
| "loss": 0.931, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.967741935483871, |
| "grad_norm": 7.330334663391113, |
| "learning_rate": 1.4454521205654844e-05, |
| "loss": 0.9681, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.9714928732183046, |
| "grad_norm": 6.74515438079834, |
| "learning_rate": 1.4427847425980263e-05, |
| "loss": 0.9477, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.9752438109527382, |
| "grad_norm": 8.954100608825684, |
| "learning_rate": 1.4401173646305681e-05, |
| "loss": 0.8958, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9789947486871718, |
| "grad_norm": 11.33262825012207, |
| "learning_rate": 1.4374499866631103e-05, |
| "loss": 0.7998, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.9827456864216054, |
| "grad_norm": 7.142065048217773, |
| "learning_rate": 1.4347826086956522e-05, |
| "loss": 0.9897, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.986496624156039, |
| "grad_norm": 8.922056198120117, |
| "learning_rate": 1.4321152307281942e-05, |
| "loss": 0.9172, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.9902475618904726, |
| "grad_norm": 5.288200378417969, |
| "learning_rate": 1.4294478527607363e-05, |
| "loss": 0.8836, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.9939984996249063, |
| "grad_norm": 10.067593574523926, |
| "learning_rate": 1.4267804747932783e-05, |
| "loss": 1.0019, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.9977494373593399, |
| "grad_norm": 5.186861515045166, |
| "learning_rate": 1.4241130968258204e-05, |
| "loss": 0.8005, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.5890295358649789, |
| "eval_f1_macro": 0.5855792301386851, |
| "eval_f1_weighted": 0.5883403945261724, |
| "eval_loss": 0.9054797887802124, |
| "eval_precision_macro": 0.5964531108356991, |
| "eval_precision_weighted": 0.5920764019753845, |
| "eval_recall_macro": 0.5799936335134275, |
| "eval_recall_weighted": 0.5890295358649789, |
| "eval_runtime": 4.8377, |
| "eval_samples_per_second": 489.903, |
| "eval_steps_per_second": 61.393, |
| "step": 2666 |
| }, |
| { |
| "epoch": 1.0015003750937734, |
| "grad_norm": 7.454843044281006, |
| "learning_rate": 1.4214457188583624e-05, |
| "loss": 0.9261, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.005251312828207, |
| "grad_norm": 7.612959384918213, |
| "learning_rate": 1.4187783408909043e-05, |
| "loss": 0.8656, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.0090022505626406, |
| "grad_norm": 5.689546585083008, |
| "learning_rate": 1.4161109629234465e-05, |
| "loss": 0.8539, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.0127531882970742, |
| "grad_norm": 9.812941551208496, |
| "learning_rate": 1.4134435849559884e-05, |
| "loss": 0.8154, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.016504126031508, |
| "grad_norm": 6.9208550453186035, |
| "learning_rate": 1.4107762069885304e-05, |
| "loss": 0.8441, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.0202550637659416, |
| "grad_norm": 5.310056686401367, |
| "learning_rate": 1.4081088290210725e-05, |
| "loss": 0.9471, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.0240060015003751, |
| "grad_norm": 9.985223770141602, |
| "learning_rate": 1.4054414510536145e-05, |
| "loss": 0.853, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.0277569392348087, |
| "grad_norm": 21.524646759033203, |
| "learning_rate": 1.4027740730861563e-05, |
| "loss": 0.9408, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.0315078769692423, |
| "grad_norm": 9.250083923339844, |
| "learning_rate": 1.4001066951186984e-05, |
| "loss": 0.8023, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.0352588147036759, |
| "grad_norm": 6.028738975524902, |
| "learning_rate": 1.3974393171512404e-05, |
| "loss": 0.7849, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.0390097524381094, |
| "grad_norm": 9.787884712219238, |
| "learning_rate": 1.3947719391837823e-05, |
| "loss": 0.7474, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.042760690172543, |
| "grad_norm": 12.639663696289062, |
| "learning_rate": 1.3921045612163244e-05, |
| "loss": 0.8167, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.0465116279069768, |
| "grad_norm": 15.691644668579102, |
| "learning_rate": 1.3894371832488664e-05, |
| "loss": 0.7467, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.0502625656414104, |
| "grad_norm": 7.864928722381592, |
| "learning_rate": 1.3867698052814085e-05, |
| "loss": 0.9476, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.054013503375844, |
| "grad_norm": 8.662647247314453, |
| "learning_rate": 1.3841024273139505e-05, |
| "loss": 0.8529, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.0577644411102776, |
| "grad_norm": 8.244277954101562, |
| "learning_rate": 1.3814350493464925e-05, |
| "loss": 0.753, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.0615153788447111, |
| "grad_norm": 8.806965827941895, |
| "learning_rate": 1.3787676713790346e-05, |
| "loss": 0.7577, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.0652663165791447, |
| "grad_norm": 11.864466667175293, |
| "learning_rate": 1.3761002934115766e-05, |
| "loss": 0.8227, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.0690172543135783, |
| "grad_norm": 16.477638244628906, |
| "learning_rate": 1.3734329154441187e-05, |
| "loss": 0.8603, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.072768192048012, |
| "grad_norm": 10.029014587402344, |
| "learning_rate": 1.3707655374766607e-05, |
| "loss": 0.7507, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.0765191297824457, |
| "grad_norm": 42.02882766723633, |
| "learning_rate": 1.3680981595092026e-05, |
| "loss": 0.8731, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.0802700675168793, |
| "grad_norm": 11.340489387512207, |
| "learning_rate": 1.3654307815417447e-05, |
| "loss": 0.8736, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.0840210052513128, |
| "grad_norm": 10.736079216003418, |
| "learning_rate": 1.3627634035742865e-05, |
| "loss": 0.7387, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.0877719429857464, |
| "grad_norm": 12.158968925476074, |
| "learning_rate": 1.3600960256068285e-05, |
| "loss": 0.8563, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.09152288072018, |
| "grad_norm": 4.968686103820801, |
| "learning_rate": 1.3574286476393706e-05, |
| "loss": 0.8865, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.0952738184546136, |
| "grad_norm": 9.05169677734375, |
| "learning_rate": 1.3547612696719126e-05, |
| "loss": 0.9706, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.0990247561890472, |
| "grad_norm": 8.993448257446289, |
| "learning_rate": 1.3520938917044546e-05, |
| "loss": 0.7936, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.102775693923481, |
| "grad_norm": 9.852548599243164, |
| "learning_rate": 1.3494265137369967e-05, |
| "loss": 0.9188, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.1065266316579145, |
| "grad_norm": 8.509963035583496, |
| "learning_rate": 1.3467591357695386e-05, |
| "loss": 0.9182, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.1102775693923481, |
| "grad_norm": 9.74703311920166, |
| "learning_rate": 1.3440917578020806e-05, |
| "loss": 0.8979, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.1140285071267817, |
| "grad_norm": 11.76938247680664, |
| "learning_rate": 1.3414243798346227e-05, |
| "loss": 0.8431, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.1177794448612153, |
| "grad_norm": 8.194916725158691, |
| "learning_rate": 1.3387570018671647e-05, |
| "loss": 0.8794, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.1215303825956489, |
| "grad_norm": 5.259307861328125, |
| "learning_rate": 1.3360896238997068e-05, |
| "loss": 0.8688, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.1252813203300824, |
| "grad_norm": 8.892224311828613, |
| "learning_rate": 1.3334222459322488e-05, |
| "loss": 0.8924, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.129032258064516, |
| "grad_norm": 10.505491256713867, |
| "learning_rate": 1.3307548679647908e-05, |
| "loss": 0.8347, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.1327831957989498, |
| "grad_norm": 4.74807071685791, |
| "learning_rate": 1.3280874899973329e-05, |
| "loss": 0.7728, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.1365341335333834, |
| "grad_norm": 12.980900764465332, |
| "learning_rate": 1.3254201120298747e-05, |
| "loss": 0.7915, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.140285071267817, |
| "grad_norm": 12.24691104888916, |
| "learning_rate": 1.3227527340624166e-05, |
| "loss": 0.8422, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.1440360090022506, |
| "grad_norm": 6.215153217315674, |
| "learning_rate": 1.3200853560949588e-05, |
| "loss": 0.8067, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.1477869467366841, |
| "grad_norm": 15.73306941986084, |
| "learning_rate": 1.3174179781275007e-05, |
| "loss": 0.8135, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.1515378844711177, |
| "grad_norm": 12.068921089172363, |
| "learning_rate": 1.3147506001600427e-05, |
| "loss": 0.7305, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.1552888222055513, |
| "grad_norm": 6.1044464111328125, |
| "learning_rate": 1.3120832221925848e-05, |
| "loss": 0.9578, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.159039759939985, |
| "grad_norm": 10.416324615478516, |
| "learning_rate": 1.3094158442251268e-05, |
| "loss": 0.8362, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.1627906976744187, |
| "grad_norm": 13.548623085021973, |
| "learning_rate": 1.3067484662576687e-05, |
| "loss": 0.7862, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.1665416354088523, |
| "grad_norm": 9.015273094177246, |
| "learning_rate": 1.3040810882902109e-05, |
| "loss": 0.8432, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.1702925731432858, |
| "grad_norm": 4.893497467041016, |
| "learning_rate": 1.3014137103227528e-05, |
| "loss": 0.719, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.1740435108777194, |
| "grad_norm": 12.783862113952637, |
| "learning_rate": 1.298746332355295e-05, |
| "loss": 0.9088, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.177794448612153, |
| "grad_norm": 10.826465606689453, |
| "learning_rate": 1.296078954387837e-05, |
| "loss": 0.8758, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.1815453863465866, |
| "grad_norm": 9.32836627960205, |
| "learning_rate": 1.2934115764203789e-05, |
| "loss": 0.7643, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.1852963240810204, |
| "grad_norm": 9.504363059997559, |
| "learning_rate": 1.290744198452921e-05, |
| "loss": 0.8174, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.189047261815454, |
| "grad_norm": 12.839066505432129, |
| "learning_rate": 1.2880768204854628e-05, |
| "loss": 0.7992, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.1927981995498875, |
| "grad_norm": 9.912968635559082, |
| "learning_rate": 1.2854094425180048e-05, |
| "loss": 0.7793, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.196549137284321, |
| "grad_norm": 9.632975578308105, |
| "learning_rate": 1.2827420645505469e-05, |
| "loss": 0.8062, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.2003000750187547, |
| "grad_norm": 15.091144561767578, |
| "learning_rate": 1.2800746865830889e-05, |
| "loss": 0.8319, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.2040510127531883, |
| "grad_norm": 9.834930419921875, |
| "learning_rate": 1.2774073086156308e-05, |
| "loss": 0.7946, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.2078019504876218, |
| "grad_norm": 9.097467422485352, |
| "learning_rate": 1.274739930648173e-05, |
| "loss": 0.7423, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.2115528882220554, |
| "grad_norm": 7.097741603851318, |
| "learning_rate": 1.272072552680715e-05, |
| "loss": 0.7668, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.215303825956489, |
| "grad_norm": 16.66200828552246, |
| "learning_rate": 1.269405174713257e-05, |
| "loss": 0.8292, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.2190547636909228, |
| "grad_norm": 4.819615840911865, |
| "learning_rate": 1.266737796745799e-05, |
| "loss": 0.7641, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.2228057014253564, |
| "grad_norm": 12.379060745239258, |
| "learning_rate": 1.264070418778341e-05, |
| "loss": 0.7749, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.22655663915979, |
| "grad_norm": 10.446650505065918, |
| "learning_rate": 1.2614030408108831e-05, |
| "loss": 0.8306, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.2303075768942235, |
| "grad_norm": 13.330952644348145, |
| "learning_rate": 1.258735662843425e-05, |
| "loss": 0.7924, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.2340585146286571, |
| "grad_norm": 11.163646697998047, |
| "learning_rate": 1.256068284875967e-05, |
| "loss": 0.8505, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.2378094523630907, |
| "grad_norm": 10.235424995422363, |
| "learning_rate": 1.2534009069085092e-05, |
| "loss": 0.7111, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.2415603900975243, |
| "grad_norm": 9.529205322265625, |
| "learning_rate": 1.2507335289410511e-05, |
| "loss": 0.9559, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.245311327831958, |
| "grad_norm": 9.511346817016602, |
| "learning_rate": 1.2480661509735929e-05, |
| "loss": 1.0107, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.2490622655663917, |
| "grad_norm": 5.115582466125488, |
| "learning_rate": 1.245398773006135e-05, |
| "loss": 0.8128, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.2528132033008252, |
| "grad_norm": 10.270365715026855, |
| "learning_rate": 1.242731395038677e-05, |
| "loss": 0.7537, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.2565641410352588, |
| "grad_norm": 15.309682846069336, |
| "learning_rate": 1.240064017071219e-05, |
| "loss": 0.776, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.2603150787696924, |
| "grad_norm": 6.9617414474487305, |
| "learning_rate": 1.2373966391037611e-05, |
| "loss": 0.7818, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.264066016504126, |
| "grad_norm": 14.111533164978027, |
| "learning_rate": 1.234729261136303e-05, |
| "loss": 0.8766, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.2678169542385596, |
| "grad_norm": 15.513258934020996, |
| "learning_rate": 1.2320618831688452e-05, |
| "loss": 0.8124, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.2715678919729934, |
| "grad_norm": 10.617011070251465, |
| "learning_rate": 1.2293945052013872e-05, |
| "loss": 0.7367, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.275318829707427, |
| "grad_norm": 10.756956100463867, |
| "learning_rate": 1.2267271272339291e-05, |
| "loss": 0.9371, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.2790697674418605, |
| "grad_norm": 20.27239990234375, |
| "learning_rate": 1.2240597492664712e-05, |
| "loss": 0.7812, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.282820705176294, |
| "grad_norm": 13.26762580871582, |
| "learning_rate": 1.2213923712990132e-05, |
| "loss": 0.9214, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.2865716429107277, |
| "grad_norm": 6.740780830383301, |
| "learning_rate": 1.2187249933315552e-05, |
| "loss": 0.7254, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.2903225806451613, |
| "grad_norm": 8.460843086242676, |
| "learning_rate": 1.2160576153640973e-05, |
| "loss": 0.8793, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.2940735183795948, |
| "grad_norm": 8.37424373626709, |
| "learning_rate": 1.2133902373966393e-05, |
| "loss": 0.7178, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.2978244561140286, |
| "grad_norm": 9.57453441619873, |
| "learning_rate": 1.210722859429181e-05, |
| "loss": 0.9584, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.301575393848462, |
| "grad_norm": 15.27446460723877, |
| "learning_rate": 1.2080554814617232e-05, |
| "loss": 0.7314, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.3053263315828958, |
| "grad_norm": 16.266162872314453, |
| "learning_rate": 1.2053881034942651e-05, |
| "loss": 0.8651, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.3090772693173294, |
| "grad_norm": 9.161102294921875, |
| "learning_rate": 1.2027207255268071e-05, |
| "loss": 0.7086, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.312828207051763, |
| "grad_norm": 12.645145416259766, |
| "learning_rate": 1.2000533475593492e-05, |
| "loss": 0.874, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.3165791447861965, |
| "grad_norm": 9.018929481506348, |
| "learning_rate": 1.1973859695918912e-05, |
| "loss": 0.7457, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.32033008252063, |
| "grad_norm": 10.96903133392334, |
| "learning_rate": 1.1947185916244333e-05, |
| "loss": 0.865, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.3240810202550637, |
| "grad_norm": 15.08077621459961, |
| "learning_rate": 1.1920512136569753e-05, |
| "loss": 0.8127, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.3278319579894973, |
| "grad_norm": 6.171741962432861, |
| "learning_rate": 1.1893838356895173e-05, |
| "loss": 0.7038, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.331582895723931, |
| "grad_norm": 12.167604446411133, |
| "learning_rate": 1.1867164577220594e-05, |
| "loss": 0.7373, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.3353338334583646, |
| "grad_norm": 12.859063148498535, |
| "learning_rate": 1.1840490797546013e-05, |
| "loss": 0.8292, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.3390847711927982, |
| "grad_norm": 9.17769718170166, |
| "learning_rate": 1.1813817017871435e-05, |
| "loss": 0.8117, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.3428357089272318, |
| "grad_norm": 7.380620002746582, |
| "learning_rate": 1.1787143238196854e-05, |
| "loss": 0.7943, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.3465866466616654, |
| "grad_norm": 19.143110275268555, |
| "learning_rate": 1.1760469458522274e-05, |
| "loss": 0.7798, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.350337584396099, |
| "grad_norm": 14.915560722351074, |
| "learning_rate": 1.1733795678847695e-05, |
| "loss": 0.8568, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.3540885221305325, |
| "grad_norm": 16.487377166748047, |
| "learning_rate": 1.1707121899173113e-05, |
| "loss": 0.8586, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.3578394598649663, |
| "grad_norm": 9.255929946899414, |
| "learning_rate": 1.1680448119498533e-05, |
| "loss": 0.7957, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.3615903975994, |
| "grad_norm": 12.38227653503418, |
| "learning_rate": 1.1653774339823954e-05, |
| "loss": 0.7957, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.3653413353338335, |
| "grad_norm": 10.949649810791016, |
| "learning_rate": 1.1627100560149374e-05, |
| "loss": 0.6871, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.369092273068267, |
| "grad_norm": 7.265697956085205, |
| "learning_rate": 1.1600426780474793e-05, |
| "loss": 0.7341, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.3728432108027007, |
| "grad_norm": 12.582711219787598, |
| "learning_rate": 1.1573753000800215e-05, |
| "loss": 0.8242, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.3765941485371342, |
| "grad_norm": 12.345062255859375, |
| "learning_rate": 1.1547079221125634e-05, |
| "loss": 0.8768, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.3803450862715678, |
| "grad_norm": 8.697713851928711, |
| "learning_rate": 1.1520405441451054e-05, |
| "loss": 0.855, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.3840960240060016, |
| "grad_norm": 9.254758834838867, |
| "learning_rate": 1.1493731661776475e-05, |
| "loss": 0.909, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.387846961740435, |
| "grad_norm": 9.739770889282227, |
| "learning_rate": 1.1467057882101895e-05, |
| "loss": 0.8582, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.3915978994748688, |
| "grad_norm": 12.004996299743652, |
| "learning_rate": 1.1440384102427316e-05, |
| "loss": 0.7344, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.3953488372093024, |
| "grad_norm": 13.092066764831543, |
| "learning_rate": 1.1413710322752736e-05, |
| "loss": 0.8916, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.399099774943736, |
| "grad_norm": 12.259298324584961, |
| "learning_rate": 1.1387036543078155e-05, |
| "loss": 0.9096, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.4028507126781695, |
| "grad_norm": 8.312166213989258, |
| "learning_rate": 1.1360362763403577e-05, |
| "loss": 0.8647, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.406601650412603, |
| "grad_norm": 8.59150218963623, |
| "learning_rate": 1.1333688983728995e-05, |
| "loss": 0.9202, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.4103525881470367, |
| "grad_norm": 8.444820404052734, |
| "learning_rate": 1.1307015204054414e-05, |
| "loss": 0.7343, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.4141035258814703, |
| "grad_norm": 12.232796669006348, |
| "learning_rate": 1.1280341424379836e-05, |
| "loss": 0.7329, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.417854463615904, |
| "grad_norm": 9.038057327270508, |
| "learning_rate": 1.1253667644705255e-05, |
| "loss": 0.8751, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.4216054013503376, |
| "grad_norm": 5.729677200317383, |
| "learning_rate": 1.1226993865030675e-05, |
| "loss": 0.7319, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.4253563390847712, |
| "grad_norm": 7.777376651763916, |
| "learning_rate": 1.1200320085356096e-05, |
| "loss": 0.802, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.4291072768192048, |
| "grad_norm": 13.165481567382812, |
| "learning_rate": 1.1173646305681516e-05, |
| "loss": 0.7195, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.4328582145536384, |
| "grad_norm": 10.966960906982422, |
| "learning_rate": 1.1146972526006935e-05, |
| "loss": 0.8234, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.436609152288072, |
| "grad_norm": 8.237056732177734, |
| "learning_rate": 1.1120298746332357e-05, |
| "loss": 0.7832, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.4403600900225055, |
| "grad_norm": 10.419988632202148, |
| "learning_rate": 1.1093624966657776e-05, |
| "loss": 0.8292, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.4441110277569393, |
| "grad_norm": 14.655726432800293, |
| "learning_rate": 1.1066951186983198e-05, |
| "loss": 0.8523, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.447861965491373, |
| "grad_norm": 10.38304328918457, |
| "learning_rate": 1.1040277407308617e-05, |
| "loss": 0.856, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.4516129032258065, |
| "grad_norm": 13.249422073364258, |
| "learning_rate": 1.1013603627634037e-05, |
| "loss": 0.8403, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.45536384096024, |
| "grad_norm": 9.854536056518555, |
| "learning_rate": 1.0986929847959458e-05, |
| "loss": 0.794, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.4591147786946737, |
| "grad_norm": 11.48951530456543, |
| "learning_rate": 1.0960256068284876e-05, |
| "loss": 0.7569, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.4628657164291072, |
| "grad_norm": 8.955044746398926, |
| "learning_rate": 1.0933582288610296e-05, |
| "loss": 0.8064, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.4666166541635408, |
| "grad_norm": 16.088743209838867, |
| "learning_rate": 1.0906908508935717e-05, |
| "loss": 0.8518, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.4703675918979746, |
| "grad_norm": 9.207806587219238, |
| "learning_rate": 1.0880234729261137e-05, |
| "loss": 0.875, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.474118529632408, |
| "grad_norm": 18.738187789916992, |
| "learning_rate": 1.0853560949586556e-05, |
| "loss": 0.8164, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.4778694673668418, |
| "grad_norm": 10.138594627380371, |
| "learning_rate": 1.0826887169911977e-05, |
| "loss": 0.791, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.4816204051012754, |
| "grad_norm": 9.635621070861816, |
| "learning_rate": 1.0800213390237397e-05, |
| "loss": 0.7878, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.485371342835709, |
| "grad_norm": 9.569879531860352, |
| "learning_rate": 1.0773539610562818e-05, |
| "loss": 0.8404, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.4891222805701425, |
| "grad_norm": 9.855542182922363, |
| "learning_rate": 1.0746865830888238e-05, |
| "loss": 0.8726, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.492873218304576, |
| "grad_norm": 16.710786819458008, |
| "learning_rate": 1.0720192051213658e-05, |
| "loss": 0.8706, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.49662415603901, |
| "grad_norm": 13.603216171264648, |
| "learning_rate": 1.0693518271539079e-05, |
| "loss": 0.8437, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.5003750937734432, |
| "grad_norm": 11.3872652053833, |
| "learning_rate": 1.0666844491864499e-05, |
| "loss": 0.6512, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.504126031507877, |
| "grad_norm": 10.2975492477417, |
| "learning_rate": 1.0640170712189918e-05, |
| "loss": 0.8774, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.5078769692423106, |
| "grad_norm": 7.741751194000244, |
| "learning_rate": 1.061349693251534e-05, |
| "loss": 0.7528, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.5116279069767442, |
| "grad_norm": 9.902315139770508, |
| "learning_rate": 1.0586823152840759e-05, |
| "loss": 0.7995, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.5153788447111778, |
| "grad_norm": 11.541082382202148, |
| "learning_rate": 1.0560149373166177e-05, |
| "loss": 0.7694, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.5191297824456114, |
| "grad_norm": 8.56485366821289, |
| "learning_rate": 1.0533475593491598e-05, |
| "loss": 0.8002, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.5228807201800452, |
| "grad_norm": 8.866626739501953, |
| "learning_rate": 1.0506801813817018e-05, |
| "loss": 0.792, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.5266316579144785, |
| "grad_norm": 10.332854270935059, |
| "learning_rate": 1.0480128034142438e-05, |
| "loss": 0.7378, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.5303825956489123, |
| "grad_norm": 8.805913925170898, |
| "learning_rate": 1.0453454254467859e-05, |
| "loss": 0.8287, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.5341335333833457, |
| "grad_norm": 10.885342597961426, |
| "learning_rate": 1.0426780474793278e-05, |
| "loss": 0.8454, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.5378844711177795, |
| "grad_norm": 11.047041893005371, |
| "learning_rate": 1.04001066951187e-05, |
| "loss": 0.8955, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.541635408852213, |
| "grad_norm": 12.287060737609863, |
| "learning_rate": 1.037343291544412e-05, |
| "loss": 0.9106, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.5453863465866466, |
| "grad_norm": 7.6913628578186035, |
| "learning_rate": 1.0346759135769539e-05, |
| "loss": 0.8287, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.5491372843210802, |
| "grad_norm": 12.864625930786133, |
| "learning_rate": 1.032008535609496e-05, |
| "loss": 0.8176, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.5528882220555138, |
| "grad_norm": 17.12616539001465, |
| "learning_rate": 1.029341157642038e-05, |
| "loss": 0.8964, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.5566391597899476, |
| "grad_norm": 9.076611518859863, |
| "learning_rate": 1.02667377967458e-05, |
| "loss": 0.8222, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.560390097524381, |
| "grad_norm": 9.327693939208984, |
| "learning_rate": 1.0240064017071221e-05, |
| "loss": 0.9358, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.5641410352588148, |
| "grad_norm": 7.653916358947754, |
| "learning_rate": 1.021339023739664e-05, |
| "loss": 0.7721, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.5678919729932483, |
| "grad_norm": 10.110307693481445, |
| "learning_rate": 1.0186716457722058e-05, |
| "loss": 0.9346, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.571642910727682, |
| "grad_norm": 11.298696517944336, |
| "learning_rate": 1.016004267804748e-05, |
| "loss": 0.8456, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.5753938484621155, |
| "grad_norm": 13.459417343139648, |
| "learning_rate": 1.01333688983729e-05, |
| "loss": 0.7815, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.579144786196549, |
| "grad_norm": 16.08547592163086, |
| "learning_rate": 1.0106695118698319e-05, |
| "loss": 0.7656, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.5828957239309829, |
| "grad_norm": 8.995433807373047, |
| "learning_rate": 1.008002133902374e-05, |
| "loss": 0.8248, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.5866466616654162, |
| "grad_norm": 10.426254272460938, |
| "learning_rate": 1.005334755934916e-05, |
| "loss": 0.7957, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.59039759939985, |
| "grad_norm": 8.310003280639648, |
| "learning_rate": 1.0026673779674581e-05, |
| "loss": 0.8313, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.5941485371342836, |
| "grad_norm": 14.415204048156738, |
| "learning_rate": 1e-05, |
| "loss": 0.7711, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.5978994748687172, |
| "grad_norm": 8.948083877563477, |
| "learning_rate": 9.97332622032542e-06, |
| "loss": 0.7868, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.6016504126031508, |
| "grad_norm": 16.681766510009766, |
| "learning_rate": 9.946652440650842e-06, |
| "loss": 0.8633, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.6054013503375844, |
| "grad_norm": 11.883402824401855, |
| "learning_rate": 9.919978660976261e-06, |
| "loss": 0.8195, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.6091522880720182, |
| "grad_norm": 11.386548042297363, |
| "learning_rate": 9.893304881301681e-06, |
| "loss": 0.7621, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "grad_norm": 13.255663871765137, |
| "learning_rate": 9.8666311016271e-06, |
| "loss": 1.0233, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.6166541635408853, |
| "grad_norm": 10.955714225769043, |
| "learning_rate": 9.839957321952522e-06, |
| "loss": 0.9456, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.6204051012753187, |
| "grad_norm": 7.624833583831787, |
| "learning_rate": 9.813283542277942e-06, |
| "loss": 0.9029, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.6241560390097525, |
| "grad_norm": 8.860147476196289, |
| "learning_rate": 9.786609762603361e-06, |
| "loss": 0.835, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.627906976744186, |
| "grad_norm": 13.29971981048584, |
| "learning_rate": 9.759935982928782e-06, |
| "loss": 0.848, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.6316579144786196, |
| "grad_norm": 10.151264190673828, |
| "learning_rate": 9.733262203254202e-06, |
| "loss": 0.7443, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.6354088522130532, |
| "grad_norm": 14.21789264678955, |
| "learning_rate": 9.706588423579622e-06, |
| "loss": 0.908, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.6391597899474868, |
| "grad_norm": 7.94905424118042, |
| "learning_rate": 9.679914643905041e-06, |
| "loss": 0.6919, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.6429107276819206, |
| "grad_norm": 8.60908031463623, |
| "learning_rate": 9.653240864230463e-06, |
| "loss": 0.7309, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.646661665416354, |
| "grad_norm": 15.03842544555664, |
| "learning_rate": 9.626567084555882e-06, |
| "loss": 0.9343, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.6504126031507877, |
| "grad_norm": 11.684754371643066, |
| "learning_rate": 9.599893304881302e-06, |
| "loss": 0.7532, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.6541635408852213, |
| "grad_norm": 6.24261999130249, |
| "learning_rate": 9.573219525206723e-06, |
| "loss": 0.8449, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.657914478619655, |
| "grad_norm": 5.580635070800781, |
| "learning_rate": 9.546545745532143e-06, |
| "loss": 0.644, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.6616654163540885, |
| "grad_norm": 13.382287979125977, |
| "learning_rate": 9.519871965857564e-06, |
| "loss": 0.8713, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.665416354088522, |
| "grad_norm": 11.218451499938965, |
| "learning_rate": 9.493198186182982e-06, |
| "loss": 0.6552, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.6691672918229559, |
| "grad_norm": 16.548782348632812, |
| "learning_rate": 9.466524406508403e-06, |
| "loss": 0.8024, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.6729182295573892, |
| "grad_norm": 17.210647583007812, |
| "learning_rate": 9.439850626833823e-06, |
| "loss": 0.7543, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.676669167291823, |
| "grad_norm": 13.630977630615234, |
| "learning_rate": 9.413176847159243e-06, |
| "loss": 0.8754, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.6804201050262566, |
| "grad_norm": 13.967558860778809, |
| "learning_rate": 9.386503067484664e-06, |
| "loss": 0.7699, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.6841710427606902, |
| "grad_norm": 11.707578659057617, |
| "learning_rate": 9.359829287810083e-06, |
| "loss": 0.8324, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.6879219804951238, |
| "grad_norm": 9.124420166015625, |
| "learning_rate": 9.333155508135505e-06, |
| "loss": 0.794, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.6916729182295573, |
| "grad_norm": 10.910788536071777, |
| "learning_rate": 9.306481728460923e-06, |
| "loss": 0.8241, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.6954238559639911, |
| "grad_norm": 13.6180419921875, |
| "learning_rate": 9.279807948786344e-06, |
| "loss": 0.8882, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.6991747936984245, |
| "grad_norm": 7.055276393890381, |
| "learning_rate": 9.253134169111764e-06, |
| "loss": 0.9011, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.7029257314328583, |
| "grad_norm": 14.100971221923828, |
| "learning_rate": 9.226460389437183e-06, |
| "loss": 0.8026, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.7066766691672917, |
| "grad_norm": 6.9184441566467285, |
| "learning_rate": 9.199786609762605e-06, |
| "loss": 0.6888, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.7104276069017255, |
| "grad_norm": 9.915225982666016, |
| "learning_rate": 9.173112830088024e-06, |
| "loss": 0.8456, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.714178544636159, |
| "grad_norm": 11.1101655960083, |
| "learning_rate": 9.146439050413445e-06, |
| "loss": 0.8979, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.7179294823705926, |
| "grad_norm": 11.128944396972656, |
| "learning_rate": 9.119765270738863e-06, |
| "loss": 0.8386, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.7216804201050264, |
| "grad_norm": 8.845916748046875, |
| "learning_rate": 9.093091491064285e-06, |
| "loss": 0.8375, |
| "step": 4590 |
| }, |
| { |
| "epoch": 1.7254313578394598, |
| "grad_norm": 12.3989839553833, |
| "learning_rate": 9.066417711389704e-06, |
| "loss": 0.7884, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.7291822955738936, |
| "grad_norm": 8.899964332580566, |
| "learning_rate": 9.039743931715126e-06, |
| "loss": 0.8391, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.732933233308327, |
| "grad_norm": 11.830737113952637, |
| "learning_rate": 9.013070152040545e-06, |
| "loss": 0.836, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.7366841710427607, |
| "grad_norm": 14.875555038452148, |
| "learning_rate": 8.986396372365965e-06, |
| "loss": 0.8148, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.7404351087771943, |
| "grad_norm": 8.44090461730957, |
| "learning_rate": 8.959722592691386e-06, |
| "loss": 0.7033, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.744186046511628, |
| "grad_norm": 7.954046726226807, |
| "learning_rate": 8.933048813016804e-06, |
| "loss": 0.8364, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.7479369842460615, |
| "grad_norm": 14.886021614074707, |
| "learning_rate": 8.906375033342225e-06, |
| "loss": 0.7641, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.751687921980495, |
| "grad_norm": 15.42341136932373, |
| "learning_rate": 8.879701253667645e-06, |
| "loss": 0.7152, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.7554388597149289, |
| "grad_norm": 18.62801742553711, |
| "learning_rate": 8.853027473993066e-06, |
| "loss": 0.9192, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.7591897974493622, |
| "grad_norm": 9.787707328796387, |
| "learning_rate": 8.826353694318486e-06, |
| "loss": 1.0479, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.762940735183796, |
| "grad_norm": 10.803950309753418, |
| "learning_rate": 8.799679914643906e-06, |
| "loss": 0.7217, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.7666916729182296, |
| "grad_norm": 5.519962787628174, |
| "learning_rate": 8.773006134969327e-06, |
| "loss": 0.8044, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.7704426106526632, |
| "grad_norm": 10.77694320678711, |
| "learning_rate": 8.746332355294745e-06, |
| "loss": 0.7759, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.7741935483870968, |
| "grad_norm": 8.671502113342285, |
| "learning_rate": 8.719658575620166e-06, |
| "loss": 0.834, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.7779444861215303, |
| "grad_norm": 10.25809097290039, |
| "learning_rate": 8.692984795945586e-06, |
| "loss": 0.7088, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.7816954238559641, |
| "grad_norm": 11.049978256225586, |
| "learning_rate": 8.666311016271007e-06, |
| "loss": 0.8777, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.7854463615903975, |
| "grad_norm": 6.090721130371094, |
| "learning_rate": 8.639637236596427e-06, |
| "loss": 0.8311, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.7891972993248313, |
| "grad_norm": 7.393324375152588, |
| "learning_rate": 8.612963456921846e-06, |
| "loss": 0.7623, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.7929482370592649, |
| "grad_norm": 9.985932350158691, |
| "learning_rate": 8.586289677247268e-06, |
| "loss": 0.7164, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.7966991747936985, |
| "grad_norm": 23.15224266052246, |
| "learning_rate": 8.559615897572687e-06, |
| "loss": 0.8139, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.800450112528132, |
| "grad_norm": 15.539804458618164, |
| "learning_rate": 8.532942117898107e-06, |
| "loss": 0.9038, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.8042010502625656, |
| "grad_norm": 20.424936294555664, |
| "learning_rate": 8.506268338223526e-06, |
| "loss": 0.9228, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.8079519879969994, |
| "grad_norm": 12.960927963256836, |
| "learning_rate": 8.479594558548948e-06, |
| "loss": 0.8129, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.8117029257314328, |
| "grad_norm": 12.578907012939453, |
| "learning_rate": 8.452920778874367e-06, |
| "loss": 0.7919, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.8154538634658666, |
| "grad_norm": 9.88344955444336, |
| "learning_rate": 8.426246999199787e-06, |
| "loss": 0.8888, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.8192048012003, |
| "grad_norm": 9.531432151794434, |
| "learning_rate": 8.399573219525208e-06, |
| "loss": 0.8074, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.8229557389347337, |
| "grad_norm": 10.701923370361328, |
| "learning_rate": 8.372899439850628e-06, |
| "loss": 0.8598, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.8267066766691673, |
| "grad_norm": 10.894915580749512, |
| "learning_rate": 8.346225660176047e-06, |
| "loss": 0.6588, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.8304576144036009, |
| "grad_norm": 9.2036714553833, |
| "learning_rate": 8.319551880501467e-06, |
| "loss": 0.8323, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.8342085521380345, |
| "grad_norm": 8.6634521484375, |
| "learning_rate": 8.292878100826888e-06, |
| "loss": 0.7526, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.837959489872468, |
| "grad_norm": 14.781025886535645, |
| "learning_rate": 8.266204321152308e-06, |
| "loss": 0.6999, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.8417104276069018, |
| "grad_norm": 12.273209571838379, |
| "learning_rate": 8.239530541477728e-06, |
| "loss": 0.6734, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.8454613653413352, |
| "grad_norm": 11.974825859069824, |
| "learning_rate": 8.212856761803149e-06, |
| "loss": 0.7195, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.849212303075769, |
| "grad_norm": 12.195642471313477, |
| "learning_rate": 8.186182982128569e-06, |
| "loss": 0.8301, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.8529632408102026, |
| "grad_norm": 6.2414751052856445, |
| "learning_rate": 8.159509202453988e-06, |
| "loss": 0.8528, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.8567141785446362, |
| "grad_norm": 9.026991844177246, |
| "learning_rate": 8.132835422779408e-06, |
| "loss": 0.8165, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.8604651162790697, |
| "grad_norm": 13.745824813842773, |
| "learning_rate": 8.106161643104829e-06, |
| "loss": 0.9866, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.8642160540135033, |
| "grad_norm": 8.861783027648926, |
| "learning_rate": 8.079487863430249e-06, |
| "loss": 0.9738, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.8679669917479371, |
| "grad_norm": 7.437354564666748, |
| "learning_rate": 8.052814083755668e-06, |
| "loss": 0.7223, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.8717179294823705, |
| "grad_norm": 14.148890495300293, |
| "learning_rate": 8.02614030408109e-06, |
| "loss": 0.8356, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.8754688672168043, |
| "grad_norm": 13.688013076782227, |
| "learning_rate": 7.99946652440651e-06, |
| "loss": 0.8949, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.8792198049512379, |
| "grad_norm": 16.709125518798828, |
| "learning_rate": 7.972792744731929e-06, |
| "loss": 0.8775, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.8829707426856714, |
| "grad_norm": 9.73661994934082, |
| "learning_rate": 7.946118965057348e-06, |
| "loss": 0.832, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.886721680420105, |
| "grad_norm": 10.575983047485352, |
| "learning_rate": 7.91944518538277e-06, |
| "loss": 0.7976, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.8904726181545386, |
| "grad_norm": 9.284303665161133, |
| "learning_rate": 7.89277140570819e-06, |
| "loss": 0.9656, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.8942235558889724, |
| "grad_norm": 6.543034553527832, |
| "learning_rate": 7.866097626033609e-06, |
| "loss": 0.7157, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.8979744936234058, |
| "grad_norm": 5.064873218536377, |
| "learning_rate": 7.83942384635903e-06, |
| "loss": 0.7334, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.9017254313578396, |
| "grad_norm": 16.654563903808594, |
| "learning_rate": 7.81275006668445e-06, |
| "loss": 0.7677, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.905476369092273, |
| "grad_norm": 20.614212036132812, |
| "learning_rate": 7.78607628700987e-06, |
| "loss": 0.8525, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.9092273068267067, |
| "grad_norm": 13.709310531616211, |
| "learning_rate": 7.75940250733529e-06, |
| "loss": 0.7237, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.9129782445611403, |
| "grad_norm": 17.662317276000977, |
| "learning_rate": 7.73272872766071e-06, |
| "loss": 0.8529, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.9167291822955739, |
| "grad_norm": 9.610177040100098, |
| "learning_rate": 7.70605494798613e-06, |
| "loss": 0.9447, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.9204801200300075, |
| "grad_norm": 19.19601821899414, |
| "learning_rate": 7.67938116831155e-06, |
| "loss": 0.8738, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.924231057764441, |
| "grad_norm": 8.228813171386719, |
| "learning_rate": 7.652707388636971e-06, |
| "loss": 0.8096, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.9279819954988748, |
| "grad_norm": 14.475564956665039, |
| "learning_rate": 7.626033608962391e-06, |
| "loss": 0.7235, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.9317329332333082, |
| "grad_norm": 17.313648223876953, |
| "learning_rate": 7.599359829287811e-06, |
| "loss": 0.7778, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.935483870967742, |
| "grad_norm": 6.775811672210693, |
| "learning_rate": 7.572686049613231e-06, |
| "loss": 0.7627, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.9392348087021756, |
| "grad_norm": 11.815681457519531, |
| "learning_rate": 7.54601226993865e-06, |
| "loss": 0.8978, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.9429857464366092, |
| "grad_norm": 13.653975486755371, |
| "learning_rate": 7.519338490264071e-06, |
| "loss": 0.7364, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.9467366841710427, |
| "grad_norm": 9.049905776977539, |
| "learning_rate": 7.492664710589491e-06, |
| "loss": 0.8631, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.9504876219054763, |
| "grad_norm": 14.149343490600586, |
| "learning_rate": 7.465990930914912e-06, |
| "loss": 0.8279, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.9542385596399101, |
| "grad_norm": 15.612215995788574, |
| "learning_rate": 7.439317151240331e-06, |
| "loss": 0.9058, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.9579894973743435, |
| "grad_norm": 11.682372093200684, |
| "learning_rate": 7.412643371565752e-06, |
| "loss": 0.8859, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.9617404351087773, |
| "grad_norm": 9.87074089050293, |
| "learning_rate": 7.385969591891171e-06, |
| "loss": 0.8733, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.9654913728432108, |
| "grad_norm": 9.963356971740723, |
| "learning_rate": 7.359295812216591e-06, |
| "loss": 0.7134, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.9692423105776444, |
| "grad_norm": 4.6800537109375, |
| "learning_rate": 7.3326220325420115e-06, |
| "loss": 0.7594, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.972993248312078, |
| "grad_norm": 13.148963928222656, |
| "learning_rate": 7.305948252867432e-06, |
| "loss": 0.947, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.9767441860465116, |
| "grad_norm": 10.073929786682129, |
| "learning_rate": 7.279274473192852e-06, |
| "loss": 0.8769, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.9804951237809454, |
| "grad_norm": 11.67326831817627, |
| "learning_rate": 7.252600693518272e-06, |
| "loss": 0.7545, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.9842460615153787, |
| "grad_norm": 7.498824119567871, |
| "learning_rate": 7.2259269138436925e-06, |
| "loss": 0.7997, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.9879969992498125, |
| "grad_norm": 9.357927322387695, |
| "learning_rate": 7.199253134169112e-06, |
| "loss": 0.8754, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.991747936984246, |
| "grad_norm": 12.50817584991455, |
| "learning_rate": 7.172579354494532e-06, |
| "loss": 0.79, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.9954988747186797, |
| "grad_norm": 14.613991737365723, |
| "learning_rate": 7.145905574819952e-06, |
| "loss": 0.8005, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.9992498124531133, |
| "grad_norm": 9.007129669189453, |
| "learning_rate": 7.119231795145373e-06, |
| "loss": 0.9009, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.580168776371308, |
| "eval_f1_macro": 0.5788189436128865, |
| "eval_f1_weighted": 0.5800618837244829, |
| "eval_loss": 0.9064968228340149, |
| "eval_precision_macro": 0.5789782500874713, |
| "eval_precision_weighted": 0.5804785651892536, |
| "eval_recall_macro": 0.5792145494510413, |
| "eval_recall_weighted": 0.580168776371308, |
| "eval_runtime": 4.8637, |
| "eval_samples_per_second": 487.284, |
| "eval_steps_per_second": 61.065, |
| "step": 5332 |
| }, |
| { |
| "epoch": 2.003000750187547, |
| "grad_norm": 10.368429183959961, |
| "learning_rate": 7.092558015470793e-06, |
| "loss": 0.6842, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.0067516879219807, |
| "grad_norm": 10.329928398132324, |
| "learning_rate": 7.065884235796214e-06, |
| "loss": 0.7709, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.010502625656414, |
| "grad_norm": 13.128575325012207, |
| "learning_rate": 7.039210456121633e-06, |
| "loss": 0.7054, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.014253563390848, |
| "grad_norm": 10.884894371032715, |
| "learning_rate": 7.012536676447053e-06, |
| "loss": 0.8103, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.018004501125281, |
| "grad_norm": 17.327537536621094, |
| "learning_rate": 6.985862896772473e-06, |
| "loss": 0.6551, |
| "step": 5380 |
| }, |
| { |
| "epoch": 2.021755438859715, |
| "grad_norm": 9.725515365600586, |
| "learning_rate": 6.959189117097893e-06, |
| "loss": 0.6534, |
| "step": 5390 |
| }, |
| { |
| "epoch": 2.0255063765941483, |
| "grad_norm": 9.302525520324707, |
| "learning_rate": 6.932515337423313e-06, |
| "loss": 0.6741, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.029257314328582, |
| "grad_norm": 12.362338066101074, |
| "learning_rate": 6.905841557748734e-06, |
| "loss": 0.7026, |
| "step": 5410 |
| }, |
| { |
| "epoch": 2.033008252063016, |
| "grad_norm": 7.654306411743164, |
| "learning_rate": 6.879167778074154e-06, |
| "loss": 0.5962, |
| "step": 5420 |
| }, |
| { |
| "epoch": 2.0367591897974493, |
| "grad_norm": 14.547067642211914, |
| "learning_rate": 6.852493998399574e-06, |
| "loss": 0.5578, |
| "step": 5430 |
| }, |
| { |
| "epoch": 2.040510127531883, |
| "grad_norm": 12.792427062988281, |
| "learning_rate": 6.8258202187249935e-06, |
| "loss": 0.7636, |
| "step": 5440 |
| }, |
| { |
| "epoch": 2.0442610652663165, |
| "grad_norm": 8.322968482971191, |
| "learning_rate": 6.799146439050414e-06, |
| "loss": 0.5881, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.0480120030007503, |
| "grad_norm": 14.064526557922363, |
| "learning_rate": 6.772472659375834e-06, |
| "loss": 0.6907, |
| "step": 5460 |
| }, |
| { |
| "epoch": 2.0517629407351836, |
| "grad_norm": 11.318249702453613, |
| "learning_rate": 6.745798879701254e-06, |
| "loss": 0.6179, |
| "step": 5470 |
| }, |
| { |
| "epoch": 2.0555138784696174, |
| "grad_norm": 7.615289688110352, |
| "learning_rate": 6.7191251000266745e-06, |
| "loss": 0.5912, |
| "step": 5480 |
| }, |
| { |
| "epoch": 2.059264816204051, |
| "grad_norm": 20.249950408935547, |
| "learning_rate": 6.692451320352095e-06, |
| "loss": 0.7777, |
| "step": 5490 |
| }, |
| { |
| "epoch": 2.0630157539384846, |
| "grad_norm": 13.289349555969238, |
| "learning_rate": 6.665777540677515e-06, |
| "loss": 0.6271, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.0667666916729184, |
| "grad_norm": 14.625772476196289, |
| "learning_rate": 6.639103761002935e-06, |
| "loss": 0.7248, |
| "step": 5510 |
| }, |
| { |
| "epoch": 2.0705176294073517, |
| "grad_norm": 14.428004264831543, |
| "learning_rate": 6.612429981328355e-06, |
| "loss": 0.6791, |
| "step": 5520 |
| }, |
| { |
| "epoch": 2.0742685671417855, |
| "grad_norm": 21.052837371826172, |
| "learning_rate": 6.585756201653774e-06, |
| "loss": 0.6244, |
| "step": 5530 |
| }, |
| { |
| "epoch": 2.078019504876219, |
| "grad_norm": 17.523300170898438, |
| "learning_rate": 6.559082421979195e-06, |
| "loss": 0.6498, |
| "step": 5540 |
| }, |
| { |
| "epoch": 2.0817704426106527, |
| "grad_norm": 9.524145126342773, |
| "learning_rate": 6.532408642304615e-06, |
| "loss": 0.7792, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.085521380345086, |
| "grad_norm": 14.92676830291748, |
| "learning_rate": 6.505734862630036e-06, |
| "loss": 0.5748, |
| "step": 5560 |
| }, |
| { |
| "epoch": 2.08927231807952, |
| "grad_norm": 18.87467384338379, |
| "learning_rate": 6.479061082955455e-06, |
| "loss": 0.7199, |
| "step": 5570 |
| }, |
| { |
| "epoch": 2.0930232558139537, |
| "grad_norm": 10.356287002563477, |
| "learning_rate": 6.452387303280876e-06, |
| "loss": 0.7016, |
| "step": 5580 |
| }, |
| { |
| "epoch": 2.096774193548387, |
| "grad_norm": 11.189599990844727, |
| "learning_rate": 6.425713523606295e-06, |
| "loss": 0.6511, |
| "step": 5590 |
| }, |
| { |
| "epoch": 2.100525131282821, |
| "grad_norm": 12.267254829406738, |
| "learning_rate": 6.399039743931715e-06, |
| "loss": 0.6421, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.104276069017254, |
| "grad_norm": 19.524673461914062, |
| "learning_rate": 6.3723659642571354e-06, |
| "loss": 0.6963, |
| "step": 5610 |
| }, |
| { |
| "epoch": 2.108027006751688, |
| "grad_norm": 13.466742515563965, |
| "learning_rate": 6.345692184582556e-06, |
| "loss": 0.6727, |
| "step": 5620 |
| }, |
| { |
| "epoch": 2.1117779444861213, |
| "grad_norm": 20.707855224609375, |
| "learning_rate": 6.319018404907976e-06, |
| "loss": 0.6695, |
| "step": 5630 |
| }, |
| { |
| "epoch": 2.115528882220555, |
| "grad_norm": 15.425350189208984, |
| "learning_rate": 6.292344625233396e-06, |
| "loss": 0.673, |
| "step": 5640 |
| }, |
| { |
| "epoch": 2.119279819954989, |
| "grad_norm": 5.349853038787842, |
| "learning_rate": 6.2656708455588164e-06, |
| "loss": 0.6275, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.1230307576894223, |
| "grad_norm": 13.552290916442871, |
| "learning_rate": 6.238997065884236e-06, |
| "loss": 0.6945, |
| "step": 5660 |
| }, |
| { |
| "epoch": 2.126781695423856, |
| "grad_norm": 17.840105056762695, |
| "learning_rate": 6.212323286209656e-06, |
| "loss": 0.8054, |
| "step": 5670 |
| }, |
| { |
| "epoch": 2.1305326331582894, |
| "grad_norm": 21.012237548828125, |
| "learning_rate": 6.185649506535076e-06, |
| "loss": 0.7306, |
| "step": 5680 |
| }, |
| { |
| "epoch": 2.1342835708927232, |
| "grad_norm": 13.1303129196167, |
| "learning_rate": 6.158975726860497e-06, |
| "loss": 0.871, |
| "step": 5690 |
| }, |
| { |
| "epoch": 2.1380345086271566, |
| "grad_norm": 11.506791114807129, |
| "learning_rate": 6.132301947185917e-06, |
| "loss": 0.6722, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.1417854463615904, |
| "grad_norm": 9.709290504455566, |
| "learning_rate": 6.1056281675113375e-06, |
| "loss": 0.6695, |
| "step": 5710 |
| }, |
| { |
| "epoch": 2.145536384096024, |
| "grad_norm": 8.551689147949219, |
| "learning_rate": 6.078954387836757e-06, |
| "loss": 0.7001, |
| "step": 5720 |
| }, |
| { |
| "epoch": 2.1492873218304576, |
| "grad_norm": 12.69763469696045, |
| "learning_rate": 6.052280608162177e-06, |
| "loss": 0.6778, |
| "step": 5730 |
| }, |
| { |
| "epoch": 2.1530382595648914, |
| "grad_norm": 10.49093246459961, |
| "learning_rate": 6.025606828487597e-06, |
| "loss": 0.6671, |
| "step": 5740 |
| }, |
| { |
| "epoch": 2.1567891972993247, |
| "grad_norm": 7.214636325836182, |
| "learning_rate": 5.998933048813017e-06, |
| "loss": 0.616, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.1605401350337585, |
| "grad_norm": 8.58086109161377, |
| "learning_rate": 5.972259269138437e-06, |
| "loss": 0.6024, |
| "step": 5760 |
| }, |
| { |
| "epoch": 2.164291072768192, |
| "grad_norm": 7.856104373931885, |
| "learning_rate": 5.945585489463858e-06, |
| "loss": 0.6202, |
| "step": 5770 |
| }, |
| { |
| "epoch": 2.1680420105026257, |
| "grad_norm": 6.472407341003418, |
| "learning_rate": 5.918911709789278e-06, |
| "loss": 0.6141, |
| "step": 5780 |
| }, |
| { |
| "epoch": 2.1717929482370595, |
| "grad_norm": 6.612668991088867, |
| "learning_rate": 5.892237930114698e-06, |
| "loss": 0.7841, |
| "step": 5790 |
| }, |
| { |
| "epoch": 2.175543885971493, |
| "grad_norm": 9.869592666625977, |
| "learning_rate": 5.865564150440118e-06, |
| "loss": 0.5949, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.1792948237059266, |
| "grad_norm": 12.85415267944336, |
| "learning_rate": 5.838890370765538e-06, |
| "loss": 0.663, |
| "step": 5810 |
| }, |
| { |
| "epoch": 2.18304576144036, |
| "grad_norm": 22.380807876586914, |
| "learning_rate": 5.8122165910909575e-06, |
| "loss": 0.6532, |
| "step": 5820 |
| }, |
| { |
| "epoch": 2.186796699174794, |
| "grad_norm": 23.866607666015625, |
| "learning_rate": 5.785542811416378e-06, |
| "loss": 0.6704, |
| "step": 5830 |
| }, |
| { |
| "epoch": 2.190547636909227, |
| "grad_norm": 12.608299255371094, |
| "learning_rate": 5.7588690317417985e-06, |
| "loss": 0.6231, |
| "step": 5840 |
| }, |
| { |
| "epoch": 2.194298574643661, |
| "grad_norm": 27.60419464111328, |
| "learning_rate": 5.732195252067219e-06, |
| "loss": 0.6369, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.1980495123780943, |
| "grad_norm": 10.39966869354248, |
| "learning_rate": 5.7055214723926385e-06, |
| "loss": 0.5964, |
| "step": 5860 |
| }, |
| { |
| "epoch": 2.201800450112528, |
| "grad_norm": 23.611059188842773, |
| "learning_rate": 5.678847692718059e-06, |
| "loss": 0.7365, |
| "step": 5870 |
| }, |
| { |
| "epoch": 2.205551387846962, |
| "grad_norm": 10.59642505645752, |
| "learning_rate": 5.652173913043479e-06, |
| "loss": 0.6305, |
| "step": 5880 |
| }, |
| { |
| "epoch": 2.2093023255813953, |
| "grad_norm": 15.549806594848633, |
| "learning_rate": 5.625500133368898e-06, |
| "loss": 0.624, |
| "step": 5890 |
| }, |
| { |
| "epoch": 2.213053263315829, |
| "grad_norm": 17.546363830566406, |
| "learning_rate": 5.598826353694319e-06, |
| "loss": 0.7103, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.2168042010502624, |
| "grad_norm": 19.833606719970703, |
| "learning_rate": 5.572152574019739e-06, |
| "loss": 0.4821, |
| "step": 5910 |
| }, |
| { |
| "epoch": 2.2205551387846962, |
| "grad_norm": 18.05365562438965, |
| "learning_rate": 5.54547879434516e-06, |
| "loss": 0.6953, |
| "step": 5920 |
| }, |
| { |
| "epoch": 2.2243060765191296, |
| "grad_norm": 3.1533432006835938, |
| "learning_rate": 5.518805014670579e-06, |
| "loss": 0.6899, |
| "step": 5930 |
| }, |
| { |
| "epoch": 2.2280570142535634, |
| "grad_norm": 21.84452247619629, |
| "learning_rate": 5.492131234996e-06, |
| "loss": 0.8146, |
| "step": 5940 |
| }, |
| { |
| "epoch": 2.231807951987997, |
| "grad_norm": 20.791135787963867, |
| "learning_rate": 5.465457455321419e-06, |
| "loss": 0.4915, |
| "step": 5950 |
| }, |
| { |
| "epoch": 2.2355588897224306, |
| "grad_norm": 16.44775390625, |
| "learning_rate": 5.438783675646839e-06, |
| "loss": 0.5946, |
| "step": 5960 |
| }, |
| { |
| "epoch": 2.2393098274568644, |
| "grad_norm": 8.386981964111328, |
| "learning_rate": 5.412109895972259e-06, |
| "loss": 0.7348, |
| "step": 5970 |
| }, |
| { |
| "epoch": 2.2430607651912977, |
| "grad_norm": 26.47071075439453, |
| "learning_rate": 5.38543611629768e-06, |
| "loss": 0.6261, |
| "step": 5980 |
| }, |
| { |
| "epoch": 2.2468117029257315, |
| "grad_norm": 11.219141960144043, |
| "learning_rate": 5.3587623366231e-06, |
| "loss": 0.5324, |
| "step": 5990 |
| }, |
| { |
| "epoch": 2.250562640660165, |
| "grad_norm": 15.969422340393066, |
| "learning_rate": 5.33208855694852e-06, |
| "loss": 0.7459, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.2543135783945987, |
| "grad_norm": 20.990497589111328, |
| "learning_rate": 5.30541477727394e-06, |
| "loss": 0.5593, |
| "step": 6010 |
| }, |
| { |
| "epoch": 2.258064516129032, |
| "grad_norm": 10.82603645324707, |
| "learning_rate": 5.27874099759936e-06, |
| "loss": 0.6698, |
| "step": 6020 |
| }, |
| { |
| "epoch": 2.261815453863466, |
| "grad_norm": 19.865243911743164, |
| "learning_rate": 5.25206721792478e-06, |
| "loss": 0.732, |
| "step": 6030 |
| }, |
| { |
| "epoch": 2.2655663915978996, |
| "grad_norm": 25.37660026550293, |
| "learning_rate": 5.2253934382502e-06, |
| "loss": 0.5585, |
| "step": 6040 |
| }, |
| { |
| "epoch": 2.269317329332333, |
| "grad_norm": 19.796749114990234, |
| "learning_rate": 5.1987196585756205e-06, |
| "loss": 0.7108, |
| "step": 6050 |
| }, |
| { |
| "epoch": 2.273068267066767, |
| "grad_norm": 12.207030296325684, |
| "learning_rate": 5.172045878901041e-06, |
| "loss": 0.6683, |
| "step": 6060 |
| }, |
| { |
| "epoch": 2.2768192048012, |
| "grad_norm": 20.979265213012695, |
| "learning_rate": 5.1453720992264615e-06, |
| "loss": 0.6962, |
| "step": 6070 |
| }, |
| { |
| "epoch": 2.280570142535634, |
| "grad_norm": 13.058587074279785, |
| "learning_rate": 5.118698319551881e-06, |
| "loss": 0.6119, |
| "step": 6080 |
| }, |
| { |
| "epoch": 2.2843210802700673, |
| "grad_norm": 7.18276309967041, |
| "learning_rate": 5.092024539877301e-06, |
| "loss": 0.606, |
| "step": 6090 |
| }, |
| { |
| "epoch": 2.288072018004501, |
| "grad_norm": 21.568151473999023, |
| "learning_rate": 5.065350760202721e-06, |
| "loss": 0.6909, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.291822955738935, |
| "grad_norm": 28.49129867553711, |
| "learning_rate": 5.038676980528141e-06, |
| "loss": 0.6764, |
| "step": 6110 |
| }, |
| { |
| "epoch": 2.2955738934733683, |
| "grad_norm": 12.39367389678955, |
| "learning_rate": 5.012003200853561e-06, |
| "loss": 0.753, |
| "step": 6120 |
| }, |
| { |
| "epoch": 2.299324831207802, |
| "grad_norm": 17.55943489074707, |
| "learning_rate": 4.985329421178982e-06, |
| "loss": 0.7103, |
| "step": 6130 |
| }, |
| { |
| "epoch": 2.3030757689422354, |
| "grad_norm": 16.813745498657227, |
| "learning_rate": 4.958655641504402e-06, |
| "loss": 0.645, |
| "step": 6140 |
| }, |
| { |
| "epoch": 2.3068267066766692, |
| "grad_norm": 20.711591720581055, |
| "learning_rate": 4.931981861829822e-06, |
| "loss": 0.6337, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.3105776444111026, |
| "grad_norm": 5.449891567230225, |
| "learning_rate": 4.905308082155241e-06, |
| "loss": 0.6224, |
| "step": 6160 |
| }, |
| { |
| "epoch": 2.3143285821455364, |
| "grad_norm": 15.508672714233398, |
| "learning_rate": 4.878634302480662e-06, |
| "loss": 0.6718, |
| "step": 6170 |
| }, |
| { |
| "epoch": 2.31807951987997, |
| "grad_norm": 12.16860294342041, |
| "learning_rate": 4.8519605228060815e-06, |
| "loss": 0.6044, |
| "step": 6180 |
| }, |
| { |
| "epoch": 2.3218304576144035, |
| "grad_norm": 16.671234130859375, |
| "learning_rate": 4.825286743131502e-06, |
| "loss": 0.7397, |
| "step": 6190 |
| }, |
| { |
| "epoch": 2.3255813953488373, |
| "grad_norm": 27.95615577697754, |
| "learning_rate": 4.798612963456922e-06, |
| "loss": 0.6451, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.3293323330832707, |
| "grad_norm": 23.62805938720703, |
| "learning_rate": 4.771939183782343e-06, |
| "loss": 0.7978, |
| "step": 6210 |
| }, |
| { |
| "epoch": 2.3330832708177045, |
| "grad_norm": 17.226280212402344, |
| "learning_rate": 4.7452654041077625e-06, |
| "loss": 0.6442, |
| "step": 6220 |
| }, |
| { |
| "epoch": 2.336834208552138, |
| "grad_norm": 22.371273040771484, |
| "learning_rate": 4.718591624433183e-06, |
| "loss": 0.6885, |
| "step": 6230 |
| }, |
| { |
| "epoch": 2.3405851462865717, |
| "grad_norm": 12.560019493103027, |
| "learning_rate": 4.6919178447586026e-06, |
| "loss": 0.6033, |
| "step": 6240 |
| }, |
| { |
| "epoch": 2.3443360840210055, |
| "grad_norm": 14.103109359741211, |
| "learning_rate": 4.665244065084023e-06, |
| "loss": 0.6683, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.348087021755439, |
| "grad_norm": 11.051913261413574, |
| "learning_rate": 4.638570285409443e-06, |
| "loss": 0.7092, |
| "step": 6260 |
| }, |
| { |
| "epoch": 2.3518379594898726, |
| "grad_norm": 15.613760948181152, |
| "learning_rate": 4.611896505734863e-06, |
| "loss": 0.6974, |
| "step": 6270 |
| }, |
| { |
| "epoch": 2.355588897224306, |
| "grad_norm": 19.85428237915039, |
| "learning_rate": 4.5852227260602836e-06, |
| "loss": 0.6637, |
| "step": 6280 |
| }, |
| { |
| "epoch": 2.35933983495874, |
| "grad_norm": 15.703207015991211, |
| "learning_rate": 4.558548946385703e-06, |
| "loss": 0.6508, |
| "step": 6290 |
| }, |
| { |
| "epoch": 2.363090772693173, |
| "grad_norm": 11.342123985290527, |
| "learning_rate": 4.531875166711124e-06, |
| "loss": 0.7348, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.366841710427607, |
| "grad_norm": 11.049941062927246, |
| "learning_rate": 4.505201387036543e-06, |
| "loss": 0.6421, |
| "step": 6310 |
| }, |
| { |
| "epoch": 2.3705926481620407, |
| "grad_norm": 24.488731384277344, |
| "learning_rate": 4.478527607361964e-06, |
| "loss": 0.7123, |
| "step": 6320 |
| }, |
| { |
| "epoch": 2.374343585896474, |
| "grad_norm": 14.967778205871582, |
| "learning_rate": 4.451853827687383e-06, |
| "loss": 0.7142, |
| "step": 6330 |
| }, |
| { |
| "epoch": 2.378094523630908, |
| "grad_norm": 9.328021049499512, |
| "learning_rate": 4.425180048012804e-06, |
| "loss": 0.6251, |
| "step": 6340 |
| }, |
| { |
| "epoch": 2.3818454613653413, |
| "grad_norm": 17.42303466796875, |
| "learning_rate": 4.398506268338224e-06, |
| "loss": 0.6355, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.385596399099775, |
| "grad_norm": 15.201652526855469, |
| "learning_rate": 4.371832488663644e-06, |
| "loss": 0.7441, |
| "step": 6360 |
| }, |
| { |
| "epoch": 2.3893473368342084, |
| "grad_norm": 23.0561466217041, |
| "learning_rate": 4.345158708989064e-06, |
| "loss": 0.6641, |
| "step": 6370 |
| }, |
| { |
| "epoch": 2.393098274568642, |
| "grad_norm": 14.52270221710205, |
| "learning_rate": 4.318484929314484e-06, |
| "loss": 0.7073, |
| "step": 6380 |
| }, |
| { |
| "epoch": 2.396849212303076, |
| "grad_norm": 13.747902870178223, |
| "learning_rate": 4.291811149639904e-06, |
| "loss": 0.81, |
| "step": 6390 |
| }, |
| { |
| "epoch": 2.4006001500375094, |
| "grad_norm": 14.231673240661621, |
| "learning_rate": 4.265137369965324e-06, |
| "loss": 0.6939, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.404351087771943, |
| "grad_norm": 7.63701057434082, |
| "learning_rate": 4.2384635902907445e-06, |
| "loss": 0.6873, |
| "step": 6410 |
| }, |
| { |
| "epoch": 2.4081020255063765, |
| "grad_norm": 20.752126693725586, |
| "learning_rate": 4.211789810616165e-06, |
| "loss": 0.571, |
| "step": 6420 |
| }, |
| { |
| "epoch": 2.4118529632408103, |
| "grad_norm": 13.460418701171875, |
| "learning_rate": 4.185116030941585e-06, |
| "loss": 0.6506, |
| "step": 6430 |
| }, |
| { |
| "epoch": 2.4156039009752437, |
| "grad_norm": 8.838345527648926, |
| "learning_rate": 4.158442251267005e-06, |
| "loss": 0.5745, |
| "step": 6440 |
| }, |
| { |
| "epoch": 2.4193548387096775, |
| "grad_norm": 10.570659637451172, |
| "learning_rate": 4.131768471592425e-06, |
| "loss": 0.6607, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.423105776444111, |
| "grad_norm": 12.49052619934082, |
| "learning_rate": 4.105094691917845e-06, |
| "loss": 0.5026, |
| "step": 6460 |
| }, |
| { |
| "epoch": 2.4268567141785446, |
| "grad_norm": 9.46437931060791, |
| "learning_rate": 4.078420912243265e-06, |
| "loss": 0.6005, |
| "step": 6470 |
| }, |
| { |
| "epoch": 2.430607651912978, |
| "grad_norm": 29.9566593170166, |
| "learning_rate": 4.051747132568685e-06, |
| "loss": 0.6292, |
| "step": 6480 |
| }, |
| { |
| "epoch": 2.434358589647412, |
| "grad_norm": 12.318580627441406, |
| "learning_rate": 4.025073352894106e-06, |
| "loss": 0.7056, |
| "step": 6490 |
| }, |
| { |
| "epoch": 2.4381095273818456, |
| "grad_norm": 20.635848999023438, |
| "learning_rate": 3.998399573219526e-06, |
| "loss": 0.6663, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.441860465116279, |
| "grad_norm": 13.231310844421387, |
| "learning_rate": 3.971725793544946e-06, |
| "loss": 0.7405, |
| "step": 6510 |
| }, |
| { |
| "epoch": 2.4456114028507128, |
| "grad_norm": 16.560197830200195, |
| "learning_rate": 3.945052013870365e-06, |
| "loss": 0.6678, |
| "step": 6520 |
| }, |
| { |
| "epoch": 2.449362340585146, |
| "grad_norm": 21.45167350769043, |
| "learning_rate": 3.918378234195786e-06, |
| "loss": 0.6032, |
| "step": 6530 |
| }, |
| { |
| "epoch": 2.45311327831958, |
| "grad_norm": 37.360843658447266, |
| "learning_rate": 3.891704454521205e-06, |
| "loss": 0.8438, |
| "step": 6540 |
| }, |
| { |
| "epoch": 2.4568642160540133, |
| "grad_norm": 30.98585319519043, |
| "learning_rate": 3.865030674846626e-06, |
| "loss": 0.6035, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.460615153788447, |
| "grad_norm": 13.408466339111328, |
| "learning_rate": 3.838356895172046e-06, |
| "loss": 0.5181, |
| "step": 6560 |
| }, |
| { |
| "epoch": 2.464366091522881, |
| "grad_norm": 16.84627914428711, |
| "learning_rate": 3.8116831154974664e-06, |
| "loss": 0.6353, |
| "step": 6570 |
| }, |
| { |
| "epoch": 2.4681170292573142, |
| "grad_norm": 19.02153968811035, |
| "learning_rate": 3.785009335822886e-06, |
| "loss": 0.6052, |
| "step": 6580 |
| }, |
| { |
| "epoch": 2.471867966991748, |
| "grad_norm": 13.263850212097168, |
| "learning_rate": 3.7583355561483065e-06, |
| "loss": 0.8126, |
| "step": 6590 |
| }, |
| { |
| "epoch": 2.4756189047261814, |
| "grad_norm": 22.753215789794922, |
| "learning_rate": 3.731661776473727e-06, |
| "loss": 0.6449, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.479369842460615, |
| "grad_norm": 13.979212760925293, |
| "learning_rate": 3.704987996799147e-06, |
| "loss": 0.7421, |
| "step": 6610 |
| }, |
| { |
| "epoch": 2.4831207801950486, |
| "grad_norm": 23.614389419555664, |
| "learning_rate": 3.6783142171245666e-06, |
| "loss": 0.8168, |
| "step": 6620 |
| }, |
| { |
| "epoch": 2.4868717179294824, |
| "grad_norm": 7.810019493103027, |
| "learning_rate": 3.651640437449987e-06, |
| "loss": 0.6301, |
| "step": 6630 |
| }, |
| { |
| "epoch": 2.490622655663916, |
| "grad_norm": 17.90605926513672, |
| "learning_rate": 3.624966657775407e-06, |
| "loss": 0.6369, |
| "step": 6640 |
| }, |
| { |
| "epoch": 2.4943735933983495, |
| "grad_norm": 10.375251770019531, |
| "learning_rate": 3.598292878100827e-06, |
| "loss": 0.6254, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.4981245311327833, |
| "grad_norm": 15.813028335571289, |
| "learning_rate": 3.571619098426247e-06, |
| "loss": 0.7866, |
| "step": 6660 |
| }, |
| { |
| "epoch": 2.5018754688672167, |
| "grad_norm": 8.438957214355469, |
| "learning_rate": 3.5449453187516676e-06, |
| "loss": 0.7288, |
| "step": 6670 |
| }, |
| { |
| "epoch": 2.5056264066016505, |
| "grad_norm": 23.076040267944336, |
| "learning_rate": 3.5182715390770877e-06, |
| "loss": 0.6743, |
| "step": 6680 |
| }, |
| { |
| "epoch": 2.509377344336084, |
| "grad_norm": 14.966166496276855, |
| "learning_rate": 3.4915977594025073e-06, |
| "loss": 0.6408, |
| "step": 6690 |
| }, |
| { |
| "epoch": 2.5131282820705176, |
| "grad_norm": 19.553081512451172, |
| "learning_rate": 3.4649239797279277e-06, |
| "loss": 0.613, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.5168792198049514, |
| "grad_norm": 12.050764083862305, |
| "learning_rate": 3.4382502000533478e-06, |
| "loss": 0.6547, |
| "step": 6710 |
| }, |
| { |
| "epoch": 2.520630157539385, |
| "grad_norm": 14.52085018157959, |
| "learning_rate": 3.411576420378768e-06, |
| "loss": 0.7239, |
| "step": 6720 |
| }, |
| { |
| "epoch": 2.5243810952738186, |
| "grad_norm": 20.222137451171875, |
| "learning_rate": 3.384902640704188e-06, |
| "loss": 0.7656, |
| "step": 6730 |
| }, |
| { |
| "epoch": 2.528132033008252, |
| "grad_norm": 14.729280471801758, |
| "learning_rate": 3.3582288610296083e-06, |
| "loss": 0.6013, |
| "step": 6740 |
| }, |
| { |
| "epoch": 2.5318829707426858, |
| "grad_norm": 21.984832763671875, |
| "learning_rate": 3.3315550813550284e-06, |
| "loss": 0.6453, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.535633908477119, |
| "grad_norm": 19.643138885498047, |
| "learning_rate": 3.304881301680448e-06, |
| "loss": 0.8221, |
| "step": 6760 |
| }, |
| { |
| "epoch": 2.539384846211553, |
| "grad_norm": 17.281740188598633, |
| "learning_rate": 3.2782075220058684e-06, |
| "loss": 0.6348, |
| "step": 6770 |
| }, |
| { |
| "epoch": 2.5431357839459867, |
| "grad_norm": 17.821035385131836, |
| "learning_rate": 3.251533742331289e-06, |
| "loss": 0.5855, |
| "step": 6780 |
| }, |
| { |
| "epoch": 2.54688672168042, |
| "grad_norm": 14.015131950378418, |
| "learning_rate": 3.224859962656709e-06, |
| "loss": 0.5544, |
| "step": 6790 |
| }, |
| { |
| "epoch": 2.550637659414854, |
| "grad_norm": 10.391494750976562, |
| "learning_rate": 3.1981861829821286e-06, |
| "loss": 0.588, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.5543885971492872, |
| "grad_norm": 14.990039825439453, |
| "learning_rate": 3.171512403307549e-06, |
| "loss": 0.5782, |
| "step": 6810 |
| }, |
| { |
| "epoch": 2.558139534883721, |
| "grad_norm": 13.448775291442871, |
| "learning_rate": 3.144838623632969e-06, |
| "loss": 0.8525, |
| "step": 6820 |
| }, |
| { |
| "epoch": 2.5618904726181544, |
| "grad_norm": 13.461121559143066, |
| "learning_rate": 3.118164843958389e-06, |
| "loss": 0.6256, |
| "step": 6830 |
| }, |
| { |
| "epoch": 2.565641410352588, |
| "grad_norm": 13.295988082885742, |
| "learning_rate": 3.091491064283809e-06, |
| "loss": 0.7059, |
| "step": 6840 |
| }, |
| { |
| "epoch": 2.569392348087022, |
| "grad_norm": 14.871612548828125, |
| "learning_rate": 3.0648172846092296e-06, |
| "loss": 0.6338, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.5731432858214554, |
| "grad_norm": 30.46957778930664, |
| "learning_rate": 3.0381435049346496e-06, |
| "loss": 0.7072, |
| "step": 6860 |
| }, |
| { |
| "epoch": 2.5768942235558887, |
| "grad_norm": 20.661733627319336, |
| "learning_rate": 3.0114697252600693e-06, |
| "loss": 0.6634, |
| "step": 6870 |
| }, |
| { |
| "epoch": 2.5806451612903225, |
| "grad_norm": 10.35488224029541, |
| "learning_rate": 2.9847959455854897e-06, |
| "loss": 0.5709, |
| "step": 6880 |
| }, |
| { |
| "epoch": 2.5843960990247563, |
| "grad_norm": 22.383169174194336, |
| "learning_rate": 2.9581221659109098e-06, |
| "loss": 0.433, |
| "step": 6890 |
| }, |
| { |
| "epoch": 2.5881470367591897, |
| "grad_norm": 21.173015594482422, |
| "learning_rate": 2.93144838623633e-06, |
| "loss": 0.6109, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.5918979744936235, |
| "grad_norm": 25.366735458374023, |
| "learning_rate": 2.90477460656175e-06, |
| "loss": 0.8177, |
| "step": 6910 |
| }, |
| { |
| "epoch": 2.5956489122280573, |
| "grad_norm": 18.91875457763672, |
| "learning_rate": 2.8781008268871703e-06, |
| "loss": 0.7214, |
| "step": 6920 |
| }, |
| { |
| "epoch": 2.5993998499624906, |
| "grad_norm": 12.457830429077148, |
| "learning_rate": 2.8514270472125903e-06, |
| "loss": 0.553, |
| "step": 6930 |
| }, |
| { |
| "epoch": 2.603150787696924, |
| "grad_norm": 6.222160816192627, |
| "learning_rate": 2.82475326753801e-06, |
| "loss": 0.679, |
| "step": 6940 |
| }, |
| { |
| "epoch": 2.606901725431358, |
| "grad_norm": 8.99958324432373, |
| "learning_rate": 2.7980794878634304e-06, |
| "loss": 0.5929, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.6106526631657916, |
| "grad_norm": 11.063492774963379, |
| "learning_rate": 2.771405708188851e-06, |
| "loss": 0.5185, |
| "step": 6960 |
| }, |
| { |
| "epoch": 2.614403600900225, |
| "grad_norm": 10.320928573608398, |
| "learning_rate": 2.744731928514271e-06, |
| "loss": 0.5286, |
| "step": 6970 |
| }, |
| { |
| "epoch": 2.6181545386346587, |
| "grad_norm": 13.718670845031738, |
| "learning_rate": 2.7180581488396905e-06, |
| "loss": 0.6508, |
| "step": 6980 |
| }, |
| { |
| "epoch": 2.6219054763690925, |
| "grad_norm": 10.613819122314453, |
| "learning_rate": 2.691384369165111e-06, |
| "loss": 0.5805, |
| "step": 6990 |
| }, |
| { |
| "epoch": 2.625656414103526, |
| "grad_norm": 22.765199661254883, |
| "learning_rate": 2.664710589490531e-06, |
| "loss": 0.6691, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.6294073518379593, |
| "grad_norm": 12.34518051147461, |
| "learning_rate": 2.638036809815951e-06, |
| "loss": 0.6577, |
| "step": 7010 |
| }, |
| { |
| "epoch": 2.633158289572393, |
| "grad_norm": 15.861391067504883, |
| "learning_rate": 2.611363030141371e-06, |
| "loss": 0.5159, |
| "step": 7020 |
| }, |
| { |
| "epoch": 2.636909227306827, |
| "grad_norm": 7.271751880645752, |
| "learning_rate": 2.5846892504667916e-06, |
| "loss": 0.6844, |
| "step": 7030 |
| }, |
| { |
| "epoch": 2.64066016504126, |
| "grad_norm": 20.930856704711914, |
| "learning_rate": 2.5580154707922116e-06, |
| "loss": 0.7827, |
| "step": 7040 |
| }, |
| { |
| "epoch": 2.644411102775694, |
| "grad_norm": 28.042675018310547, |
| "learning_rate": 2.5313416911176312e-06, |
| "loss": 0.6383, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.6481620405101274, |
| "grad_norm": 25.815296173095703, |
| "learning_rate": 2.5046679114430517e-06, |
| "loss": 0.6866, |
| "step": 7060 |
| }, |
| { |
| "epoch": 2.651912978244561, |
| "grad_norm": 16.492206573486328, |
| "learning_rate": 2.4779941317684717e-06, |
| "loss": 0.5342, |
| "step": 7070 |
| }, |
| { |
| "epoch": 2.6556639159789945, |
| "grad_norm": 23.266910552978516, |
| "learning_rate": 2.4513203520938918e-06, |
| "loss": 0.5564, |
| "step": 7080 |
| }, |
| { |
| "epoch": 2.6594148537134283, |
| "grad_norm": 11.591928482055664, |
| "learning_rate": 2.424646572419312e-06, |
| "loss": 0.573, |
| "step": 7090 |
| }, |
| { |
| "epoch": 2.663165791447862, |
| "grad_norm": 14.71267032623291, |
| "learning_rate": 2.3979727927447323e-06, |
| "loss": 0.6456, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.6669167291822955, |
| "grad_norm": 7.238256454467773, |
| "learning_rate": 2.3712990130701523e-06, |
| "loss": 0.7731, |
| "step": 7110 |
| }, |
| { |
| "epoch": 2.6706676669167293, |
| "grad_norm": 38.71699523925781, |
| "learning_rate": 2.3446252333955723e-06, |
| "loss": 0.7189, |
| "step": 7120 |
| }, |
| { |
| "epoch": 2.6744186046511627, |
| "grad_norm": 24.029537200927734, |
| "learning_rate": 2.3179514537209924e-06, |
| "loss": 0.7435, |
| "step": 7130 |
| }, |
| { |
| "epoch": 2.6781695423855965, |
| "grad_norm": 17.704763412475586, |
| "learning_rate": 2.291277674046413e-06, |
| "loss": 0.838, |
| "step": 7140 |
| }, |
| { |
| "epoch": 2.68192048012003, |
| "grad_norm": 36.12045669555664, |
| "learning_rate": 2.2646038943718325e-06, |
| "loss": 0.7103, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.6856714178544636, |
| "grad_norm": 20.062591552734375, |
| "learning_rate": 2.237930114697253e-06, |
| "loss": 0.5748, |
| "step": 7160 |
| }, |
| { |
| "epoch": 2.6894223555888974, |
| "grad_norm": 9.567973136901855, |
| "learning_rate": 2.211256335022673e-06, |
| "loss": 0.7598, |
| "step": 7170 |
| }, |
| { |
| "epoch": 2.6931732933233308, |
| "grad_norm": 19.337631225585938, |
| "learning_rate": 2.184582555348093e-06, |
| "loss": 0.5945, |
| "step": 7180 |
| }, |
| { |
| "epoch": 2.6969242310577646, |
| "grad_norm": 11.189875602722168, |
| "learning_rate": 2.157908775673513e-06, |
| "loss": 0.77, |
| "step": 7190 |
| }, |
| { |
| "epoch": 2.700675168792198, |
| "grad_norm": 16.071062088012695, |
| "learning_rate": 2.131234995998933e-06, |
| "loss": 0.6758, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.7044261065266317, |
| "grad_norm": 11.37120532989502, |
| "learning_rate": 2.1045612163243535e-06, |
| "loss": 0.5912, |
| "step": 7210 |
| }, |
| { |
| "epoch": 2.708177044261065, |
| "grad_norm": 25.354324340820312, |
| "learning_rate": 2.0778874366497736e-06, |
| "loss": 0.6741, |
| "step": 7220 |
| }, |
| { |
| "epoch": 2.711927981995499, |
| "grad_norm": 11.246193885803223, |
| "learning_rate": 2.0512136569751936e-06, |
| "loss": 0.6073, |
| "step": 7230 |
| }, |
| { |
| "epoch": 2.7156789197299327, |
| "grad_norm": 9.01452350616455, |
| "learning_rate": 2.0245398773006137e-06, |
| "loss": 0.7363, |
| "step": 7240 |
| }, |
| { |
| "epoch": 2.719429857464366, |
| "grad_norm": 22.3641414642334, |
| "learning_rate": 1.9978660976260337e-06, |
| "loss": 0.6278, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.7231807951988, |
| "grad_norm": 14.206088066101074, |
| "learning_rate": 1.9711923179514537e-06, |
| "loss": 0.6676, |
| "step": 7260 |
| }, |
| { |
| "epoch": 2.726931732933233, |
| "grad_norm": 14.623751640319824, |
| "learning_rate": 1.9445185382768738e-06, |
| "loss": 0.6629, |
| "step": 7270 |
| }, |
| { |
| "epoch": 2.730682670667667, |
| "grad_norm": 15.682950019836426, |
| "learning_rate": 1.9178447586022942e-06, |
| "loss": 0.8008, |
| "step": 7280 |
| }, |
| { |
| "epoch": 2.7344336084021004, |
| "grad_norm": 16.56915855407715, |
| "learning_rate": 1.891170978927714e-06, |
| "loss": 0.8421, |
| "step": 7290 |
| }, |
| { |
| "epoch": 2.738184546136534, |
| "grad_norm": 20.514009475708008, |
| "learning_rate": 1.8644971992531343e-06, |
| "loss": 0.6755, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.741935483870968, |
| "grad_norm": 15.838664054870605, |
| "learning_rate": 1.8378234195785544e-06, |
| "loss": 0.6463, |
| "step": 7310 |
| }, |
| { |
| "epoch": 2.7456864216054013, |
| "grad_norm": 30.3530330657959, |
| "learning_rate": 1.8111496399039746e-06, |
| "loss": 0.6295, |
| "step": 7320 |
| }, |
| { |
| "epoch": 2.7494373593398347, |
| "grad_norm": 8.959320068359375, |
| "learning_rate": 1.7844758602293946e-06, |
| "loss": 0.6443, |
| "step": 7330 |
| }, |
| { |
| "epoch": 2.7531882970742685, |
| "grad_norm": 11.156110763549805, |
| "learning_rate": 1.757802080554815e-06, |
| "loss": 0.5971, |
| "step": 7340 |
| }, |
| { |
| "epoch": 2.7569392348087023, |
| "grad_norm": 21.744304656982422, |
| "learning_rate": 1.731128300880235e-06, |
| "loss": 0.5818, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.7606901725431356, |
| "grad_norm": 23.995845794677734, |
| "learning_rate": 1.7044545212056548e-06, |
| "loss": 0.6885, |
| "step": 7360 |
| }, |
| { |
| "epoch": 2.7644411102775694, |
| "grad_norm": 13.629135131835938, |
| "learning_rate": 1.677780741531075e-06, |
| "loss": 0.771, |
| "step": 7370 |
| }, |
| { |
| "epoch": 2.7681920480120032, |
| "grad_norm": 6.805270671844482, |
| "learning_rate": 1.651106961856495e-06, |
| "loss": 0.687, |
| "step": 7380 |
| }, |
| { |
| "epoch": 2.7719429857464366, |
| "grad_norm": 21.93046760559082, |
| "learning_rate": 1.6244331821819153e-06, |
| "loss": 0.5681, |
| "step": 7390 |
| }, |
| { |
| "epoch": 2.77569392348087, |
| "grad_norm": 22.271133422851562, |
| "learning_rate": 1.5977594025073353e-06, |
| "loss": 0.7504, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.7794448612153038, |
| "grad_norm": 19.411861419677734, |
| "learning_rate": 1.5710856228327556e-06, |
| "loss": 0.7141, |
| "step": 7410 |
| }, |
| { |
| "epoch": 2.7831957989497376, |
| "grad_norm": 21.990013122558594, |
| "learning_rate": 1.5444118431581756e-06, |
| "loss": 0.7941, |
| "step": 7420 |
| }, |
| { |
| "epoch": 2.786946736684171, |
| "grad_norm": 26.875274658203125, |
| "learning_rate": 1.5177380634835959e-06, |
| "loss": 0.7077, |
| "step": 7430 |
| }, |
| { |
| "epoch": 2.7906976744186047, |
| "grad_norm": 17.144861221313477, |
| "learning_rate": 1.491064283809016e-06, |
| "loss": 0.6153, |
| "step": 7440 |
| }, |
| { |
| "epoch": 2.7944486121530385, |
| "grad_norm": 18.100868225097656, |
| "learning_rate": 1.4643905041344357e-06, |
| "loss": 0.635, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.798199549887472, |
| "grad_norm": 17.497039794921875, |
| "learning_rate": 1.437716724459856e-06, |
| "loss": 0.7681, |
| "step": 7460 |
| }, |
| { |
| "epoch": 2.8019504876219052, |
| "grad_norm": 11.748749732971191, |
| "learning_rate": 1.411042944785276e-06, |
| "loss": 0.7916, |
| "step": 7470 |
| }, |
| { |
| "epoch": 2.805701425356339, |
| "grad_norm": 17.71030616760254, |
| "learning_rate": 1.3843691651106963e-06, |
| "loss": 0.6826, |
| "step": 7480 |
| }, |
| { |
| "epoch": 2.809452363090773, |
| "grad_norm": 15.269068717956543, |
| "learning_rate": 1.3576953854361163e-06, |
| "loss": 0.6008, |
| "step": 7490 |
| }, |
| { |
| "epoch": 2.813203300825206, |
| "grad_norm": 16.148839950561523, |
| "learning_rate": 1.3310216057615366e-06, |
| "loss": 0.7581, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.81695423855964, |
| "grad_norm": 7.341813564300537, |
| "learning_rate": 1.3043478260869566e-06, |
| "loss": 0.4343, |
| "step": 7510 |
| }, |
| { |
| "epoch": 2.8207051762940734, |
| "grad_norm": 11.722135543823242, |
| "learning_rate": 1.2776740464123769e-06, |
| "loss": 0.5226, |
| "step": 7520 |
| }, |
| { |
| "epoch": 2.824456114028507, |
| "grad_norm": 17.107776641845703, |
| "learning_rate": 1.251000266737797e-06, |
| "loss": 0.6466, |
| "step": 7530 |
| }, |
| { |
| "epoch": 2.8282070517629405, |
| "grad_norm": 15.833941459655762, |
| "learning_rate": 1.224326487063217e-06, |
| "loss": 0.7703, |
| "step": 7540 |
| }, |
| { |
| "epoch": 2.8319579894973743, |
| "grad_norm": 19.610742568969727, |
| "learning_rate": 1.197652707388637e-06, |
| "loss": 0.6359, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.835708927231808, |
| "grad_norm": 12.620158195495605, |
| "learning_rate": 1.1709789277140572e-06, |
| "loss": 0.6561, |
| "step": 7560 |
| }, |
| { |
| "epoch": 2.8394598649662415, |
| "grad_norm": 20.80132293701172, |
| "learning_rate": 1.1443051480394773e-06, |
| "loss": 0.7668, |
| "step": 7570 |
| }, |
| { |
| "epoch": 2.8432108027006753, |
| "grad_norm": 9.778907775878906, |
| "learning_rate": 1.1176313683648973e-06, |
| "loss": 0.7067, |
| "step": 7580 |
| }, |
| { |
| "epoch": 2.8469617404351086, |
| "grad_norm": 11.224839210510254, |
| "learning_rate": 1.0909575886903174e-06, |
| "loss": 0.5963, |
| "step": 7590 |
| }, |
| { |
| "epoch": 2.8507126781695424, |
| "grad_norm": 11.957784652709961, |
| "learning_rate": 1.0642838090157376e-06, |
| "loss": 0.7127, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.854463615903976, |
| "grad_norm": 17.465967178344727, |
| "learning_rate": 1.0376100293411576e-06, |
| "loss": 0.5896, |
| "step": 7610 |
| }, |
| { |
| "epoch": 2.8582145536384096, |
| "grad_norm": 22.074583053588867, |
| "learning_rate": 1.010936249666578e-06, |
| "loss": 0.664, |
| "step": 7620 |
| }, |
| { |
| "epoch": 2.8619654913728434, |
| "grad_norm": 45.1811408996582, |
| "learning_rate": 9.84262469991998e-07, |
| "loss": 0.6788, |
| "step": 7630 |
| }, |
| { |
| "epoch": 2.8657164291072768, |
| "grad_norm": 12.519074440002441, |
| "learning_rate": 9.57588690317418e-07, |
| "loss": 0.5208, |
| "step": 7640 |
| }, |
| { |
| "epoch": 2.8694673668417106, |
| "grad_norm": 14.533720016479492, |
| "learning_rate": 9.309149106428382e-07, |
| "loss": 0.6403, |
| "step": 7650 |
| }, |
| { |
| "epoch": 2.873218304576144, |
| "grad_norm": 6.502141952514648, |
| "learning_rate": 9.042411309682584e-07, |
| "loss": 0.6661, |
| "step": 7660 |
| }, |
| { |
| "epoch": 2.8769692423105777, |
| "grad_norm": 16.4246826171875, |
| "learning_rate": 8.775673512936784e-07, |
| "loss": 0.6631, |
| "step": 7670 |
| }, |
| { |
| "epoch": 2.880720180045011, |
| "grad_norm": 20.435749053955078, |
| "learning_rate": 8.508935716190984e-07, |
| "loss": 0.769, |
| "step": 7680 |
| }, |
| { |
| "epoch": 2.884471117779445, |
| "grad_norm": 9.382180213928223, |
| "learning_rate": 8.242197919445186e-07, |
| "loss": 0.5407, |
| "step": 7690 |
| }, |
| { |
| "epoch": 2.8882220555138787, |
| "grad_norm": 12.802393913269043, |
| "learning_rate": 7.975460122699387e-07, |
| "loss": 0.636, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.891972993248312, |
| "grad_norm": 5.997576713562012, |
| "learning_rate": 7.708722325953588e-07, |
| "loss": 0.561, |
| "step": 7710 |
| }, |
| { |
| "epoch": 2.895723930982746, |
| "grad_norm": 8.369012832641602, |
| "learning_rate": 7.441984529207789e-07, |
| "loss": 0.5721, |
| "step": 7720 |
| }, |
| { |
| "epoch": 2.899474868717179, |
| "grad_norm": 19.990249633789062, |
| "learning_rate": 7.175246732461991e-07, |
| "loss": 0.7267, |
| "step": 7730 |
| }, |
| { |
| "epoch": 2.903225806451613, |
| "grad_norm": 19.364540100097656, |
| "learning_rate": 6.908508935716192e-07, |
| "loss": 0.6867, |
| "step": 7740 |
| }, |
| { |
| "epoch": 2.9069767441860463, |
| "grad_norm": 10.638273239135742, |
| "learning_rate": 6.641771138970394e-07, |
| "loss": 0.5809, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.91072768192048, |
| "grad_norm": 24.913246154785156, |
| "learning_rate": 6.375033342224594e-07, |
| "loss": 0.5658, |
| "step": 7760 |
| }, |
| { |
| "epoch": 2.914478619654914, |
| "grad_norm": 6.1255412101745605, |
| "learning_rate": 6.108295545478795e-07, |
| "loss": 0.4796, |
| "step": 7770 |
| }, |
| { |
| "epoch": 2.9182295573893473, |
| "grad_norm": 13.97762680053711, |
| "learning_rate": 5.841557748732996e-07, |
| "loss": 0.6201, |
| "step": 7780 |
| }, |
| { |
| "epoch": 2.921980495123781, |
| "grad_norm": 24.56553840637207, |
| "learning_rate": 5.574819951987197e-07, |
| "loss": 0.5206, |
| "step": 7790 |
| }, |
| { |
| "epoch": 2.9257314328582145, |
| "grad_norm": 20.081579208374023, |
| "learning_rate": 5.308082155241398e-07, |
| "loss": 0.5697, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.9294823705926483, |
| "grad_norm": 11.358619689941406, |
| "learning_rate": 5.041344358495599e-07, |
| "loss": 0.6268, |
| "step": 7810 |
| }, |
| { |
| "epoch": 2.9332333083270816, |
| "grad_norm": 11.016149520874023, |
| "learning_rate": 4.7746065617498e-07, |
| "loss": 0.5753, |
| "step": 7820 |
| }, |
| { |
| "epoch": 2.9369842460615154, |
| "grad_norm": 17.64615249633789, |
| "learning_rate": 4.507868765004002e-07, |
| "loss": 0.7584, |
| "step": 7830 |
| }, |
| { |
| "epoch": 2.9407351837959492, |
| "grad_norm": 17.292207717895508, |
| "learning_rate": 4.2411309682582024e-07, |
| "loss": 0.6361, |
| "step": 7840 |
| }, |
| { |
| "epoch": 2.9444861215303826, |
| "grad_norm": 17.94815444946289, |
| "learning_rate": 3.974393171512404e-07, |
| "loss": 0.7208, |
| "step": 7850 |
| }, |
| { |
| "epoch": 2.948237059264816, |
| "grad_norm": 13.073601722717285, |
| "learning_rate": 3.7076553747666047e-07, |
| "loss": 0.7179, |
| "step": 7860 |
| }, |
| { |
| "epoch": 2.9519879969992497, |
| "grad_norm": 7.956513404846191, |
| "learning_rate": 3.440917578020806e-07, |
| "loss": 0.6109, |
| "step": 7870 |
| }, |
| { |
| "epoch": 2.9557389347336835, |
| "grad_norm": 18.16693687438965, |
| "learning_rate": 3.1741797812750066e-07, |
| "loss": 0.6499, |
| "step": 7880 |
| }, |
| { |
| "epoch": 2.959489872468117, |
| "grad_norm": 25.006132125854492, |
| "learning_rate": 2.907441984529208e-07, |
| "loss": 0.5358, |
| "step": 7890 |
| }, |
| { |
| "epoch": 2.9632408102025507, |
| "grad_norm": 20.937856674194336, |
| "learning_rate": 2.640704187783409e-07, |
| "loss": 0.6364, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.9669917479369845, |
| "grad_norm": 12.37922477722168, |
| "learning_rate": 2.3739663910376104e-07, |
| "loss": 0.4916, |
| "step": 7910 |
| }, |
| { |
| "epoch": 2.970742685671418, |
| "grad_norm": 8.240549087524414, |
| "learning_rate": 2.1072285942918113e-07, |
| "loss": 0.6811, |
| "step": 7920 |
| }, |
| { |
| "epoch": 2.974493623405851, |
| "grad_norm": 9.405010223388672, |
| "learning_rate": 1.8404907975460125e-07, |
| "loss": 0.5338, |
| "step": 7930 |
| }, |
| { |
| "epoch": 2.978244561140285, |
| "grad_norm": 13.773921966552734, |
| "learning_rate": 1.5737530008002134e-07, |
| "loss": 0.6314, |
| "step": 7940 |
| }, |
| { |
| "epoch": 2.981995498874719, |
| "grad_norm": 12.41072940826416, |
| "learning_rate": 1.3070152040544146e-07, |
| "loss": 0.5497, |
| "step": 7950 |
| }, |
| { |
| "epoch": 2.985746436609152, |
| "grad_norm": 17.232473373413086, |
| "learning_rate": 1.0402774073086158e-07, |
| "loss": 0.588, |
| "step": 7960 |
| }, |
| { |
| "epoch": 2.989497374343586, |
| "grad_norm": 27.516319274902344, |
| "learning_rate": 7.735396105628168e-08, |
| "loss": 0.7673, |
| "step": 7970 |
| }, |
| { |
| "epoch": 2.99324831207802, |
| "grad_norm": 16.864728927612305, |
| "learning_rate": 5.0680181381701795e-08, |
| "loss": 0.6883, |
| "step": 7980 |
| }, |
| { |
| "epoch": 2.996999249812453, |
| "grad_norm": 16.803760528564453, |
| "learning_rate": 2.40064017071219e-08, |
| "loss": 0.6337, |
| "step": 7990 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.5919831223628692, |
| "eval_f1_macro": 0.5904844573730711, |
| "eval_f1_weighted": 0.5917816930917, |
| "eval_loss": 1.0033386945724487, |
| "eval_precision_macro": 0.5945695673493336, |
| "eval_precision_weighted": 0.5926704635628428, |
| "eval_recall_macro": 0.5877159391363334, |
| "eval_recall_weighted": 0.5919831223628692, |
| "eval_runtime": 4.8573, |
| "eval_samples_per_second": 487.928, |
| "eval_steps_per_second": 61.145, |
| "step": 7998 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 7998, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.68326808991488e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
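The object above is the Trainer's serialized training state. As a minimal sketch of how to consume it, the following assumes the JSON is saved under the Trainer's default filename `trainer_state.json` and uses only the keys visible in this file (`log_history`, with `loss` on training entries and `eval_loss` on evaluation entries) to recover the loss curves:

```python
# Minimal sketch: load the Trainer state and split log_history into
# training-loss and eval-loss series. Assumes the JSON above is stored
# locally as trainer_state.json (the Trainer's default filename).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train_log = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_log = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print(f"train points: {len(train_log)}, eval points: {len(eval_log)}")
step, loss = eval_log[-1]
print(f"final eval loss at step {step}: {loss:.4f}")  # step 7998: 1.0033
```

The same filter pattern extends to any other logged metric (e.g. `eval_f1_macro` or `grad_norm`), since each `log_history` entry only includes the keys relevant to that logging event.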