| { | |
| "best_global_step": 7998, | |
| "best_metric": 1.0033386945724487, | |
| "best_model_checkpoint": "./../../../models/LedgerBERT-SciBERT-base-v3-News-Class/2025-10-15_00-24-07/market_direction/checkpoint-7998", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 7998, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_accuracy": 0.37046413502109704, | |
| "eval_f1_macro": 0.31648220525898246, | |
| "eval_f1_weighted": 0.3428571794493407, | |
| "eval_loss": 1.0956553220748901, | |
| "eval_precision_macro": 0.3404493817232522, | |
| "eval_precision_weighted": 0.3541600044961222, | |
| "eval_recall_macro": 0.3400488233349732, | |
| "eval_recall_weighted": 0.37046413502109704, | |
| "eval_runtime": 5.1696, | |
| "eval_samples_per_second": 458.454, | |
| "eval_steps_per_second": 57.452, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.00037509377344336085, | |
| "grad_norm": 6.950562000274658, | |
| "learning_rate": 0.0, | |
| "loss": 1.0664, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0037509377344336083, | |
| "grad_norm": 7.2157673835754395, | |
| "learning_rate": 3.6e-07, | |
| "loss": 1.1053, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.007501875468867217, | |
| "grad_norm": 7.101637840270996, | |
| "learning_rate": 7.6e-07, | |
| "loss": 1.081, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.011252813203300824, | |
| "grad_norm": 7.503627777099609, | |
| "learning_rate": 1.1600000000000001e-06, | |
| "loss": 1.1167, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.015003750937734433, | |
| "grad_norm": 6.733654975891113, | |
| "learning_rate": 1.56e-06, | |
| "loss": 1.1038, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.018754688672168042, | |
| "grad_norm": 6.987666130065918, | |
| "learning_rate": 1.9600000000000003e-06, | |
| "loss": 1.1025, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02250562640660165, | |
| "grad_norm": 9.358382225036621, | |
| "learning_rate": 2.3600000000000003e-06, | |
| "loss": 1.1166, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02625656414103526, | |
| "grad_norm": 6.5409040451049805, | |
| "learning_rate": 2.7600000000000003e-06, | |
| "loss": 1.0841, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.030007501875468866, | |
| "grad_norm": 7.030813217163086, | |
| "learning_rate": 3.1600000000000002e-06, | |
| "loss": 1.0583, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03375843960990248, | |
| "grad_norm": 6.986401081085205, | |
| "learning_rate": 3.5600000000000002e-06, | |
| "loss": 1.1288, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.037509377344336084, | |
| "grad_norm": 5.53237247467041, | |
| "learning_rate": 3.96e-06, | |
| "loss": 1.0573, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04126031507876969, | |
| "grad_norm": 14.836161613464355, | |
| "learning_rate": 4.360000000000001e-06, | |
| "loss": 1.0754, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0450112528132033, | |
| "grad_norm": 8.877525329589844, | |
| "learning_rate": 4.76e-06, | |
| "loss": 1.0753, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04876219054763691, | |
| "grad_norm": 10.311164855957031, | |
| "learning_rate": 5.1600000000000006e-06, | |
| "loss": 1.1246, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05251312828207052, | |
| "grad_norm": 5.360109329223633, | |
| "learning_rate": 5.560000000000001e-06, | |
| "loss": 1.0218, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.056264066016504126, | |
| "grad_norm": 10.249133110046387, | |
| "learning_rate": 5.9600000000000005e-06, | |
| "loss": 1.0723, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06001500375093773, | |
| "grad_norm": 7.881443500518799, | |
| "learning_rate": 6.360000000000001e-06, | |
| "loss": 1.0727, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06376594148537135, | |
| "grad_norm": 5.892578601837158, | |
| "learning_rate": 6.760000000000001e-06, | |
| "loss": 1.0498, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06751687921980495, | |
| "grad_norm": 6.164844512939453, | |
| "learning_rate": 7.16e-06, | |
| "loss": 1.1182, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07126781695423856, | |
| "grad_norm": 6.351868629455566, | |
| "learning_rate": 7.5600000000000005e-06, | |
| "loss": 1.0735, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07501875468867217, | |
| "grad_norm": 6.895458698272705, | |
| "learning_rate": 7.960000000000002e-06, | |
| "loss": 1.0503, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07876969242310577, | |
| "grad_norm": 8.486842155456543, | |
| "learning_rate": 8.36e-06, | |
| "loss": 1.0965, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08252063015753938, | |
| "grad_norm": 8.301511764526367, | |
| "learning_rate": 8.76e-06, | |
| "loss": 1.1157, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08627156789197299, | |
| "grad_norm": 11.515487670898438, | |
| "learning_rate": 9.16e-06, | |
| "loss": 1.0854, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.0900225056264066, | |
| "grad_norm": 6.189631938934326, | |
| "learning_rate": 9.56e-06, | |
| "loss": 1.054, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09377344336084022, | |
| "grad_norm": 4.8885393142700195, | |
| "learning_rate": 9.960000000000001e-06, | |
| "loss": 1.0693, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09752438109527382, | |
| "grad_norm": 6.190073490142822, | |
| "learning_rate": 1.036e-05, | |
| "loss": 1.0786, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10127531882970743, | |
| "grad_norm": 8.178174018859863, | |
| "learning_rate": 1.0760000000000002e-05, | |
| "loss": 1.0374, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.10502625656414104, | |
| "grad_norm": 5.824592113494873, | |
| "learning_rate": 1.1160000000000002e-05, | |
| "loss": 1.0829, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.10877719429857464, | |
| "grad_norm": 7.339807033538818, | |
| "learning_rate": 1.156e-05, | |
| "loss": 1.1085, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.11252813203300825, | |
| "grad_norm": 6.39154577255249, | |
| "learning_rate": 1.196e-05, | |
| "loss": 1.0505, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11627906976744186, | |
| "grad_norm": 7.54710054397583, | |
| "learning_rate": 1.236e-05, | |
| "loss": 1.049, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.12003000750187547, | |
| "grad_norm": 10.610452651977539, | |
| "learning_rate": 1.2760000000000001e-05, | |
| "loss": 1.1105, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.12378094523630907, | |
| "grad_norm": 6.961548328399658, | |
| "learning_rate": 1.3160000000000001e-05, | |
| "loss": 1.0392, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1275318829707427, | |
| "grad_norm": 8.800139427185059, | |
| "learning_rate": 1.3560000000000002e-05, | |
| "loss": 1.1473, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1312828207051763, | |
| "grad_norm": 7.540011405944824, | |
| "learning_rate": 1.396e-05, | |
| "loss": 1.0891, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1350337584396099, | |
| "grad_norm": 11.337075233459473, | |
| "learning_rate": 1.4360000000000001e-05, | |
| "loss": 1.0715, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.13878469617404351, | |
| "grad_norm": 5.6576457023620605, | |
| "learning_rate": 1.4760000000000001e-05, | |
| "loss": 1.0702, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.14253563390847712, | |
| "grad_norm": 8.98009967803955, | |
| "learning_rate": 1.516e-05, | |
| "loss": 1.0752, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.14628657164291073, | |
| "grad_norm": 4.932474613189697, | |
| "learning_rate": 1.556e-05, | |
| "loss": 1.0641, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.15003750937734434, | |
| "grad_norm": 6.130215644836426, | |
| "learning_rate": 1.5960000000000003e-05, | |
| "loss": 1.0133, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15378844711177794, | |
| "grad_norm": 16.0273380279541, | |
| "learning_rate": 1.636e-05, | |
| "loss": 1.0442, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.15753938484621155, | |
| "grad_norm": 12.93301010131836, | |
| "learning_rate": 1.6760000000000002e-05, | |
| "loss": 1.1161, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.16129032258064516, | |
| "grad_norm": 9.27346420288086, | |
| "learning_rate": 1.7160000000000002e-05, | |
| "loss": 1.0539, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.16504126031507876, | |
| "grad_norm": 5.5671186447143555, | |
| "learning_rate": 1.756e-05, | |
| "loss": 0.9452, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.16879219804951237, | |
| "grad_norm": 7.939000606536865, | |
| "learning_rate": 1.796e-05, | |
| "loss": 1.0522, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.17254313578394598, | |
| "grad_norm": 9.265899658203125, | |
| "learning_rate": 1.8360000000000004e-05, | |
| "loss": 1.0866, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.17629407351837958, | |
| "grad_norm": 6.934913158416748, | |
| "learning_rate": 1.876e-05, | |
| "loss": 0.9723, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.1800450112528132, | |
| "grad_norm": 6.007977485656738, | |
| "learning_rate": 1.916e-05, | |
| "loss": 0.9742, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.1837959489872468, | |
| "grad_norm": 7.842029094696045, | |
| "learning_rate": 1.9560000000000002e-05, | |
| "loss": 0.9334, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.18754688672168043, | |
| "grad_norm": 10.53432559967041, | |
| "learning_rate": 1.9960000000000002e-05, | |
| "loss": 0.8706, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19129782445611404, | |
| "grad_norm": 9.365771293640137, | |
| "learning_rate": 1.997599359829288e-05, | |
| "loss": 1.0399, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.19504876219054765, | |
| "grad_norm": 9.351228713989258, | |
| "learning_rate": 1.99493198186183e-05, | |
| "loss": 0.9525, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.19879969992498125, | |
| "grad_norm": 12.21917724609375, | |
| "learning_rate": 1.992264603894372e-05, | |
| "loss": 0.9793, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.20255063765941486, | |
| "grad_norm": 17.076719284057617, | |
| "learning_rate": 1.9895972259269142e-05, | |
| "loss": 1.0403, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.20630157539384847, | |
| "grad_norm": 6.928652286529541, | |
| "learning_rate": 1.9869298479594562e-05, | |
| "loss": 0.9047, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.21005251312828208, | |
| "grad_norm": 6.858879089355469, | |
| "learning_rate": 1.984262469991998e-05, | |
| "loss": 1.012, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.21380345086271568, | |
| "grad_norm": 5.987520217895508, | |
| "learning_rate": 1.98159509202454e-05, | |
| "loss": 0.9345, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.2175543885971493, | |
| "grad_norm": 12.161517143249512, | |
| "learning_rate": 1.978927714057082e-05, | |
| "loss": 0.9955, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.2213053263315829, | |
| "grad_norm": 9.229764938354492, | |
| "learning_rate": 1.976260336089624e-05, | |
| "loss": 0.998, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2250562640660165, | |
| "grad_norm": 9.257465362548828, | |
| "learning_rate": 1.973592958122166e-05, | |
| "loss": 0.9882, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2288072018004501, | |
| "grad_norm": 11.260259628295898, | |
| "learning_rate": 1.970925580154708e-05, | |
| "loss": 0.9727, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.23255813953488372, | |
| "grad_norm": 5.7551984786987305, | |
| "learning_rate": 1.96825820218725e-05, | |
| "loss": 0.9139, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.23630907726931732, | |
| "grad_norm": 7.264505863189697, | |
| "learning_rate": 1.9655908242197922e-05, | |
| "loss": 0.8718, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.24006001500375093, | |
| "grad_norm": 13.518917083740234, | |
| "learning_rate": 1.9629234462523342e-05, | |
| "loss": 1.0478, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.24381095273818454, | |
| "grad_norm": 7.133944034576416, | |
| "learning_rate": 1.960256068284876e-05, | |
| "loss": 0.951, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.24756189047261815, | |
| "grad_norm": 10.491629600524902, | |
| "learning_rate": 1.957588690317418e-05, | |
| "loss": 0.9271, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.25131282820705175, | |
| "grad_norm": 6.807431697845459, | |
| "learning_rate": 1.95492131234996e-05, | |
| "loss": 1.0804, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2550637659414854, | |
| "grad_norm": 9.180730819702148, | |
| "learning_rate": 1.9522539343825024e-05, | |
| "loss": 0.9079, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.25881470367591897, | |
| "grad_norm": 6.459209442138672, | |
| "learning_rate": 1.9495865564150443e-05, | |
| "loss": 0.9989, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2625656414103526, | |
| "grad_norm": 5.8546929359436035, | |
| "learning_rate": 1.9469191784475863e-05, | |
| "loss": 0.951, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2663165791447862, | |
| "grad_norm": 10.301909446716309, | |
| "learning_rate": 1.9442518004801282e-05, | |
| "loss": 0.8549, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2700675168792198, | |
| "grad_norm": 17.759777069091797, | |
| "learning_rate": 1.9415844225126702e-05, | |
| "loss": 1.1818, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2738184546136534, | |
| "grad_norm": 7.105804920196533, | |
| "learning_rate": 1.938917044545212e-05, | |
| "loss": 1.034, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.27756939234808703, | |
| "grad_norm": 8.125602722167969, | |
| "learning_rate": 1.936249666577754e-05, | |
| "loss": 0.9509, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2813203300825206, | |
| "grad_norm": 6.968907833099365, | |
| "learning_rate": 1.933582288610296e-05, | |
| "loss": 0.9292, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.28507126781695424, | |
| "grad_norm": 9.841052055358887, | |
| "learning_rate": 1.930914910642838e-05, | |
| "loss": 1.0401, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2888222055513878, | |
| "grad_norm": 6.7177910804748535, | |
| "learning_rate": 1.9282475326753804e-05, | |
| "loss": 1.0079, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.29257314328582146, | |
| "grad_norm": 8.652711868286133, | |
| "learning_rate": 1.9255801547079223e-05, | |
| "loss": 0.8986, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.29632408102025504, | |
| "grad_norm": 7.266161918640137, | |
| "learning_rate": 1.9229127767404643e-05, | |
| "loss": 0.9805, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.30007501875468867, | |
| "grad_norm": 7.372107982635498, | |
| "learning_rate": 1.9202453987730062e-05, | |
| "loss": 1.0254, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3038259564891223, | |
| "grad_norm": 6.467881202697754, | |
| "learning_rate": 1.9175780208055482e-05, | |
| "loss": 0.9931, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.3075768942235559, | |
| "grad_norm": 8.692418098449707, | |
| "learning_rate": 1.9149106428380905e-05, | |
| "loss": 0.8585, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.3113278319579895, | |
| "grad_norm": 7.981175422668457, | |
| "learning_rate": 1.9122432648706325e-05, | |
| "loss": 0.9164, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.3150787696924231, | |
| "grad_norm": 11.882697105407715, | |
| "learning_rate": 1.9095758869031744e-05, | |
| "loss": 1.0325, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.31882970742685673, | |
| "grad_norm": 10.736306190490723, | |
| "learning_rate": 1.9069085089357164e-05, | |
| "loss": 0.9888, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.3225806451612903, | |
| "grad_norm": 5.334744453430176, | |
| "learning_rate": 1.9042411309682583e-05, | |
| "loss": 0.9364, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.32633158289572395, | |
| "grad_norm": 6.579550743103027, | |
| "learning_rate": 1.9015737530008003e-05, | |
| "loss": 0.9395, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.3300825206301575, | |
| "grad_norm": 7.336994171142578, | |
| "learning_rate": 1.8989063750333423e-05, | |
| "loss": 0.9363, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.33383345836459116, | |
| "grad_norm": 9.523600578308105, | |
| "learning_rate": 1.8962389970658842e-05, | |
| "loss": 0.9405, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.33758439609902474, | |
| "grad_norm": 9.350625038146973, | |
| "learning_rate": 1.8935716190984262e-05, | |
| "loss": 1.0351, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3413353338334584, | |
| "grad_norm": 9.00391674041748, | |
| "learning_rate": 1.8909042411309685e-05, | |
| "loss": 0.9721, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.34508627156789196, | |
| "grad_norm": 5.69331693649292, | |
| "learning_rate": 1.8882368631635105e-05, | |
| "loss": 0.8811, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.3488372093023256, | |
| "grad_norm": 6.127689361572266, | |
| "learning_rate": 1.8855694851960524e-05, | |
| "loss": 1.0079, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.35258814703675917, | |
| "grad_norm": 11.653777122497559, | |
| "learning_rate": 1.8829021072285944e-05, | |
| "loss": 0.9518, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3563390847711928, | |
| "grad_norm": 7.30828332901001, | |
| "learning_rate": 1.8802347292611363e-05, | |
| "loss": 0.8464, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3600900225056264, | |
| "grad_norm": 9.21927547454834, | |
| "learning_rate": 1.8775673512936786e-05, | |
| "loss": 1.0584, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.36384096024006, | |
| "grad_norm": 6.939789772033691, | |
| "learning_rate": 1.8748999733262206e-05, | |
| "loss": 0.9, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.3675918979744936, | |
| "grad_norm": 12.434165954589844, | |
| "learning_rate": 1.8722325953587626e-05, | |
| "loss": 1.015, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.37134283570892723, | |
| "grad_norm": 11.779828071594238, | |
| "learning_rate": 1.8695652173913045e-05, | |
| "loss": 0.9725, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.37509377344336087, | |
| "grad_norm": 12.166790962219238, | |
| "learning_rate": 1.8668978394238465e-05, | |
| "loss": 1.0591, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.37884471117779445, | |
| "grad_norm": 8.87903881072998, | |
| "learning_rate": 1.8642304614563884e-05, | |
| "loss": 0.9767, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3825956489122281, | |
| "grad_norm": 5.176930904388428, | |
| "learning_rate": 1.8615630834889304e-05, | |
| "loss": 0.8934, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.38634658664666166, | |
| "grad_norm": 7.772132396697998, | |
| "learning_rate": 1.8588957055214724e-05, | |
| "loss": 0.9488, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3900975243810953, | |
| "grad_norm": 10.097055435180664, | |
| "learning_rate": 1.8562283275540143e-05, | |
| "loss": 0.9725, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.3938484621155289, | |
| "grad_norm": 10.014994621276855, | |
| "learning_rate": 1.8535609495865566e-05, | |
| "loss": 0.9432, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.3975993998499625, | |
| "grad_norm": 10.885961532592773, | |
| "learning_rate": 1.8508935716190986e-05, | |
| "loss": 1.0393, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.4013503375843961, | |
| "grad_norm": 7.621641635894775, | |
| "learning_rate": 1.8482261936516406e-05, | |
| "loss": 0.9801, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.4051012753188297, | |
| "grad_norm": 6.268519878387451, | |
| "learning_rate": 1.8455588156841825e-05, | |
| "loss": 0.9922, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.4088522130532633, | |
| "grad_norm": 6.714245796203613, | |
| "learning_rate": 1.8428914377167245e-05, | |
| "loss": 1.0355, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.41260315078769694, | |
| "grad_norm": 11.643074035644531, | |
| "learning_rate": 1.8402240597492668e-05, | |
| "loss": 1.0575, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.4163540885221305, | |
| "grad_norm": 6.439828395843506, | |
| "learning_rate": 1.8375566817818087e-05, | |
| "loss": 0.9101, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.42010502625656415, | |
| "grad_norm": 6.833279609680176, | |
| "learning_rate": 1.8348893038143507e-05, | |
| "loss": 0.935, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.42385596399099773, | |
| "grad_norm": 7.262381553649902, | |
| "learning_rate": 1.8322219258468927e-05, | |
| "loss": 0.977, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.42760690172543137, | |
| "grad_norm": 5.480360984802246, | |
| "learning_rate": 1.8295545478794346e-05, | |
| "loss": 0.8673, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.43135783945986494, | |
| "grad_norm": 8.4745454788208, | |
| "learning_rate": 1.8268871699119766e-05, | |
| "loss": 0.88, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4351087771942986, | |
| "grad_norm": 16.769878387451172, | |
| "learning_rate": 1.8242197919445185e-05, | |
| "loss": 0.9576, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.43885971492873216, | |
| "grad_norm": 7.4179582595825195, | |
| "learning_rate": 1.8215524139770605e-05, | |
| "loss": 0.9263, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.4426106526631658, | |
| "grad_norm": 11.899470329284668, | |
| "learning_rate": 1.8188850360096028e-05, | |
| "loss": 0.9328, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.4463615903975994, | |
| "grad_norm": 8.113855361938477, | |
| "learning_rate": 1.8162176580421448e-05, | |
| "loss": 0.9684, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.450112528132033, | |
| "grad_norm": 7.619154453277588, | |
| "learning_rate": 1.8135502800746867e-05, | |
| "loss": 0.918, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.45386346586646664, | |
| "grad_norm": 7.7961602210998535, | |
| "learning_rate": 1.8108829021072287e-05, | |
| "loss": 0.8574, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4576144036009002, | |
| "grad_norm": 8.734787940979004, | |
| "learning_rate": 1.8082155241397707e-05, | |
| "loss": 0.9009, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.46136534133533386, | |
| "grad_norm": 5.773232936859131, | |
| "learning_rate": 1.8055481461723126e-05, | |
| "loss": 1.0554, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 7.872585773468018, | |
| "learning_rate": 1.802880768204855e-05, | |
| "loss": 0.8688, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.46886721680420107, | |
| "grad_norm": 7.2498602867126465, | |
| "learning_rate": 1.800213390237397e-05, | |
| "loss": 0.9726, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.47261815453863465, | |
| "grad_norm": 11.007004737854004, | |
| "learning_rate": 1.797546012269939e-05, | |
| "loss": 0.9338, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.4763690922730683, | |
| "grad_norm": 10.418313980102539, | |
| "learning_rate": 1.7948786343024808e-05, | |
| "loss": 0.9217, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.48012003000750186, | |
| "grad_norm": 11.935880661010742, | |
| "learning_rate": 1.7922112563350228e-05, | |
| "loss": 0.8656, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.4838709677419355, | |
| "grad_norm": 10.331807136535645, | |
| "learning_rate": 1.789543878367565e-05, | |
| "loss": 0.9948, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.4876219054763691, | |
| "grad_norm": 7.979977607727051, | |
| "learning_rate": 1.7868765004001067e-05, | |
| "loss": 0.9068, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.4913728432108027, | |
| "grad_norm": 7.865904808044434, | |
| "learning_rate": 1.7842091224326486e-05, | |
| "loss": 0.8362, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.4951237809452363, | |
| "grad_norm": 11.6406888961792, | |
| "learning_rate": 1.781541744465191e-05, | |
| "loss": 1.0061, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.4988747186796699, | |
| "grad_norm": 9.274069786071777, | |
| "learning_rate": 1.778874366497733e-05, | |
| "loss": 0.9448, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.5026256564141035, | |
| "grad_norm": 9.999556541442871, | |
| "learning_rate": 1.776206988530275e-05, | |
| "loss": 0.9188, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5063765941485371, | |
| "grad_norm": 10.032958984375, | |
| "learning_rate": 1.773539610562817e-05, | |
| "loss": 0.9794, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5101275318829708, | |
| "grad_norm": 5.453114032745361, | |
| "learning_rate": 1.7708722325953588e-05, | |
| "loss": 1.0102, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5138784696174044, | |
| "grad_norm": 13.257373809814453, | |
| "learning_rate": 1.7682048546279008e-05, | |
| "loss": 0.9801, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5176294073518379, | |
| "grad_norm": 5.355706691741943, | |
| "learning_rate": 1.765537476660443e-05, | |
| "loss": 0.9126, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.5213803450862715, | |
| "grad_norm": 9.768399238586426, | |
| "learning_rate": 1.762870098692985e-05, | |
| "loss": 0.9423, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5251312828207052, | |
| "grad_norm": 8.362143516540527, | |
| "learning_rate": 1.760202720725527e-05, | |
| "loss": 1.0289, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5288822205551388, | |
| "grad_norm": 10.58354377746582, | |
| "learning_rate": 1.757535342758069e-05, | |
| "loss": 0.9593, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5326331582895724, | |
| "grad_norm": 8.964977264404297, | |
| "learning_rate": 1.754867964790611e-05, | |
| "loss": 1.1002, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.536384096024006, | |
| "grad_norm": 11.886764526367188, | |
| "learning_rate": 1.7522005868231532e-05, | |
| "loss": 0.842, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5401350337584396, | |
| "grad_norm": 9.155001640319824, | |
| "learning_rate": 1.7495332088556948e-05, | |
| "loss": 1.0402, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5438859714928732, | |
| "grad_norm": 7.865649223327637, | |
| "learning_rate": 1.7468658308882368e-05, | |
| "loss": 0.9502, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5476369092273068, | |
| "grad_norm": 8.232137680053711, | |
| "learning_rate": 1.744198452920779e-05, | |
| "loss": 0.9042, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5513878469617405, | |
| "grad_norm": 7.428460597991943, | |
| "learning_rate": 1.741531074953321e-05, | |
| "loss": 0.8664, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5551387846961741, | |
| "grad_norm": 6.769949913024902, | |
| "learning_rate": 1.738863696985863e-05, | |
| "loss": 0.9676, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5588897224306076, | |
| "grad_norm": 7.262323379516602, | |
| "learning_rate": 1.736196319018405e-05, | |
| "loss": 0.9461, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.5626406601650412, | |
| "grad_norm": 7.46332311630249, | |
| "learning_rate": 1.733528941050947e-05, | |
| "loss": 0.9928, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5663915978994749, | |
| "grad_norm": 13.346348762512207, | |
| "learning_rate": 1.7308615630834892e-05, | |
| "loss": 0.9645, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.5701425356339085, | |
| "grad_norm": 7.057946681976318, | |
| "learning_rate": 1.7281941851160312e-05, | |
| "loss": 0.872, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5738934733683421, | |
| "grad_norm": 11.920793533325195, | |
| "learning_rate": 1.725526807148573e-05, | |
| "loss": 0.9084, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.5776444111027756, | |
| "grad_norm": 4.696298122406006, | |
| "learning_rate": 1.722859429181115e-05, | |
| "loss": 0.9184, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5813953488372093, | |
| "grad_norm": 9.623963356018066, | |
| "learning_rate": 1.720192051213657e-05, | |
| "loss": 0.8924, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5851462865716429, | |
| "grad_norm": 10.262091636657715, | |
| "learning_rate": 1.717524673246199e-05, | |
| "loss": 0.9476, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5888972243060765, | |
| "grad_norm": 10.587578773498535, | |
| "learning_rate": 1.7148572952787413e-05, | |
| "loss": 0.9443, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.5926481620405101, | |
| "grad_norm": 8.189558029174805, | |
| "learning_rate": 1.7121899173112833e-05, | |
| "loss": 0.9245, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.5963990997749438, | |
| "grad_norm": 7.582670211791992, | |
| "learning_rate": 1.709522539343825e-05, | |
| "loss": 0.8533, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.6001500375093773, | |
| "grad_norm": 8.973713874816895, | |
| "learning_rate": 1.7068551613763672e-05, | |
| "loss": 0.9197, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6039009752438109, | |
| "grad_norm": 7.140238285064697, | |
| "learning_rate": 1.7041877834089092e-05, | |
| "loss": 0.8815, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6076519129782446, | |
| "grad_norm": 7.83927059173584, | |
| "learning_rate": 1.701520405441451e-05, | |
| "loss": 0.9782, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6114028507126782, | |
| "grad_norm": 6.876523494720459, | |
| "learning_rate": 1.698853027473993e-05, | |
| "loss": 0.9575, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6151537884471118, | |
| "grad_norm": 10.362568855285645, | |
| "learning_rate": 1.696185649506535e-05, | |
| "loss": 0.8977, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6189047261815454, | |
| "grad_norm": 9.509383201599121, | |
| "learning_rate": 1.6935182715390774e-05, | |
| "loss": 0.996, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.622655663915979, | |
| "grad_norm": 5.023642539978027, | |
| "learning_rate": 1.6908508935716193e-05, | |
| "loss": 0.9131, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6264066016504126, | |
| "grad_norm": 6.320276260375977, | |
| "learning_rate": 1.6881835156041613e-05, | |
| "loss": 0.9765, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6301575393848462, | |
| "grad_norm": 10.261762619018555, | |
| "learning_rate": 1.6855161376367033e-05, | |
| "loss": 0.9057, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6339084771192798, | |
| "grad_norm": 8.115468978881836, | |
| "learning_rate": 1.6828487596692452e-05, | |
| "loss": 0.8892, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6376594148537135, | |
| "grad_norm": 10.657661437988281, | |
| "learning_rate": 1.6801813817017875e-05, | |
| "loss": 0.9186, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.641410352588147, | |
| "grad_norm": 7.065814018249512, | |
| "learning_rate": 1.6775140037343295e-05, | |
| "loss": 0.8878, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6451612903225806, | |
| "grad_norm": 8.048439979553223, | |
| "learning_rate": 1.6748466257668714e-05, | |
| "loss": 0.946, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6489122280570142, | |
| "grad_norm": 10.228202819824219, | |
| "learning_rate": 1.672179247799413e-05, | |
| "loss": 0.838, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6526631657914479, | |
| "grad_norm": 10.011300086975098, | |
| "learning_rate": 1.6695118698319554e-05, | |
| "loss": 1.0565, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.6564141035258815, | |
| "grad_norm": 8.266985893249512, | |
| "learning_rate": 1.6668444918644973e-05, | |
| "loss": 0.9523, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.660165041260315, | |
| "grad_norm": 7.511131763458252, | |
| "learning_rate": 1.6641771138970393e-05, | |
| "loss": 1.0325, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.6639159789947486, | |
| "grad_norm": 7.235232830047607, | |
| "learning_rate": 1.6615097359295813e-05, | |
| "loss": 0.9197, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.6676669167291823, | |
| "grad_norm": 8.137916564941406, | |
| "learning_rate": 1.6588423579621232e-05, | |
| "loss": 0.886, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.6714178544636159, | |
| "grad_norm": 7.320621013641357, | |
| "learning_rate": 1.6561749799946655e-05, | |
| "loss": 0.8866, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.6751687921980495, | |
| "grad_norm": 8.104268074035645, | |
| "learning_rate": 1.6535076020272075e-05, | |
| "loss": 0.9554, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6789197299324832, | |
| "grad_norm": 8.669350624084473, | |
| "learning_rate": 1.6508402240597494e-05, | |
| "loss": 0.907, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.6826706676669168, | |
| "grad_norm": 7.718722820281982, | |
| "learning_rate": 1.6481728460922914e-05, | |
| "loss": 0.9931, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.6864216054013503, | |
| "grad_norm": 6.479692459106445, | |
| "learning_rate": 1.6455054681248334e-05, | |
| "loss": 0.9669, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.6901725431357839, | |
| "grad_norm": 5.159636497497559, | |
| "learning_rate": 1.6428380901573757e-05, | |
| "loss": 1.0003, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.6939234808702176, | |
| "grad_norm": 6.043707847595215, | |
| "learning_rate": 1.6401707121899176e-05, | |
| "loss": 0.894, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.6976744186046512, | |
| "grad_norm": 8.509610176086426, | |
| "learning_rate": 1.6375033342224596e-05, | |
| "loss": 1.0656, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.7014253563390848, | |
| "grad_norm": 10.496292114257812, | |
| "learning_rate": 1.6348359562550015e-05, | |
| "loss": 0.9162, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.7051762940735183, | |
| "grad_norm": 9.357151985168457, | |
| "learning_rate": 1.6321685782875435e-05, | |
| "loss": 0.8575, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.708927231807952, | |
| "grad_norm": 7.78256368637085, | |
| "learning_rate": 1.6295012003200855e-05, | |
| "loss": 0.7904, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7126781695423856, | |
| "grad_norm": 6.14832067489624, | |
| "learning_rate": 1.6268338223526274e-05, | |
| "loss": 0.8348, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7164291072768192, | |
| "grad_norm": 7.879366874694824, | |
| "learning_rate": 1.6241664443851694e-05, | |
| "loss": 0.8826, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7201800450112528, | |
| "grad_norm": 6.204752445220947, | |
| "learning_rate": 1.6214990664177114e-05, | |
| "loss": 0.9157, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7239309827456865, | |
| "grad_norm": 7.274019241333008, | |
| "learning_rate": 1.6188316884502537e-05, | |
| "loss": 0.8869, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.72768192048012, | |
| "grad_norm": 5.929676055908203, | |
| "learning_rate": 1.6161643104827956e-05, | |
| "loss": 0.9372, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.7314328582145536, | |
| "grad_norm": 9.161755561828613, | |
| "learning_rate": 1.6134969325153376e-05, | |
| "loss": 0.9211, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7351837959489872, | |
| "grad_norm": 5.079675674438477, | |
| "learning_rate": 1.6108295545478795e-05, | |
| "loss": 0.8084, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7389347336834209, | |
| "grad_norm": 8.15173053741455, | |
| "learning_rate": 1.6081621765804215e-05, | |
| "loss": 1.0033, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.7426856714178545, | |
| "grad_norm": 6.805727005004883, | |
| "learning_rate": 1.6054947986129638e-05, | |
| "loss": 1.0074, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.746436609152288, | |
| "grad_norm": 8.05391788482666, | |
| "learning_rate": 1.6028274206455058e-05, | |
| "loss": 0.9942, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.7501875468867217, | |
| "grad_norm": 6.02817440032959, | |
| "learning_rate": 1.6001600426780477e-05, | |
| "loss": 1.0494, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7539384846211553, | |
| "grad_norm": 9.404801368713379, | |
| "learning_rate": 1.5974926647105897e-05, | |
| "loss": 0.9451, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.7576894223555889, | |
| "grad_norm": 5.526783466339111, | |
| "learning_rate": 1.5948252867431316e-05, | |
| "loss": 0.9378, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.7614403600900225, | |
| "grad_norm": 8.972588539123535, | |
| "learning_rate": 1.5921579087756736e-05, | |
| "loss": 0.9808, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.7651912978244562, | |
| "grad_norm": 4.961981296539307, | |
| "learning_rate": 1.5894905308082156e-05, | |
| "loss": 0.9078, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.7689422355588897, | |
| "grad_norm": 3.8509440422058105, | |
| "learning_rate": 1.5868231528407575e-05, | |
| "loss": 1.0518, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7726931732933233, | |
| "grad_norm": 7.673577785491943, | |
| "learning_rate": 1.5841557748732995e-05, | |
| "loss": 0.9075, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.7764441110277569, | |
| "grad_norm": 8.731016159057617, | |
| "learning_rate": 1.5814883969058418e-05, | |
| "loss": 0.9208, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.7801950487621906, | |
| "grad_norm": 6.979492664337158, | |
| "learning_rate": 1.5788210189383838e-05, | |
| "loss": 0.8977, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.7839459864966242, | |
| "grad_norm": 8.666240692138672, | |
| "learning_rate": 1.5761536409709257e-05, | |
| "loss": 0.899, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.7876969242310577, | |
| "grad_norm": 6.528694152832031, | |
| "learning_rate": 1.5734862630034677e-05, | |
| "loss": 0.844, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7914478619654913, | |
| "grad_norm": 7.253232479095459, | |
| "learning_rate": 1.5708188850360096e-05, | |
| "loss": 0.7766, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.795198799699925, | |
| "grad_norm": 6.888519287109375, | |
| "learning_rate": 1.568151507068552e-05, | |
| "loss": 0.9393, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.7989497374343586, | |
| "grad_norm": 6.408233165740967, | |
| "learning_rate": 1.565484129101094e-05, | |
| "loss": 1.0171, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8027006751687922, | |
| "grad_norm": 9.36056137084961, | |
| "learning_rate": 1.562816751133636e-05, | |
| "loss": 0.9127, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8064516129032258, | |
| "grad_norm": 11.695134162902832, | |
| "learning_rate": 1.5601493731661778e-05, | |
| "loss": 1.0232, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8102025506376594, | |
| "grad_norm": 6.716568470001221, | |
| "learning_rate": 1.5574819951987198e-05, | |
| "loss": 0.9904, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.813953488372093, | |
| "grad_norm": 5.994268417358398, | |
| "learning_rate": 1.5548146172312617e-05, | |
| "loss": 0.8897, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.8177044261065266, | |
| "grad_norm": 8.419204711914062, | |
| "learning_rate": 1.5521472392638037e-05, | |
| "loss": 0.8315, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8214553638409603, | |
| "grad_norm": 6.702762603759766, | |
| "learning_rate": 1.5494798612963457e-05, | |
| "loss": 0.9393, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8252063015753939, | |
| "grad_norm": 9.53264045715332, | |
| "learning_rate": 1.5468124833288876e-05, | |
| "loss": 1.0074, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8289572393098275, | |
| "grad_norm": 5.6720476150512695, | |
| "learning_rate": 1.54414510536143e-05, | |
| "loss": 0.7935, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.832708177044261, | |
| "grad_norm": 7.338003158569336, | |
| "learning_rate": 1.541477727393972e-05, | |
| "loss": 0.898, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.8364591147786947, | |
| "grad_norm": 6.529892444610596, | |
| "learning_rate": 1.538810349426514e-05, | |
| "loss": 0.8197, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.8402100525131283, | |
| "grad_norm": 9.971487045288086, | |
| "learning_rate": 1.5361429714590558e-05, | |
| "loss": 0.9551, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.8439609902475619, | |
| "grad_norm": 5.594128608703613, | |
| "learning_rate": 1.5334755934915978e-05, | |
| "loss": 1.1114, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.8477119279819955, | |
| "grad_norm": 5.723794460296631, | |
| "learning_rate": 1.53080821552414e-05, | |
| "loss": 0.9341, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.8514628657164292, | |
| "grad_norm": 5.728211879730225, | |
| "learning_rate": 1.528140837556682e-05, | |
| "loss": 0.9961, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.8552138034508627, | |
| "grad_norm": 7.517919063568115, | |
| "learning_rate": 1.525473459589224e-05, | |
| "loss": 0.8542, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.8589647411852963, | |
| "grad_norm": 4.70159387588501, | |
| "learning_rate": 1.522806081621766e-05, | |
| "loss": 1.0348, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.8627156789197299, | |
| "grad_norm": 5.308437347412109, | |
| "learning_rate": 1.5201387036543081e-05, | |
| "loss": 0.9645, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.8664666166541636, | |
| "grad_norm": 5.659054756164551, | |
| "learning_rate": 1.5174713256868499e-05, | |
| "loss": 0.8317, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.8702175543885972, | |
| "grad_norm": 5.970462799072266, | |
| "learning_rate": 1.5148039477193918e-05, | |
| "loss": 0.9889, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.8739684921230307, | |
| "grad_norm": 5.605343818664551, | |
| "learning_rate": 1.512136569751934e-05, | |
| "loss": 0.8545, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.8777194298574643, | |
| "grad_norm": 9.641878128051758, | |
| "learning_rate": 1.509469191784476e-05, | |
| "loss": 1.0026, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.881470367591898, | |
| "grad_norm": 9.36474323272705, | |
| "learning_rate": 1.5068018138170179e-05, | |
| "loss": 0.927, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.8852213053263316, | |
| "grad_norm": 8.28822135925293, | |
| "learning_rate": 1.50413443584956e-05, | |
| "loss": 0.9955, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.8889722430607652, | |
| "grad_norm": 7.714781284332275, | |
| "learning_rate": 1.501467057882102e-05, | |
| "loss": 0.9366, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.8927231807951987, | |
| "grad_norm": 3.879307508468628, | |
| "learning_rate": 1.498799679914644e-05, | |
| "loss": 0.9002, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.8964741185296324, | |
| "grad_norm": 5.898133754730225, | |
| "learning_rate": 1.4961323019471861e-05, | |
| "loss": 0.8564, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.900225056264066, | |
| "grad_norm": 6.275933265686035, | |
| "learning_rate": 1.493464923979728e-05, | |
| "loss": 0.9471, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9039759939984996, | |
| "grad_norm": 6.680263519287109, | |
| "learning_rate": 1.4907975460122702e-05, | |
| "loss": 0.8609, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.9077269317329333, | |
| "grad_norm": 7.0698676109313965, | |
| "learning_rate": 1.4881301680448121e-05, | |
| "loss": 0.7758, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9114778694673669, | |
| "grad_norm": 10.66848373413086, | |
| "learning_rate": 1.4854627900773541e-05, | |
| "loss": 0.8225, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9152288072018004, | |
| "grad_norm": 8.714693069458008, | |
| "learning_rate": 1.4827954121098962e-05, | |
| "loss": 0.8777, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.918979744936234, | |
| "grad_norm": 31.062232971191406, | |
| "learning_rate": 1.480128034142438e-05, | |
| "loss": 1.0204, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9227306826706677, | |
| "grad_norm": 11.140453338623047, | |
| "learning_rate": 1.47746065617498e-05, | |
| "loss": 0.9509, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.9264816204051013, | |
| "grad_norm": 6.338695526123047, | |
| "learning_rate": 1.4747932782075221e-05, | |
| "loss": 0.8125, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 8.720800399780273, | |
| "learning_rate": 1.472125900240064e-05, | |
| "loss": 0.8114, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.9339834958739685, | |
| "grad_norm": 11.407164573669434, | |
| "learning_rate": 1.469458522272606e-05, | |
| "loss": 1.0623, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.9377344336084021, | |
| "grad_norm": 6.310417652130127, | |
| "learning_rate": 1.4667911443051482e-05, | |
| "loss": 0.9014, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.9414853713428357, | |
| "grad_norm": 5.94149923324585, | |
| "learning_rate": 1.4641237663376901e-05, | |
| "loss": 0.7657, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.9452363090772693, | |
| "grad_norm": 9.478999137878418, | |
| "learning_rate": 1.4614563883702323e-05, | |
| "loss": 0.8412, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.9489872468117029, | |
| "grad_norm": 8.735868453979492, | |
| "learning_rate": 1.4587890104027742e-05, | |
| "loss": 0.9043, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.9527381845461366, | |
| "grad_norm": 6.766534328460693, | |
| "learning_rate": 1.4561216324353162e-05, | |
| "loss": 0.9538, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.9564891222805701, | |
| "grad_norm": 18.577468872070312, | |
| "learning_rate": 1.4534542544678583e-05, | |
| "loss": 0.9458, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9602400600150037, | |
| "grad_norm": 9.248088836669922, | |
| "learning_rate": 1.4507868765004003e-05, | |
| "loss": 0.913, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.9639909977494373, | |
| "grad_norm": 7.771203994750977, | |
| "learning_rate": 1.4481194985329422e-05, | |
| "loss": 0.931, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.967741935483871, | |
| "grad_norm": 7.330334663391113, | |
| "learning_rate": 1.4454521205654844e-05, | |
| "loss": 0.9681, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.9714928732183046, | |
| "grad_norm": 6.74515438079834, | |
| "learning_rate": 1.4427847425980263e-05, | |
| "loss": 0.9477, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.9752438109527382, | |
| "grad_norm": 8.954100608825684, | |
| "learning_rate": 1.4401173646305681e-05, | |
| "loss": 0.8958, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9789947486871718, | |
| "grad_norm": 11.33262825012207, | |
| "learning_rate": 1.4374499866631103e-05, | |
| "loss": 0.7998, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.9827456864216054, | |
| "grad_norm": 7.142065048217773, | |
| "learning_rate": 1.4347826086956522e-05, | |
| "loss": 0.9897, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.986496624156039, | |
| "grad_norm": 8.922056198120117, | |
| "learning_rate": 1.4321152307281942e-05, | |
| "loss": 0.9172, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.9902475618904726, | |
| "grad_norm": 5.288200378417969, | |
| "learning_rate": 1.4294478527607363e-05, | |
| "loss": 0.8836, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.9939984996249063, | |
| "grad_norm": 10.067593574523926, | |
| "learning_rate": 1.4267804747932783e-05, | |
| "loss": 1.0019, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9977494373593399, | |
| "grad_norm": 5.186861515045166, | |
| "learning_rate": 1.4241130968258204e-05, | |
| "loss": 0.8005, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5890295358649789, | |
| "eval_f1_macro": 0.5855792301386851, | |
| "eval_f1_weighted": 0.5883403945261724, | |
| "eval_loss": 0.9054797887802124, | |
| "eval_precision_macro": 0.5964531108356991, | |
| "eval_precision_weighted": 0.5920764019753845, | |
| "eval_recall_macro": 0.5799936335134275, | |
| "eval_recall_weighted": 0.5890295358649789, | |
| "eval_runtime": 4.8377, | |
| "eval_samples_per_second": 489.903, | |
| "eval_steps_per_second": 61.393, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 1.0015003750937734, | |
| "grad_norm": 7.454843044281006, | |
| "learning_rate": 1.4214457188583624e-05, | |
| "loss": 0.9261, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.005251312828207, | |
| "grad_norm": 7.612959384918213, | |
| "learning_rate": 1.4187783408909043e-05, | |
| "loss": 0.8656, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.0090022505626406, | |
| "grad_norm": 5.689546585083008, | |
| "learning_rate": 1.4161109629234465e-05, | |
| "loss": 0.8539, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.0127531882970742, | |
| "grad_norm": 9.812941551208496, | |
| "learning_rate": 1.4134435849559884e-05, | |
| "loss": 0.8154, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.016504126031508, | |
| "grad_norm": 6.9208550453186035, | |
| "learning_rate": 1.4107762069885304e-05, | |
| "loss": 0.8441, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.0202550637659416, | |
| "grad_norm": 5.310056686401367, | |
| "learning_rate": 1.4081088290210725e-05, | |
| "loss": 0.9471, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.0240060015003751, | |
| "grad_norm": 9.985223770141602, | |
| "learning_rate": 1.4054414510536145e-05, | |
| "loss": 0.853, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.0277569392348087, | |
| "grad_norm": 21.524646759033203, | |
| "learning_rate": 1.4027740730861563e-05, | |
| "loss": 0.9408, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.0315078769692423, | |
| "grad_norm": 9.250083923339844, | |
| "learning_rate": 1.4001066951186984e-05, | |
| "loss": 0.8023, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.0352588147036759, | |
| "grad_norm": 6.028738975524902, | |
| "learning_rate": 1.3974393171512404e-05, | |
| "loss": 0.7849, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.0390097524381094, | |
| "grad_norm": 9.787884712219238, | |
| "learning_rate": 1.3947719391837823e-05, | |
| "loss": 0.7474, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.042760690172543, | |
| "grad_norm": 12.639663696289062, | |
| "learning_rate": 1.3921045612163244e-05, | |
| "loss": 0.8167, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.0465116279069768, | |
| "grad_norm": 15.691644668579102, | |
| "learning_rate": 1.3894371832488664e-05, | |
| "loss": 0.7467, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.0502625656414104, | |
| "grad_norm": 7.864928722381592, | |
| "learning_rate": 1.3867698052814085e-05, | |
| "loss": 0.9476, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.054013503375844, | |
| "grad_norm": 8.662647247314453, | |
| "learning_rate": 1.3841024273139505e-05, | |
| "loss": 0.8529, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.0577644411102776, | |
| "grad_norm": 8.244277954101562, | |
| "learning_rate": 1.3814350493464925e-05, | |
| "loss": 0.753, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.0615153788447111, | |
| "grad_norm": 8.806965827941895, | |
| "learning_rate": 1.3787676713790346e-05, | |
| "loss": 0.7577, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.0652663165791447, | |
| "grad_norm": 11.864466667175293, | |
| "learning_rate": 1.3761002934115766e-05, | |
| "loss": 0.8227, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.0690172543135783, | |
| "grad_norm": 16.477638244628906, | |
| "learning_rate": 1.3734329154441187e-05, | |
| "loss": 0.8603, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.072768192048012, | |
| "grad_norm": 10.029014587402344, | |
| "learning_rate": 1.3707655374766607e-05, | |
| "loss": 0.7507, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.0765191297824457, | |
| "grad_norm": 42.02882766723633, | |
| "learning_rate": 1.3680981595092026e-05, | |
| "loss": 0.8731, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.0802700675168793, | |
| "grad_norm": 11.340489387512207, | |
| "learning_rate": 1.3654307815417447e-05, | |
| "loss": 0.8736, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.0840210052513128, | |
| "grad_norm": 10.736079216003418, | |
| "learning_rate": 1.3627634035742865e-05, | |
| "loss": 0.7387, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.0877719429857464, | |
| "grad_norm": 12.158968925476074, | |
| "learning_rate": 1.3600960256068285e-05, | |
| "loss": 0.8563, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.09152288072018, | |
| "grad_norm": 4.968686103820801, | |
| "learning_rate": 1.3574286476393706e-05, | |
| "loss": 0.8865, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.0952738184546136, | |
| "grad_norm": 9.05169677734375, | |
| "learning_rate": 1.3547612696719126e-05, | |
| "loss": 0.9706, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.0990247561890472, | |
| "grad_norm": 8.993448257446289, | |
| "learning_rate": 1.3520938917044546e-05, | |
| "loss": 0.7936, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.102775693923481, | |
| "grad_norm": 9.852548599243164, | |
| "learning_rate": 1.3494265137369967e-05, | |
| "loss": 0.9188, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.1065266316579145, | |
| "grad_norm": 8.509963035583496, | |
| "learning_rate": 1.3467591357695386e-05, | |
| "loss": 0.9182, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.1102775693923481, | |
| "grad_norm": 9.74703311920166, | |
| "learning_rate": 1.3440917578020806e-05, | |
| "loss": 0.8979, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.1140285071267817, | |
| "grad_norm": 11.76938247680664, | |
| "learning_rate": 1.3414243798346227e-05, | |
| "loss": 0.8431, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.1177794448612153, | |
| "grad_norm": 8.194916725158691, | |
| "learning_rate": 1.3387570018671647e-05, | |
| "loss": 0.8794, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.1215303825956489, | |
| "grad_norm": 5.259307861328125, | |
| "learning_rate": 1.3360896238997068e-05, | |
| "loss": 0.8688, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.1252813203300824, | |
| "grad_norm": 8.892224311828613, | |
| "learning_rate": 1.3334222459322488e-05, | |
| "loss": 0.8924, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.129032258064516, | |
| "grad_norm": 10.505491256713867, | |
| "learning_rate": 1.3307548679647908e-05, | |
| "loss": 0.8347, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.1327831957989498, | |
| "grad_norm": 4.74807071685791, | |
| "learning_rate": 1.3280874899973329e-05, | |
| "loss": 0.7728, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.1365341335333834, | |
| "grad_norm": 12.980900764465332, | |
| "learning_rate": 1.3254201120298747e-05, | |
| "loss": 0.7915, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.140285071267817, | |
| "grad_norm": 12.24691104888916, | |
| "learning_rate": 1.3227527340624166e-05, | |
| "loss": 0.8422, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.1440360090022506, | |
| "grad_norm": 6.215153217315674, | |
| "learning_rate": 1.3200853560949588e-05, | |
| "loss": 0.8067, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.1477869467366841, | |
| "grad_norm": 15.73306941986084, | |
| "learning_rate": 1.3174179781275007e-05, | |
| "loss": 0.8135, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.1515378844711177, | |
| "grad_norm": 12.068921089172363, | |
| "learning_rate": 1.3147506001600427e-05, | |
| "loss": 0.7305, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.1552888222055513, | |
| "grad_norm": 6.1044464111328125, | |
| "learning_rate": 1.3120832221925848e-05, | |
| "loss": 0.9578, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.159039759939985, | |
| "grad_norm": 10.416324615478516, | |
| "learning_rate": 1.3094158442251268e-05, | |
| "loss": 0.8362, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.1627906976744187, | |
| "grad_norm": 13.548623085021973, | |
| "learning_rate": 1.3067484662576687e-05, | |
| "loss": 0.7862, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.1665416354088523, | |
| "grad_norm": 9.015273094177246, | |
| "learning_rate": 1.3040810882902109e-05, | |
| "loss": 0.8432, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.1702925731432858, | |
| "grad_norm": 4.893497467041016, | |
| "learning_rate": 1.3014137103227528e-05, | |
| "loss": 0.719, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.1740435108777194, | |
| "grad_norm": 12.783862113952637, | |
| "learning_rate": 1.298746332355295e-05, | |
| "loss": 0.9088, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.177794448612153, | |
| "grad_norm": 10.826465606689453, | |
| "learning_rate": 1.296078954387837e-05, | |
| "loss": 0.8758, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.1815453863465866, | |
| "grad_norm": 9.32836627960205, | |
| "learning_rate": 1.2934115764203789e-05, | |
| "loss": 0.7643, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.1852963240810204, | |
| "grad_norm": 9.504363059997559, | |
| "learning_rate": 1.290744198452921e-05, | |
| "loss": 0.8174, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.189047261815454, | |
| "grad_norm": 12.839066505432129, | |
| "learning_rate": 1.2880768204854628e-05, | |
| "loss": 0.7992, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.1927981995498875, | |
| "grad_norm": 9.912968635559082, | |
| "learning_rate": 1.2854094425180048e-05, | |
| "loss": 0.7793, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.196549137284321, | |
| "grad_norm": 9.632975578308105, | |
| "learning_rate": 1.2827420645505469e-05, | |
| "loss": 0.8062, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.2003000750187547, | |
| "grad_norm": 15.091144561767578, | |
| "learning_rate": 1.2800746865830889e-05, | |
| "loss": 0.8319, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.2040510127531883, | |
| "grad_norm": 9.834930419921875, | |
| "learning_rate": 1.2774073086156308e-05, | |
| "loss": 0.7946, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.2078019504876218, | |
| "grad_norm": 9.097467422485352, | |
| "learning_rate": 1.274739930648173e-05, | |
| "loss": 0.7423, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.2115528882220554, | |
| "grad_norm": 7.097741603851318, | |
| "learning_rate": 1.272072552680715e-05, | |
| "loss": 0.7668, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.215303825956489, | |
| "grad_norm": 16.66200828552246, | |
| "learning_rate": 1.269405174713257e-05, | |
| "loss": 0.8292, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.2190547636909228, | |
| "grad_norm": 4.819615840911865, | |
| "learning_rate": 1.266737796745799e-05, | |
| "loss": 0.7641, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.2228057014253564, | |
| "grad_norm": 12.379060745239258, | |
| "learning_rate": 1.264070418778341e-05, | |
| "loss": 0.7749, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.22655663915979, | |
| "grad_norm": 10.446650505065918, | |
| "learning_rate": 1.2614030408108831e-05, | |
| "loss": 0.8306, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.2303075768942235, | |
| "grad_norm": 13.330952644348145, | |
| "learning_rate": 1.258735662843425e-05, | |
| "loss": 0.7924, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.2340585146286571, | |
| "grad_norm": 11.163646697998047, | |
| "learning_rate": 1.256068284875967e-05, | |
| "loss": 0.8505, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.2378094523630907, | |
| "grad_norm": 10.235424995422363, | |
| "learning_rate": 1.2534009069085092e-05, | |
| "loss": 0.7111, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.2415603900975243, | |
| "grad_norm": 9.529205322265625, | |
| "learning_rate": 1.2507335289410511e-05, | |
| "loss": 0.9559, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.245311327831958, | |
| "grad_norm": 9.511346817016602, | |
| "learning_rate": 1.2480661509735929e-05, | |
| "loss": 1.0107, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.2490622655663917, | |
| "grad_norm": 5.115582466125488, | |
| "learning_rate": 1.245398773006135e-05, | |
| "loss": 0.8128, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.2528132033008252, | |
| "grad_norm": 10.270365715026855, | |
| "learning_rate": 1.242731395038677e-05, | |
| "loss": 0.7537, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.2565641410352588, | |
| "grad_norm": 15.309682846069336, | |
| "learning_rate": 1.240064017071219e-05, | |
| "loss": 0.776, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.2603150787696924, | |
| "grad_norm": 6.9617414474487305, | |
| "learning_rate": 1.2373966391037611e-05, | |
| "loss": 0.7818, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.264066016504126, | |
| "grad_norm": 14.111533164978027, | |
| "learning_rate": 1.234729261136303e-05, | |
| "loss": 0.8766, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.2678169542385596, | |
| "grad_norm": 15.513258934020996, | |
| "learning_rate": 1.2320618831688452e-05, | |
| "loss": 0.8124, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.2715678919729934, | |
| "grad_norm": 10.617011070251465, | |
| "learning_rate": 1.2293945052013872e-05, | |
| "loss": 0.7367, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.275318829707427, | |
| "grad_norm": 10.756956100463867, | |
| "learning_rate": 1.2267271272339291e-05, | |
| "loss": 0.9371, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.2790697674418605, | |
| "grad_norm": 20.27239990234375, | |
| "learning_rate": 1.2240597492664712e-05, | |
| "loss": 0.7812, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.282820705176294, | |
| "grad_norm": 13.26762580871582, | |
| "learning_rate": 1.2213923712990132e-05, | |
| "loss": 0.9214, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.2865716429107277, | |
| "grad_norm": 6.740780830383301, | |
| "learning_rate": 1.2187249933315552e-05, | |
| "loss": 0.7254, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.2903225806451613, | |
| "grad_norm": 8.460843086242676, | |
| "learning_rate": 1.2160576153640973e-05, | |
| "loss": 0.8793, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.2940735183795948, | |
| "grad_norm": 8.37424373626709, | |
| "learning_rate": 1.2133902373966393e-05, | |
| "loss": 0.7178, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.2978244561140286, | |
| "grad_norm": 9.57453441619873, | |
| "learning_rate": 1.210722859429181e-05, | |
| "loss": 0.9584, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.301575393848462, | |
| "grad_norm": 15.27446460723877, | |
| "learning_rate": 1.2080554814617232e-05, | |
| "loss": 0.7314, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.3053263315828958, | |
| "grad_norm": 16.266162872314453, | |
| "learning_rate": 1.2053881034942651e-05, | |
| "loss": 0.8651, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.3090772693173294, | |
| "grad_norm": 9.161102294921875, | |
| "learning_rate": 1.2027207255268071e-05, | |
| "loss": 0.7086, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.312828207051763, | |
| "grad_norm": 12.645145416259766, | |
| "learning_rate": 1.2000533475593492e-05, | |
| "loss": 0.874, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.3165791447861965, | |
| "grad_norm": 9.018929481506348, | |
| "learning_rate": 1.1973859695918912e-05, | |
| "loss": 0.7457, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.32033008252063, | |
| "grad_norm": 10.96903133392334, | |
| "learning_rate": 1.1947185916244333e-05, | |
| "loss": 0.865, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.3240810202550637, | |
| "grad_norm": 15.08077621459961, | |
| "learning_rate": 1.1920512136569753e-05, | |
| "loss": 0.8127, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.3278319579894973, | |
| "grad_norm": 6.171741962432861, | |
| "learning_rate": 1.1893838356895173e-05, | |
| "loss": 0.7038, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.331582895723931, | |
| "grad_norm": 12.167604446411133, | |
| "learning_rate": 1.1867164577220594e-05, | |
| "loss": 0.7373, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.3353338334583646, | |
| "grad_norm": 12.859063148498535, | |
| "learning_rate": 1.1840490797546013e-05, | |
| "loss": 0.8292, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.3390847711927982, | |
| "grad_norm": 9.17769718170166, | |
| "learning_rate": 1.1813817017871435e-05, | |
| "loss": 0.8117, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.3428357089272318, | |
| "grad_norm": 7.380620002746582, | |
| "learning_rate": 1.1787143238196854e-05, | |
| "loss": 0.7943, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.3465866466616654, | |
| "grad_norm": 19.143110275268555, | |
| "learning_rate": 1.1760469458522274e-05, | |
| "loss": 0.7798, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.350337584396099, | |
| "grad_norm": 14.915560722351074, | |
| "learning_rate": 1.1733795678847695e-05, | |
| "loss": 0.8568, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.3540885221305325, | |
| "grad_norm": 16.487377166748047, | |
| "learning_rate": 1.1707121899173113e-05, | |
| "loss": 0.8586, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.3578394598649663, | |
| "grad_norm": 9.255929946899414, | |
| "learning_rate": 1.1680448119498533e-05, | |
| "loss": 0.7957, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.3615903975994, | |
| "grad_norm": 12.38227653503418, | |
| "learning_rate": 1.1653774339823954e-05, | |
| "loss": 0.7957, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.3653413353338335, | |
| "grad_norm": 10.949649810791016, | |
| "learning_rate": 1.1627100560149374e-05, | |
| "loss": 0.6871, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.369092273068267, | |
| "grad_norm": 7.265697956085205, | |
| "learning_rate": 1.1600426780474793e-05, | |
| "loss": 0.7341, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.3728432108027007, | |
| "grad_norm": 12.582711219787598, | |
| "learning_rate": 1.1573753000800215e-05, | |
| "loss": 0.8242, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.3765941485371342, | |
| "grad_norm": 12.345062255859375, | |
| "learning_rate": 1.1547079221125634e-05, | |
| "loss": 0.8768, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.3803450862715678, | |
| "grad_norm": 8.697713851928711, | |
| "learning_rate": 1.1520405441451054e-05, | |
| "loss": 0.855, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.3840960240060016, | |
| "grad_norm": 9.254758834838867, | |
| "learning_rate": 1.1493731661776475e-05, | |
| "loss": 0.909, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.387846961740435, | |
| "grad_norm": 9.739770889282227, | |
| "learning_rate": 1.1467057882101895e-05, | |
| "loss": 0.8582, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.3915978994748688, | |
| "grad_norm": 12.004996299743652, | |
| "learning_rate": 1.1440384102427316e-05, | |
| "loss": 0.7344, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.3953488372093024, | |
| "grad_norm": 13.092066764831543, | |
| "learning_rate": 1.1413710322752736e-05, | |
| "loss": 0.8916, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.399099774943736, | |
| "grad_norm": 12.259298324584961, | |
| "learning_rate": 1.1387036543078155e-05, | |
| "loss": 0.9096, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.4028507126781695, | |
| "grad_norm": 8.312166213989258, | |
| "learning_rate": 1.1360362763403577e-05, | |
| "loss": 0.8647, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.406601650412603, | |
| "grad_norm": 8.59150218963623, | |
| "learning_rate": 1.1333688983728995e-05, | |
| "loss": 0.9202, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.4103525881470367, | |
| "grad_norm": 8.444820404052734, | |
| "learning_rate": 1.1307015204054414e-05, | |
| "loss": 0.7343, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.4141035258814703, | |
| "grad_norm": 12.232796669006348, | |
| "learning_rate": 1.1280341424379836e-05, | |
| "loss": 0.7329, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.417854463615904, | |
| "grad_norm": 9.038057327270508, | |
| "learning_rate": 1.1253667644705255e-05, | |
| "loss": 0.8751, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.4216054013503376, | |
| "grad_norm": 5.729677200317383, | |
| "learning_rate": 1.1226993865030675e-05, | |
| "loss": 0.7319, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.4253563390847712, | |
| "grad_norm": 7.777376651763916, | |
| "learning_rate": 1.1200320085356096e-05, | |
| "loss": 0.802, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.4291072768192048, | |
| "grad_norm": 13.165481567382812, | |
| "learning_rate": 1.1173646305681516e-05, | |
| "loss": 0.7195, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.4328582145536384, | |
| "grad_norm": 10.966960906982422, | |
| "learning_rate": 1.1146972526006935e-05, | |
| "loss": 0.8234, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.436609152288072, | |
| "grad_norm": 8.237056732177734, | |
| "learning_rate": 1.1120298746332357e-05, | |
| "loss": 0.7832, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.4403600900225055, | |
| "grad_norm": 10.419988632202148, | |
| "learning_rate": 1.1093624966657776e-05, | |
| "loss": 0.8292, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.4441110277569393, | |
| "grad_norm": 14.655726432800293, | |
| "learning_rate": 1.1066951186983198e-05, | |
| "loss": 0.8523, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.447861965491373, | |
| "grad_norm": 10.38304328918457, | |
| "learning_rate": 1.1040277407308617e-05, | |
| "loss": 0.856, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.4516129032258065, | |
| "grad_norm": 13.249422073364258, | |
| "learning_rate": 1.1013603627634037e-05, | |
| "loss": 0.8403, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.45536384096024, | |
| "grad_norm": 9.854536056518555, | |
| "learning_rate": 1.0986929847959458e-05, | |
| "loss": 0.794, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.4591147786946737, | |
| "grad_norm": 11.48951530456543, | |
| "learning_rate": 1.0960256068284876e-05, | |
| "loss": 0.7569, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.4628657164291072, | |
| "grad_norm": 8.955044746398926, | |
| "learning_rate": 1.0933582288610296e-05, | |
| "loss": 0.8064, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.4666166541635408, | |
| "grad_norm": 16.088743209838867, | |
| "learning_rate": 1.0906908508935717e-05, | |
| "loss": 0.8518, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.4703675918979746, | |
| "grad_norm": 9.207806587219238, | |
| "learning_rate": 1.0880234729261137e-05, | |
| "loss": 0.875, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.474118529632408, | |
| "grad_norm": 18.738187789916992, | |
| "learning_rate": 1.0853560949586556e-05, | |
| "loss": 0.8164, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.4778694673668418, | |
| "grad_norm": 10.138594627380371, | |
| "learning_rate": 1.0826887169911977e-05, | |
| "loss": 0.791, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.4816204051012754, | |
| "grad_norm": 9.635621070861816, | |
| "learning_rate": 1.0800213390237397e-05, | |
| "loss": 0.7878, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.485371342835709, | |
| "grad_norm": 9.569879531860352, | |
| "learning_rate": 1.0773539610562818e-05, | |
| "loss": 0.8404, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.4891222805701425, | |
| "grad_norm": 9.855542182922363, | |
| "learning_rate": 1.0746865830888238e-05, | |
| "loss": 0.8726, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.492873218304576, | |
| "grad_norm": 16.710786819458008, | |
| "learning_rate": 1.0720192051213658e-05, | |
| "loss": 0.8706, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.49662415603901, | |
| "grad_norm": 13.603216171264648, | |
| "learning_rate": 1.0693518271539079e-05, | |
| "loss": 0.8437, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.5003750937734432, | |
| "grad_norm": 11.3872652053833, | |
| "learning_rate": 1.0666844491864499e-05, | |
| "loss": 0.6512, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.504126031507877, | |
| "grad_norm": 10.2975492477417, | |
| "learning_rate": 1.0640170712189918e-05, | |
| "loss": 0.8774, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.5078769692423106, | |
| "grad_norm": 7.741751194000244, | |
| "learning_rate": 1.061349693251534e-05, | |
| "loss": 0.7528, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.5116279069767442, | |
| "grad_norm": 9.902315139770508, | |
| "learning_rate": 1.0586823152840759e-05, | |
| "loss": 0.7995, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.5153788447111778, | |
| "grad_norm": 11.541082382202148, | |
| "learning_rate": 1.0560149373166177e-05, | |
| "loss": 0.7694, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.5191297824456114, | |
| "grad_norm": 8.56485366821289, | |
| "learning_rate": 1.0533475593491598e-05, | |
| "loss": 0.8002, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.5228807201800452, | |
| "grad_norm": 8.866626739501953, | |
| "learning_rate": 1.0506801813817018e-05, | |
| "loss": 0.792, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.5266316579144785, | |
| "grad_norm": 10.332854270935059, | |
| "learning_rate": 1.0480128034142438e-05, | |
| "loss": 0.7378, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.5303825956489123, | |
| "grad_norm": 8.805913925170898, | |
| "learning_rate": 1.0453454254467859e-05, | |
| "loss": 0.8287, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.5341335333833457, | |
| "grad_norm": 10.885342597961426, | |
| "learning_rate": 1.0426780474793278e-05, | |
| "loss": 0.8454, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.5378844711177795, | |
| "grad_norm": 11.047041893005371, | |
| "learning_rate": 1.04001066951187e-05, | |
| "loss": 0.8955, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.541635408852213, | |
| "grad_norm": 12.287060737609863, | |
| "learning_rate": 1.037343291544412e-05, | |
| "loss": 0.9106, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.5453863465866466, | |
| "grad_norm": 7.6913628578186035, | |
| "learning_rate": 1.0346759135769539e-05, | |
| "loss": 0.8287, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.5491372843210802, | |
| "grad_norm": 12.864625930786133, | |
| "learning_rate": 1.032008535609496e-05, | |
| "loss": 0.8176, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.5528882220555138, | |
| "grad_norm": 17.12616539001465, | |
| "learning_rate": 1.029341157642038e-05, | |
| "loss": 0.8964, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.5566391597899476, | |
| "grad_norm": 9.076611518859863, | |
| "learning_rate": 1.02667377967458e-05, | |
| "loss": 0.8222, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.560390097524381, | |
| "grad_norm": 9.327693939208984, | |
| "learning_rate": 1.0240064017071221e-05, | |
| "loss": 0.9358, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.5641410352588148, | |
| "grad_norm": 7.653916358947754, | |
| "learning_rate": 1.021339023739664e-05, | |
| "loss": 0.7721, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.5678919729932483, | |
| "grad_norm": 10.110307693481445, | |
| "learning_rate": 1.0186716457722058e-05, | |
| "loss": 0.9346, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.571642910727682, | |
| "grad_norm": 11.298696517944336, | |
| "learning_rate": 1.016004267804748e-05, | |
| "loss": 0.8456, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.5753938484621155, | |
| "grad_norm": 13.459417343139648, | |
| "learning_rate": 1.01333688983729e-05, | |
| "loss": 0.7815, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.579144786196549, | |
| "grad_norm": 16.08547592163086, | |
| "learning_rate": 1.0106695118698319e-05, | |
| "loss": 0.7656, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.5828957239309829, | |
| "grad_norm": 8.995433807373047, | |
| "learning_rate": 1.008002133902374e-05, | |
| "loss": 0.8248, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.5866466616654162, | |
| "grad_norm": 10.426254272460938, | |
| "learning_rate": 1.005334755934916e-05, | |
| "loss": 0.7957, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.59039759939985, | |
| "grad_norm": 8.310003280639648, | |
| "learning_rate": 1.0026673779674581e-05, | |
| "loss": 0.8313, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.5941485371342836, | |
| "grad_norm": 14.415204048156738, | |
| "learning_rate": 1e-05, | |
| "loss": 0.7711, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.5978994748687172, | |
| "grad_norm": 8.948083877563477, | |
| "learning_rate": 9.97332622032542e-06, | |
| "loss": 0.7868, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.6016504126031508, | |
| "grad_norm": 16.681766510009766, | |
| "learning_rate": 9.946652440650842e-06, | |
| "loss": 0.8633, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.6054013503375844, | |
| "grad_norm": 11.883402824401855, | |
| "learning_rate": 9.919978660976261e-06, | |
| "loss": 0.8195, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.6091522880720182, | |
| "grad_norm": 11.386548042297363, | |
| "learning_rate": 9.893304881301681e-06, | |
| "loss": 0.7621, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.6129032258064515, | |
| "grad_norm": 13.255663871765137, | |
| "learning_rate": 9.8666311016271e-06, | |
| "loss": 1.0233, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.6166541635408853, | |
| "grad_norm": 10.955714225769043, | |
| "learning_rate": 9.839957321952522e-06, | |
| "loss": 0.9456, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.6204051012753187, | |
| "grad_norm": 7.624833583831787, | |
| "learning_rate": 9.813283542277942e-06, | |
| "loss": 0.9029, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.6241560390097525, | |
| "grad_norm": 8.860147476196289, | |
| "learning_rate": 9.786609762603361e-06, | |
| "loss": 0.835, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.627906976744186, | |
| "grad_norm": 13.29971981048584, | |
| "learning_rate": 9.759935982928782e-06, | |
| "loss": 0.848, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.6316579144786196, | |
| "grad_norm": 10.151264190673828, | |
| "learning_rate": 9.733262203254202e-06, | |
| "loss": 0.7443, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.6354088522130532, | |
| "grad_norm": 14.21789264678955, | |
| "learning_rate": 9.706588423579622e-06, | |
| "loss": 0.908, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.6391597899474868, | |
| "grad_norm": 7.94905424118042, | |
| "learning_rate": 9.679914643905041e-06, | |
| "loss": 0.6919, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.6429107276819206, | |
| "grad_norm": 8.60908031463623, | |
| "learning_rate": 9.653240864230463e-06, | |
| "loss": 0.7309, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.646661665416354, | |
| "grad_norm": 15.03842544555664, | |
| "learning_rate": 9.626567084555882e-06, | |
| "loss": 0.9343, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.6504126031507877, | |
| "grad_norm": 11.684754371643066, | |
| "learning_rate": 9.599893304881302e-06, | |
| "loss": 0.7532, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.6541635408852213, | |
| "grad_norm": 6.24261999130249, | |
| "learning_rate": 9.573219525206723e-06, | |
| "loss": 0.8449, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.657914478619655, | |
| "grad_norm": 5.580635070800781, | |
| "learning_rate": 9.546545745532143e-06, | |
| "loss": 0.644, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.6616654163540885, | |
| "grad_norm": 13.382287979125977, | |
| "learning_rate": 9.519871965857564e-06, | |
| "loss": 0.8713, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.665416354088522, | |
| "grad_norm": 11.218451499938965, | |
| "learning_rate": 9.493198186182982e-06, | |
| "loss": 0.6552, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.6691672918229559, | |
| "grad_norm": 16.548782348632812, | |
| "learning_rate": 9.466524406508403e-06, | |
| "loss": 0.8024, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.6729182295573892, | |
| "grad_norm": 17.210647583007812, | |
| "learning_rate": 9.439850626833823e-06, | |
| "loss": 0.7543, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.676669167291823, | |
| "grad_norm": 13.630977630615234, | |
| "learning_rate": 9.413176847159243e-06, | |
| "loss": 0.8754, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.6804201050262566, | |
| "grad_norm": 13.967558860778809, | |
| "learning_rate": 9.386503067484664e-06, | |
| "loss": 0.7699, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.6841710427606902, | |
| "grad_norm": 11.707578659057617, | |
| "learning_rate": 9.359829287810083e-06, | |
| "loss": 0.8324, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.6879219804951238, | |
| "grad_norm": 9.124420166015625, | |
| "learning_rate": 9.333155508135505e-06, | |
| "loss": 0.794, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.6916729182295573, | |
| "grad_norm": 10.910788536071777, | |
| "learning_rate": 9.306481728460923e-06, | |
| "loss": 0.8241, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.6954238559639911, | |
| "grad_norm": 13.6180419921875, | |
| "learning_rate": 9.279807948786344e-06, | |
| "loss": 0.8882, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.6991747936984245, | |
| "grad_norm": 7.055276393890381, | |
| "learning_rate": 9.253134169111764e-06, | |
| "loss": 0.9011, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.7029257314328583, | |
| "grad_norm": 14.100971221923828, | |
| "learning_rate": 9.226460389437183e-06, | |
| "loss": 0.8026, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.7066766691672917, | |
| "grad_norm": 6.9184441566467285, | |
| "learning_rate": 9.199786609762605e-06, | |
| "loss": 0.6888, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.7104276069017255, | |
| "grad_norm": 9.915225982666016, | |
| "learning_rate": 9.173112830088024e-06, | |
| "loss": 0.8456, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.714178544636159, | |
| "grad_norm": 11.1101655960083, | |
| "learning_rate": 9.146439050413445e-06, | |
| "loss": 0.8979, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.7179294823705926, | |
| "grad_norm": 11.128944396972656, | |
| "learning_rate": 9.119765270738863e-06, | |
| "loss": 0.8386, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.7216804201050264, | |
| "grad_norm": 8.845916748046875, | |
| "learning_rate": 9.093091491064285e-06, | |
| "loss": 0.8375, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.7254313578394598, | |
| "grad_norm": 12.3989839553833, | |
| "learning_rate": 9.066417711389704e-06, | |
| "loss": 0.7884, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.7291822955738936, | |
| "grad_norm": 8.899964332580566, | |
| "learning_rate": 9.039743931715126e-06, | |
| "loss": 0.8391, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.732933233308327, | |
| "grad_norm": 11.830737113952637, | |
| "learning_rate": 9.013070152040545e-06, | |
| "loss": 0.836, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.7366841710427607, | |
| "grad_norm": 14.875555038452148, | |
| "learning_rate": 8.986396372365965e-06, | |
| "loss": 0.8148, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.7404351087771943, | |
| "grad_norm": 8.44090461730957, | |
| "learning_rate": 8.959722592691386e-06, | |
| "loss": 0.7033, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.744186046511628, | |
| "grad_norm": 7.954046726226807, | |
| "learning_rate": 8.933048813016804e-06, | |
| "loss": 0.8364, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.7479369842460615, | |
| "grad_norm": 14.886021614074707, | |
| "learning_rate": 8.906375033342225e-06, | |
| "loss": 0.7641, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.751687921980495, | |
| "grad_norm": 15.42341136932373, | |
| "learning_rate": 8.879701253667645e-06, | |
| "loss": 0.7152, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.7554388597149289, | |
| "grad_norm": 18.62801742553711, | |
| "learning_rate": 8.853027473993066e-06, | |
| "loss": 0.9192, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.7591897974493622, | |
| "grad_norm": 9.787707328796387, | |
| "learning_rate": 8.826353694318486e-06, | |
| "loss": 1.0479, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.762940735183796, | |
| "grad_norm": 10.803950309753418, | |
| "learning_rate": 8.799679914643906e-06, | |
| "loss": 0.7217, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.7666916729182296, | |
| "grad_norm": 5.519962787628174, | |
| "learning_rate": 8.773006134969327e-06, | |
| "loss": 0.8044, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.7704426106526632, | |
| "grad_norm": 10.77694320678711, | |
| "learning_rate": 8.746332355294745e-06, | |
| "loss": 0.7759, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.7741935483870968, | |
| "grad_norm": 8.671502113342285, | |
| "learning_rate": 8.719658575620166e-06, | |
| "loss": 0.834, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.7779444861215303, | |
| "grad_norm": 10.25809097290039, | |
| "learning_rate": 8.692984795945586e-06, | |
| "loss": 0.7088, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.7816954238559641, | |
| "grad_norm": 11.049978256225586, | |
| "learning_rate": 8.666311016271007e-06, | |
| "loss": 0.8777, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.7854463615903975, | |
| "grad_norm": 6.090721130371094, | |
| "learning_rate": 8.639637236596427e-06, | |
| "loss": 0.8311, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.7891972993248313, | |
| "grad_norm": 7.393324375152588, | |
| "learning_rate": 8.612963456921846e-06, | |
| "loss": 0.7623, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.7929482370592649, | |
| "grad_norm": 9.985932350158691, | |
| "learning_rate": 8.586289677247268e-06, | |
| "loss": 0.7164, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.7966991747936985, | |
| "grad_norm": 23.15224266052246, | |
| "learning_rate": 8.559615897572687e-06, | |
| "loss": 0.8139, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.800450112528132, | |
| "grad_norm": 15.539804458618164, | |
| "learning_rate": 8.532942117898107e-06, | |
| "loss": 0.9038, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.8042010502625656, | |
| "grad_norm": 20.424936294555664, | |
| "learning_rate": 8.506268338223526e-06, | |
| "loss": 0.9228, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.8079519879969994, | |
| "grad_norm": 12.960927963256836, | |
| "learning_rate": 8.479594558548948e-06, | |
| "loss": 0.8129, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.8117029257314328, | |
| "grad_norm": 12.578907012939453, | |
| "learning_rate": 8.452920778874367e-06, | |
| "loss": 0.7919, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.8154538634658666, | |
| "grad_norm": 9.88344955444336, | |
| "learning_rate": 8.426246999199787e-06, | |
| "loss": 0.8888, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.8192048012003, | |
| "grad_norm": 9.531432151794434, | |
| "learning_rate": 8.399573219525208e-06, | |
| "loss": 0.8074, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.8229557389347337, | |
| "grad_norm": 10.701923370361328, | |
| "learning_rate": 8.372899439850628e-06, | |
| "loss": 0.8598, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.8267066766691673, | |
| "grad_norm": 10.894915580749512, | |
| "learning_rate": 8.346225660176047e-06, | |
| "loss": 0.6588, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.8304576144036009, | |
| "grad_norm": 9.2036714553833, | |
| "learning_rate": 8.319551880501467e-06, | |
| "loss": 0.8323, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.8342085521380345, | |
| "grad_norm": 8.6634521484375, | |
| "learning_rate": 8.292878100826888e-06, | |
| "loss": 0.7526, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.837959489872468, | |
| "grad_norm": 14.781025886535645, | |
| "learning_rate": 8.266204321152308e-06, | |
| "loss": 0.6999, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.8417104276069018, | |
| "grad_norm": 12.273209571838379, | |
| "learning_rate": 8.239530541477728e-06, | |
| "loss": 0.6734, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.8454613653413352, | |
| "grad_norm": 11.974825859069824, | |
| "learning_rate": 8.212856761803149e-06, | |
| "loss": 0.7195, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.849212303075769, | |
| "grad_norm": 12.195642471313477, | |
| "learning_rate": 8.186182982128569e-06, | |
| "loss": 0.8301, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.8529632408102026, | |
| "grad_norm": 6.2414751052856445, | |
| "learning_rate": 8.159509202453988e-06, | |
| "loss": 0.8528, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.8567141785446362, | |
| "grad_norm": 9.026991844177246, | |
| "learning_rate": 8.132835422779408e-06, | |
| "loss": 0.8165, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.8604651162790697, | |
| "grad_norm": 13.745824813842773, | |
| "learning_rate": 8.106161643104829e-06, | |
| "loss": 0.9866, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.8642160540135033, | |
| "grad_norm": 8.861783027648926, | |
| "learning_rate": 8.079487863430249e-06, | |
| "loss": 0.9738, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.8679669917479371, | |
| "grad_norm": 7.437354564666748, | |
| "learning_rate": 8.052814083755668e-06, | |
| "loss": 0.7223, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.8717179294823705, | |
| "grad_norm": 14.148890495300293, | |
| "learning_rate": 8.02614030408109e-06, | |
| "loss": 0.8356, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.8754688672168043, | |
| "grad_norm": 13.688013076782227, | |
| "learning_rate": 7.99946652440651e-06, | |
| "loss": 0.8949, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.8792198049512379, | |
| "grad_norm": 16.709125518798828, | |
| "learning_rate": 7.972792744731929e-06, | |
| "loss": 0.8775, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.8829707426856714, | |
| "grad_norm": 9.73661994934082, | |
| "learning_rate": 7.946118965057348e-06, | |
| "loss": 0.832, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.886721680420105, | |
| "grad_norm": 10.575983047485352, | |
| "learning_rate": 7.91944518538277e-06, | |
| "loss": 0.7976, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.8904726181545386, | |
| "grad_norm": 9.284303665161133, | |
| "learning_rate": 7.89277140570819e-06, | |
| "loss": 0.9656, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.8942235558889724, | |
| "grad_norm": 6.543034553527832, | |
| "learning_rate": 7.866097626033609e-06, | |
| "loss": 0.7157, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.8979744936234058, | |
| "grad_norm": 5.064873218536377, | |
| "learning_rate": 7.83942384635903e-06, | |
| "loss": 0.7334, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.9017254313578396, | |
| "grad_norm": 16.654563903808594, | |
| "learning_rate": 7.81275006668445e-06, | |
| "loss": 0.7677, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.905476369092273, | |
| "grad_norm": 20.614212036132812, | |
| "learning_rate": 7.78607628700987e-06, | |
| "loss": 0.8525, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.9092273068267067, | |
| "grad_norm": 13.709310531616211, | |
| "learning_rate": 7.75940250733529e-06, | |
| "loss": 0.7237, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.9129782445611403, | |
| "grad_norm": 17.662317276000977, | |
| "learning_rate": 7.73272872766071e-06, | |
| "loss": 0.8529, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.9167291822955739, | |
| "grad_norm": 9.610177040100098, | |
| "learning_rate": 7.70605494798613e-06, | |
| "loss": 0.9447, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.9204801200300075, | |
| "grad_norm": 19.19601821899414, | |
| "learning_rate": 7.67938116831155e-06, | |
| "loss": 0.8738, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.924231057764441, | |
| "grad_norm": 8.228813171386719, | |
| "learning_rate": 7.652707388636971e-06, | |
| "loss": 0.8096, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.9279819954988748, | |
| "grad_norm": 14.475564956665039, | |
| "learning_rate": 7.626033608962391e-06, | |
| "loss": 0.7235, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.9317329332333082, | |
| "grad_norm": 17.313648223876953, | |
| "learning_rate": 7.599359829287811e-06, | |
| "loss": 0.7778, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.935483870967742, | |
| "grad_norm": 6.775811672210693, | |
| "learning_rate": 7.572686049613231e-06, | |
| "loss": 0.7627, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.9392348087021756, | |
| "grad_norm": 11.815681457519531, | |
| "learning_rate": 7.54601226993865e-06, | |
| "loss": 0.8978, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.9429857464366092, | |
| "grad_norm": 13.653975486755371, | |
| "learning_rate": 7.519338490264071e-06, | |
| "loss": 0.7364, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.9467366841710427, | |
| "grad_norm": 9.049905776977539, | |
| "learning_rate": 7.492664710589491e-06, | |
| "loss": 0.8631, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.9504876219054763, | |
| "grad_norm": 14.149343490600586, | |
| "learning_rate": 7.465990930914912e-06, | |
| "loss": 0.8279, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.9542385596399101, | |
| "grad_norm": 15.612215995788574, | |
| "learning_rate": 7.439317151240331e-06, | |
| "loss": 0.9058, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.9579894973743435, | |
| "grad_norm": 11.682372093200684, | |
| "learning_rate": 7.412643371565752e-06, | |
| "loss": 0.8859, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.9617404351087773, | |
| "grad_norm": 9.87074089050293, | |
| "learning_rate": 7.385969591891171e-06, | |
| "loss": 0.8733, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.9654913728432108, | |
| "grad_norm": 9.963356971740723, | |
| "learning_rate": 7.359295812216591e-06, | |
| "loss": 0.7134, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.9692423105776444, | |
| "grad_norm": 4.6800537109375, | |
| "learning_rate": 7.3326220325420115e-06, | |
| "loss": 0.7594, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.972993248312078, | |
| "grad_norm": 13.148963928222656, | |
| "learning_rate": 7.305948252867432e-06, | |
| "loss": 0.947, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.9767441860465116, | |
| "grad_norm": 10.073929786682129, | |
| "learning_rate": 7.279274473192852e-06, | |
| "loss": 0.8769, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.9804951237809454, | |
| "grad_norm": 11.67326831817627, | |
| "learning_rate": 7.252600693518272e-06, | |
| "loss": 0.7545, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.9842460615153787, | |
| "grad_norm": 7.498824119567871, | |
| "learning_rate": 7.2259269138436925e-06, | |
| "loss": 0.7997, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.9879969992498125, | |
| "grad_norm": 9.357927322387695, | |
| "learning_rate": 7.199253134169112e-06, | |
| "loss": 0.8754, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.991747936984246, | |
| "grad_norm": 12.50817584991455, | |
| "learning_rate": 7.172579354494532e-06, | |
| "loss": 0.79, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.9954988747186797, | |
| "grad_norm": 14.613991737365723, | |
| "learning_rate": 7.145905574819952e-06, | |
| "loss": 0.8005, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.9992498124531133, | |
| "grad_norm": 9.007129669189453, | |
| "learning_rate": 7.119231795145373e-06, | |
| "loss": 0.9009, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.580168776371308, | |
| "eval_f1_macro": 0.5788189436128865, | |
| "eval_f1_weighted": 0.5800618837244829, | |
| "eval_loss": 0.9064968228340149, | |
| "eval_precision_macro": 0.5789782500874713, | |
| "eval_precision_weighted": 0.5804785651892536, | |
| "eval_recall_macro": 0.5792145494510413, | |
| "eval_recall_weighted": 0.580168776371308, | |
| "eval_runtime": 4.8637, | |
| "eval_samples_per_second": 487.284, | |
| "eval_steps_per_second": 61.065, | |
| "step": 5332 | |
| }, | |
| { | |
| "epoch": 2.003000750187547, | |
| "grad_norm": 10.368429183959961, | |
| "learning_rate": 7.092558015470793e-06, | |
| "loss": 0.6842, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.0067516879219807, | |
| "grad_norm": 10.329928398132324, | |
| "learning_rate": 7.065884235796214e-06, | |
| "loss": 0.7709, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.010502625656414, | |
| "grad_norm": 13.128575325012207, | |
| "learning_rate": 7.039210456121633e-06, | |
| "loss": 0.7054, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.014253563390848, | |
| "grad_norm": 10.884894371032715, | |
| "learning_rate": 7.012536676447053e-06, | |
| "loss": 0.8103, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.018004501125281, | |
| "grad_norm": 17.327537536621094, | |
| "learning_rate": 6.985862896772473e-06, | |
| "loss": 0.6551, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.021755438859715, | |
| "grad_norm": 9.725515365600586, | |
| "learning_rate": 6.959189117097893e-06, | |
| "loss": 0.6534, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.0255063765941483, | |
| "grad_norm": 9.302525520324707, | |
| "learning_rate": 6.932515337423313e-06, | |
| "loss": 0.6741, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.029257314328582, | |
| "grad_norm": 12.362338066101074, | |
| "learning_rate": 6.905841557748734e-06, | |
| "loss": 0.7026, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.033008252063016, | |
| "grad_norm": 7.654306411743164, | |
| "learning_rate": 6.879167778074154e-06, | |
| "loss": 0.5962, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.0367591897974493, | |
| "grad_norm": 14.547067642211914, | |
| "learning_rate": 6.852493998399574e-06, | |
| "loss": 0.5578, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.040510127531883, | |
| "grad_norm": 12.792427062988281, | |
| "learning_rate": 6.8258202187249935e-06, | |
| "loss": 0.7636, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.0442610652663165, | |
| "grad_norm": 8.322968482971191, | |
| "learning_rate": 6.799146439050414e-06, | |
| "loss": 0.5881, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.0480120030007503, | |
| "grad_norm": 14.064526557922363, | |
| "learning_rate": 6.772472659375834e-06, | |
| "loss": 0.6907, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.0517629407351836, | |
| "grad_norm": 11.318249702453613, | |
| "learning_rate": 6.745798879701254e-06, | |
| "loss": 0.6179, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.0555138784696174, | |
| "grad_norm": 7.615289688110352, | |
| "learning_rate": 6.7191251000266745e-06, | |
| "loss": 0.5912, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.059264816204051, | |
| "grad_norm": 20.249950408935547, | |
| "learning_rate": 6.692451320352095e-06, | |
| "loss": 0.7777, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.0630157539384846, | |
| "grad_norm": 13.289349555969238, | |
| "learning_rate": 6.665777540677515e-06, | |
| "loss": 0.6271, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.0667666916729184, | |
| "grad_norm": 14.625772476196289, | |
| "learning_rate": 6.639103761002935e-06, | |
| "loss": 0.7248, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.0705176294073517, | |
| "grad_norm": 14.428004264831543, | |
| "learning_rate": 6.612429981328355e-06, | |
| "loss": 0.6791, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.0742685671417855, | |
| "grad_norm": 21.052837371826172, | |
| "learning_rate": 6.585756201653774e-06, | |
| "loss": 0.6244, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.078019504876219, | |
| "grad_norm": 17.523300170898438, | |
| "learning_rate": 6.559082421979195e-06, | |
| "loss": 0.6498, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.0817704426106527, | |
| "grad_norm": 9.524145126342773, | |
| "learning_rate": 6.532408642304615e-06, | |
| "loss": 0.7792, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.085521380345086, | |
| "grad_norm": 14.92676830291748, | |
| "learning_rate": 6.505734862630036e-06, | |
| "loss": 0.5748, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.08927231807952, | |
| "grad_norm": 18.87467384338379, | |
| "learning_rate": 6.479061082955455e-06, | |
| "loss": 0.7199, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.0930232558139537, | |
| "grad_norm": 10.356287002563477, | |
| "learning_rate": 6.452387303280876e-06, | |
| "loss": 0.7016, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.096774193548387, | |
| "grad_norm": 11.189599990844727, | |
| "learning_rate": 6.425713523606295e-06, | |
| "loss": 0.6511, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.100525131282821, | |
| "grad_norm": 12.267254829406738, | |
| "learning_rate": 6.399039743931715e-06, | |
| "loss": 0.6421, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.104276069017254, | |
| "grad_norm": 19.524673461914062, | |
| "learning_rate": 6.3723659642571354e-06, | |
| "loss": 0.6963, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.108027006751688, | |
| "grad_norm": 13.466742515563965, | |
| "learning_rate": 6.345692184582556e-06, | |
| "loss": 0.6727, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.1117779444861213, | |
| "grad_norm": 20.707855224609375, | |
| "learning_rate": 6.319018404907976e-06, | |
| "loss": 0.6695, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.115528882220555, | |
| "grad_norm": 15.425350189208984, | |
| "learning_rate": 6.292344625233396e-06, | |
| "loss": 0.673, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.119279819954989, | |
| "grad_norm": 5.349853038787842, | |
| "learning_rate": 6.2656708455588164e-06, | |
| "loss": 0.6275, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.1230307576894223, | |
| "grad_norm": 13.552290916442871, | |
| "learning_rate": 6.238997065884236e-06, | |
| "loss": 0.6945, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.126781695423856, | |
| "grad_norm": 17.840105056762695, | |
| "learning_rate": 6.212323286209656e-06, | |
| "loss": 0.8054, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.1305326331582894, | |
| "grad_norm": 21.012237548828125, | |
| "learning_rate": 6.185649506535076e-06, | |
| "loss": 0.7306, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.1342835708927232, | |
| "grad_norm": 13.1303129196167, | |
| "learning_rate": 6.158975726860497e-06, | |
| "loss": 0.871, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.1380345086271566, | |
| "grad_norm": 11.506791114807129, | |
| "learning_rate": 6.132301947185917e-06, | |
| "loss": 0.6722, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.1417854463615904, | |
| "grad_norm": 9.709290504455566, | |
| "learning_rate": 6.1056281675113375e-06, | |
| "loss": 0.6695, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.145536384096024, | |
| "grad_norm": 8.551689147949219, | |
| "learning_rate": 6.078954387836757e-06, | |
| "loss": 0.7001, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.1492873218304576, | |
| "grad_norm": 12.69763469696045, | |
| "learning_rate": 6.052280608162177e-06, | |
| "loss": 0.6778, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.1530382595648914, | |
| "grad_norm": 10.49093246459961, | |
| "learning_rate": 6.025606828487597e-06, | |
| "loss": 0.6671, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.1567891972993247, | |
| "grad_norm": 7.214636325836182, | |
| "learning_rate": 5.998933048813017e-06, | |
| "loss": 0.616, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.1605401350337585, | |
| "grad_norm": 8.58086109161377, | |
| "learning_rate": 5.972259269138437e-06, | |
| "loss": 0.6024, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.164291072768192, | |
| "grad_norm": 7.856104373931885, | |
| "learning_rate": 5.945585489463858e-06, | |
| "loss": 0.6202, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.1680420105026257, | |
| "grad_norm": 6.472407341003418, | |
| "learning_rate": 5.918911709789278e-06, | |
| "loss": 0.6141, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.1717929482370595, | |
| "grad_norm": 6.612668991088867, | |
| "learning_rate": 5.892237930114698e-06, | |
| "loss": 0.7841, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.175543885971493, | |
| "grad_norm": 9.869592666625977, | |
| "learning_rate": 5.865564150440118e-06, | |
| "loss": 0.5949, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.1792948237059266, | |
| "grad_norm": 12.85415267944336, | |
| "learning_rate": 5.838890370765538e-06, | |
| "loss": 0.663, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.18304576144036, | |
| "grad_norm": 22.380807876586914, | |
| "learning_rate": 5.8122165910909575e-06, | |
| "loss": 0.6532, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.186796699174794, | |
| "grad_norm": 23.866607666015625, | |
| "learning_rate": 5.785542811416378e-06, | |
| "loss": 0.6704, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.190547636909227, | |
| "grad_norm": 12.608299255371094, | |
| "learning_rate": 5.7588690317417985e-06, | |
| "loss": 0.6231, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.194298574643661, | |
| "grad_norm": 27.60419464111328, | |
| "learning_rate": 5.732195252067219e-06, | |
| "loss": 0.6369, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.1980495123780943, | |
| "grad_norm": 10.39966869354248, | |
| "learning_rate": 5.7055214723926385e-06, | |
| "loss": 0.5964, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.201800450112528, | |
| "grad_norm": 23.611059188842773, | |
| "learning_rate": 5.678847692718059e-06, | |
| "loss": 0.7365, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.205551387846962, | |
| "grad_norm": 10.59642505645752, | |
| "learning_rate": 5.652173913043479e-06, | |
| "loss": 0.6305, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.2093023255813953, | |
| "grad_norm": 15.549806594848633, | |
| "learning_rate": 5.625500133368898e-06, | |
| "loss": 0.624, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.213053263315829, | |
| "grad_norm": 17.546363830566406, | |
| "learning_rate": 5.598826353694319e-06, | |
| "loss": 0.7103, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.2168042010502624, | |
| "grad_norm": 19.833606719970703, | |
| "learning_rate": 5.572152574019739e-06, | |
| "loss": 0.4821, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.2205551387846962, | |
| "grad_norm": 18.05365562438965, | |
| "learning_rate": 5.54547879434516e-06, | |
| "loss": 0.6953, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.2243060765191296, | |
| "grad_norm": 3.1533432006835938, | |
| "learning_rate": 5.518805014670579e-06, | |
| "loss": 0.6899, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.2280570142535634, | |
| "grad_norm": 21.84452247619629, | |
| "learning_rate": 5.492131234996e-06, | |
| "loss": 0.8146, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.231807951987997, | |
| "grad_norm": 20.791135787963867, | |
| "learning_rate": 5.465457455321419e-06, | |
| "loss": 0.4915, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.2355588897224306, | |
| "grad_norm": 16.44775390625, | |
| "learning_rate": 5.438783675646839e-06, | |
| "loss": 0.5946, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.2393098274568644, | |
| "grad_norm": 8.386981964111328, | |
| "learning_rate": 5.412109895972259e-06, | |
| "loss": 0.7348, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 2.2430607651912977, | |
| "grad_norm": 26.47071075439453, | |
| "learning_rate": 5.38543611629768e-06, | |
| "loss": 0.6261, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 2.2468117029257315, | |
| "grad_norm": 11.219141960144043, | |
| "learning_rate": 5.3587623366231e-06, | |
| "loss": 0.5324, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 2.250562640660165, | |
| "grad_norm": 15.969422340393066, | |
| "learning_rate": 5.33208855694852e-06, | |
| "loss": 0.7459, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.2543135783945987, | |
| "grad_norm": 20.990497589111328, | |
| "learning_rate": 5.30541477727394e-06, | |
| "loss": 0.5593, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 2.258064516129032, | |
| "grad_norm": 10.82603645324707, | |
| "learning_rate": 5.27874099759936e-06, | |
| "loss": 0.6698, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 2.261815453863466, | |
| "grad_norm": 19.865243911743164, | |
| "learning_rate": 5.25206721792478e-06, | |
| "loss": 0.732, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 2.2655663915978996, | |
| "grad_norm": 25.37660026550293, | |
| "learning_rate": 5.2253934382502e-06, | |
| "loss": 0.5585, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 2.269317329332333, | |
| "grad_norm": 19.796749114990234, | |
| "learning_rate": 5.1987196585756205e-06, | |
| "loss": 0.7108, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.273068267066767, | |
| "grad_norm": 12.207030296325684, | |
| "learning_rate": 5.172045878901041e-06, | |
| "loss": 0.6683, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 2.2768192048012, | |
| "grad_norm": 20.979265213012695, | |
| "learning_rate": 5.1453720992264615e-06, | |
| "loss": 0.6962, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 2.280570142535634, | |
| "grad_norm": 13.058587074279785, | |
| "learning_rate": 5.118698319551881e-06, | |
| "loss": 0.6119, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 2.2843210802700673, | |
| "grad_norm": 7.18276309967041, | |
| "learning_rate": 5.092024539877301e-06, | |
| "loss": 0.606, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 2.288072018004501, | |
| "grad_norm": 21.568151473999023, | |
| "learning_rate": 5.065350760202721e-06, | |
| "loss": 0.6909, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.291822955738935, | |
| "grad_norm": 28.49129867553711, | |
| "learning_rate": 5.038676980528141e-06, | |
| "loss": 0.6764, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 2.2955738934733683, | |
| "grad_norm": 12.39367389678955, | |
| "learning_rate": 5.012003200853561e-06, | |
| "loss": 0.753, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 2.299324831207802, | |
| "grad_norm": 17.55943489074707, | |
| "learning_rate": 4.985329421178982e-06, | |
| "loss": 0.7103, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 2.3030757689422354, | |
| "grad_norm": 16.813745498657227, | |
| "learning_rate": 4.958655641504402e-06, | |
| "loss": 0.645, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 2.3068267066766692, | |
| "grad_norm": 20.711591720581055, | |
| "learning_rate": 4.931981861829822e-06, | |
| "loss": 0.6337, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.3105776444111026, | |
| "grad_norm": 5.449891567230225, | |
| "learning_rate": 4.905308082155241e-06, | |
| "loss": 0.6224, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 2.3143285821455364, | |
| "grad_norm": 15.508672714233398, | |
| "learning_rate": 4.878634302480662e-06, | |
| "loss": 0.6718, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 2.31807951987997, | |
| "grad_norm": 12.16860294342041, | |
| "learning_rate": 4.8519605228060815e-06, | |
| "loss": 0.6044, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 2.3218304576144035, | |
| "grad_norm": 16.671234130859375, | |
| "learning_rate": 4.825286743131502e-06, | |
| "loss": 0.7397, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 2.3255813953488373, | |
| "grad_norm": 27.95615577697754, | |
| "learning_rate": 4.798612963456922e-06, | |
| "loss": 0.6451, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.3293323330832707, | |
| "grad_norm": 23.62805938720703, | |
| "learning_rate": 4.771939183782343e-06, | |
| "loss": 0.7978, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 2.3330832708177045, | |
| "grad_norm": 17.226280212402344, | |
| "learning_rate": 4.7452654041077625e-06, | |
| "loss": 0.6442, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 2.336834208552138, | |
| "grad_norm": 22.371273040771484, | |
| "learning_rate": 4.718591624433183e-06, | |
| "loss": 0.6885, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 2.3405851462865717, | |
| "grad_norm": 12.560019493103027, | |
| "learning_rate": 4.6919178447586026e-06, | |
| "loss": 0.6033, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 2.3443360840210055, | |
| "grad_norm": 14.103109359741211, | |
| "learning_rate": 4.665244065084023e-06, | |
| "loss": 0.6683, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.348087021755439, | |
| "grad_norm": 11.051913261413574, | |
| "learning_rate": 4.638570285409443e-06, | |
| "loss": 0.7092, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 2.3518379594898726, | |
| "grad_norm": 15.613760948181152, | |
| "learning_rate": 4.611896505734863e-06, | |
| "loss": 0.6974, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 2.355588897224306, | |
| "grad_norm": 19.85428237915039, | |
| "learning_rate": 4.5852227260602836e-06, | |
| "loss": 0.6637, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 2.35933983495874, | |
| "grad_norm": 15.703207015991211, | |
| "learning_rate": 4.558548946385703e-06, | |
| "loss": 0.6508, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 2.363090772693173, | |
| "grad_norm": 11.342123985290527, | |
| "learning_rate": 4.531875166711124e-06, | |
| "loss": 0.7348, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.366841710427607, | |
| "grad_norm": 11.049941062927246, | |
| "learning_rate": 4.505201387036543e-06, | |
| "loss": 0.6421, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 2.3705926481620407, | |
| "grad_norm": 24.488731384277344, | |
| "learning_rate": 4.478527607361964e-06, | |
| "loss": 0.7123, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 2.374343585896474, | |
| "grad_norm": 14.967778205871582, | |
| "learning_rate": 4.451853827687383e-06, | |
| "loss": 0.7142, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 2.378094523630908, | |
| "grad_norm": 9.328021049499512, | |
| "learning_rate": 4.425180048012804e-06, | |
| "loss": 0.6251, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 2.3818454613653413, | |
| "grad_norm": 17.42303466796875, | |
| "learning_rate": 4.398506268338224e-06, | |
| "loss": 0.6355, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.385596399099775, | |
| "grad_norm": 15.201652526855469, | |
| "learning_rate": 4.371832488663644e-06, | |
| "loss": 0.7441, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 2.3893473368342084, | |
| "grad_norm": 23.0561466217041, | |
| "learning_rate": 4.345158708989064e-06, | |
| "loss": 0.6641, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 2.393098274568642, | |
| "grad_norm": 14.52270221710205, | |
| "learning_rate": 4.318484929314484e-06, | |
| "loss": 0.7073, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 2.396849212303076, | |
| "grad_norm": 13.747902870178223, | |
| "learning_rate": 4.291811149639904e-06, | |
| "loss": 0.81, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 2.4006001500375094, | |
| "grad_norm": 14.231673240661621, | |
| "learning_rate": 4.265137369965324e-06, | |
| "loss": 0.6939, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.404351087771943, | |
| "grad_norm": 7.63701057434082, | |
| "learning_rate": 4.2384635902907445e-06, | |
| "loss": 0.6873, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 2.4081020255063765, | |
| "grad_norm": 20.752126693725586, | |
| "learning_rate": 4.211789810616165e-06, | |
| "loss": 0.571, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 2.4118529632408103, | |
| "grad_norm": 13.460418701171875, | |
| "learning_rate": 4.185116030941585e-06, | |
| "loss": 0.6506, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 2.4156039009752437, | |
| "grad_norm": 8.838345527648926, | |
| "learning_rate": 4.158442251267005e-06, | |
| "loss": 0.5745, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 2.4193548387096775, | |
| "grad_norm": 10.570659637451172, | |
| "learning_rate": 4.131768471592425e-06, | |
| "loss": 0.6607, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.423105776444111, | |
| "grad_norm": 12.49052619934082, | |
| "learning_rate": 4.105094691917845e-06, | |
| "loss": 0.5026, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 2.4268567141785446, | |
| "grad_norm": 9.46437931060791, | |
| "learning_rate": 4.078420912243265e-06, | |
| "loss": 0.6005, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 2.430607651912978, | |
| "grad_norm": 29.9566593170166, | |
| "learning_rate": 4.051747132568685e-06, | |
| "loss": 0.6292, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 2.434358589647412, | |
| "grad_norm": 12.318580627441406, | |
| "learning_rate": 4.025073352894106e-06, | |
| "loss": 0.7056, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 2.4381095273818456, | |
| "grad_norm": 20.635848999023438, | |
| "learning_rate": 3.998399573219526e-06, | |
| "loss": 0.6663, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.441860465116279, | |
| "grad_norm": 13.231310844421387, | |
| "learning_rate": 3.971725793544946e-06, | |
| "loss": 0.7405, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 2.4456114028507128, | |
| "grad_norm": 16.560197830200195, | |
| "learning_rate": 3.945052013870365e-06, | |
| "loss": 0.6678, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 2.449362340585146, | |
| "grad_norm": 21.45167350769043, | |
| "learning_rate": 3.918378234195786e-06, | |
| "loss": 0.6032, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 2.45311327831958, | |
| "grad_norm": 37.360843658447266, | |
| "learning_rate": 3.891704454521205e-06, | |
| "loss": 0.8438, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 2.4568642160540133, | |
| "grad_norm": 30.98585319519043, | |
| "learning_rate": 3.865030674846626e-06, | |
| "loss": 0.6035, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.460615153788447, | |
| "grad_norm": 13.408466339111328, | |
| "learning_rate": 3.838356895172046e-06, | |
| "loss": 0.5181, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 2.464366091522881, | |
| "grad_norm": 16.84627914428711, | |
| "learning_rate": 3.8116831154974664e-06, | |
| "loss": 0.6353, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 2.4681170292573142, | |
| "grad_norm": 19.02153968811035, | |
| "learning_rate": 3.785009335822886e-06, | |
| "loss": 0.6052, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 2.471867966991748, | |
| "grad_norm": 13.263850212097168, | |
| "learning_rate": 3.7583355561483065e-06, | |
| "loss": 0.8126, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 2.4756189047261814, | |
| "grad_norm": 22.753215789794922, | |
| "learning_rate": 3.731661776473727e-06, | |
| "loss": 0.6449, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.479369842460615, | |
| "grad_norm": 13.979212760925293, | |
| "learning_rate": 3.704987996799147e-06, | |
| "loss": 0.7421, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 2.4831207801950486, | |
| "grad_norm": 23.614389419555664, | |
| "learning_rate": 3.6783142171245666e-06, | |
| "loss": 0.8168, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 2.4868717179294824, | |
| "grad_norm": 7.810019493103027, | |
| "learning_rate": 3.651640437449987e-06, | |
| "loss": 0.6301, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 2.490622655663916, | |
| "grad_norm": 17.90605926513672, | |
| "learning_rate": 3.624966657775407e-06, | |
| "loss": 0.6369, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 2.4943735933983495, | |
| "grad_norm": 10.375251770019531, | |
| "learning_rate": 3.598292878100827e-06, | |
| "loss": 0.6254, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.4981245311327833, | |
| "grad_norm": 15.813028335571289, | |
| "learning_rate": 3.571619098426247e-06, | |
| "loss": 0.7866, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 2.5018754688672167, | |
| "grad_norm": 8.438957214355469, | |
| "learning_rate": 3.5449453187516676e-06, | |
| "loss": 0.7288, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 2.5056264066016505, | |
| "grad_norm": 23.076040267944336, | |
| "learning_rate": 3.5182715390770877e-06, | |
| "loss": 0.6743, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 2.509377344336084, | |
| "grad_norm": 14.966166496276855, | |
| "learning_rate": 3.4915977594025073e-06, | |
| "loss": 0.6408, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 2.5131282820705176, | |
| "grad_norm": 19.553081512451172, | |
| "learning_rate": 3.4649239797279277e-06, | |
| "loss": 0.613, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.5168792198049514, | |
| "grad_norm": 12.050764083862305, | |
| "learning_rate": 3.4382502000533478e-06, | |
| "loss": 0.6547, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 2.520630157539385, | |
| "grad_norm": 14.52085018157959, | |
| "learning_rate": 3.411576420378768e-06, | |
| "loss": 0.7239, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.5243810952738186, | |
| "grad_norm": 20.222137451171875, | |
| "learning_rate": 3.384902640704188e-06, | |
| "loss": 0.7656, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 2.528132033008252, | |
| "grad_norm": 14.729280471801758, | |
| "learning_rate": 3.3582288610296083e-06, | |
| "loss": 0.6013, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 2.5318829707426858, | |
| "grad_norm": 21.984832763671875, | |
| "learning_rate": 3.3315550813550284e-06, | |
| "loss": 0.6453, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.535633908477119, | |
| "grad_norm": 19.643138885498047, | |
| "learning_rate": 3.304881301680448e-06, | |
| "loss": 0.8221, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 2.539384846211553, | |
| "grad_norm": 17.281740188598633, | |
| "learning_rate": 3.2782075220058684e-06, | |
| "loss": 0.6348, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 2.5431357839459867, | |
| "grad_norm": 17.821035385131836, | |
| "learning_rate": 3.251533742331289e-06, | |
| "loss": 0.5855, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 2.54688672168042, | |
| "grad_norm": 14.015131950378418, | |
| "learning_rate": 3.224859962656709e-06, | |
| "loss": 0.5544, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 2.550637659414854, | |
| "grad_norm": 10.391494750976562, | |
| "learning_rate": 3.1981861829821286e-06, | |
| "loss": 0.588, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.5543885971492872, | |
| "grad_norm": 14.990039825439453, | |
| "learning_rate": 3.171512403307549e-06, | |
| "loss": 0.5782, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 2.558139534883721, | |
| "grad_norm": 13.448775291442871, | |
| "learning_rate": 3.144838623632969e-06, | |
| "loss": 0.8525, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 2.5618904726181544, | |
| "grad_norm": 13.461121559143066, | |
| "learning_rate": 3.118164843958389e-06, | |
| "loss": 0.6256, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 2.565641410352588, | |
| "grad_norm": 13.295988082885742, | |
| "learning_rate": 3.091491064283809e-06, | |
| "loss": 0.7059, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 2.569392348087022, | |
| "grad_norm": 14.871612548828125, | |
| "learning_rate": 3.0648172846092296e-06, | |
| "loss": 0.6338, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.5731432858214554, | |
| "grad_norm": 30.46957778930664, | |
| "learning_rate": 3.0381435049346496e-06, | |
| "loss": 0.7072, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 2.5768942235558887, | |
| "grad_norm": 20.661733627319336, | |
| "learning_rate": 3.0114697252600693e-06, | |
| "loss": 0.6634, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 2.5806451612903225, | |
| "grad_norm": 10.35488224029541, | |
| "learning_rate": 2.9847959455854897e-06, | |
| "loss": 0.5709, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 2.5843960990247563, | |
| "grad_norm": 22.383169174194336, | |
| "learning_rate": 2.9581221659109098e-06, | |
| "loss": 0.433, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 2.5881470367591897, | |
| "grad_norm": 21.173015594482422, | |
| "learning_rate": 2.93144838623633e-06, | |
| "loss": 0.6109, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.5918979744936235, | |
| "grad_norm": 25.366735458374023, | |
| "learning_rate": 2.90477460656175e-06, | |
| "loss": 0.8177, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 2.5956489122280573, | |
| "grad_norm": 18.91875457763672, | |
| "learning_rate": 2.8781008268871703e-06, | |
| "loss": 0.7214, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 2.5993998499624906, | |
| "grad_norm": 12.457830429077148, | |
| "learning_rate": 2.8514270472125903e-06, | |
| "loss": 0.553, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 2.603150787696924, | |
| "grad_norm": 6.222160816192627, | |
| "learning_rate": 2.82475326753801e-06, | |
| "loss": 0.679, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 2.606901725431358, | |
| "grad_norm": 8.99958324432373, | |
| "learning_rate": 2.7980794878634304e-06, | |
| "loss": 0.5929, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.6106526631657916, | |
| "grad_norm": 11.063492774963379, | |
| "learning_rate": 2.771405708188851e-06, | |
| "loss": 0.5185, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 2.614403600900225, | |
| "grad_norm": 10.320928573608398, | |
| "learning_rate": 2.744731928514271e-06, | |
| "loss": 0.5286, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 2.6181545386346587, | |
| "grad_norm": 13.718670845031738, | |
| "learning_rate": 2.7180581488396905e-06, | |
| "loss": 0.6508, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 2.6219054763690925, | |
| "grad_norm": 10.613819122314453, | |
| "learning_rate": 2.691384369165111e-06, | |
| "loss": 0.5805, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 2.625656414103526, | |
| "grad_norm": 22.765199661254883, | |
| "learning_rate": 2.664710589490531e-06, | |
| "loss": 0.6691, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.6294073518379593, | |
| "grad_norm": 12.34518051147461, | |
| "learning_rate": 2.638036809815951e-06, | |
| "loss": 0.6577, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 2.633158289572393, | |
| "grad_norm": 15.861391067504883, | |
| "learning_rate": 2.611363030141371e-06, | |
| "loss": 0.5159, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 2.636909227306827, | |
| "grad_norm": 7.271751880645752, | |
| "learning_rate": 2.5846892504667916e-06, | |
| "loss": 0.6844, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 2.64066016504126, | |
| "grad_norm": 20.930856704711914, | |
| "learning_rate": 2.5580154707922116e-06, | |
| "loss": 0.7827, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 2.644411102775694, | |
| "grad_norm": 28.042675018310547, | |
| "learning_rate": 2.5313416911176312e-06, | |
| "loss": 0.6383, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.6481620405101274, | |
| "grad_norm": 25.815296173095703, | |
| "learning_rate": 2.5046679114430517e-06, | |
| "loss": 0.6866, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 2.651912978244561, | |
| "grad_norm": 16.492206573486328, | |
| "learning_rate": 2.4779941317684717e-06, | |
| "loss": 0.5342, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 2.6556639159789945, | |
| "grad_norm": 23.266910552978516, | |
| "learning_rate": 2.4513203520938918e-06, | |
| "loss": 0.5564, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 2.6594148537134283, | |
| "grad_norm": 11.591928482055664, | |
| "learning_rate": 2.424646572419312e-06, | |
| "loss": 0.573, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 2.663165791447862, | |
| "grad_norm": 14.71267032623291, | |
| "learning_rate": 2.3979727927447323e-06, | |
| "loss": 0.6456, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.6669167291822955, | |
| "grad_norm": 7.238256454467773, | |
| "learning_rate": 2.3712990130701523e-06, | |
| "loss": 0.7731, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 2.6706676669167293, | |
| "grad_norm": 38.71699523925781, | |
| "learning_rate": 2.3446252333955723e-06, | |
| "loss": 0.7189, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 2.6744186046511627, | |
| "grad_norm": 24.029537200927734, | |
| "learning_rate": 2.3179514537209924e-06, | |
| "loss": 0.7435, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 2.6781695423855965, | |
| "grad_norm": 17.704763412475586, | |
| "learning_rate": 2.291277674046413e-06, | |
| "loss": 0.838, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 2.68192048012003, | |
| "grad_norm": 36.12045669555664, | |
| "learning_rate": 2.2646038943718325e-06, | |
| "loss": 0.7103, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.6856714178544636, | |
| "grad_norm": 20.062591552734375, | |
| "learning_rate": 2.237930114697253e-06, | |
| "loss": 0.5748, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 2.6894223555888974, | |
| "grad_norm": 9.567973136901855, | |
| "learning_rate": 2.211256335022673e-06, | |
| "loss": 0.7598, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 2.6931732933233308, | |
| "grad_norm": 19.337631225585938, | |
| "learning_rate": 2.184582555348093e-06, | |
| "loss": 0.5945, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 2.6969242310577646, | |
| "grad_norm": 11.189875602722168, | |
| "learning_rate": 2.157908775673513e-06, | |
| "loss": 0.77, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 2.700675168792198, | |
| "grad_norm": 16.071062088012695, | |
| "learning_rate": 2.131234995998933e-06, | |
| "loss": 0.6758, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.7044261065266317, | |
| "grad_norm": 11.37120532989502, | |
| "learning_rate": 2.1045612163243535e-06, | |
| "loss": 0.5912, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 2.708177044261065, | |
| "grad_norm": 25.354324340820312, | |
| "learning_rate": 2.0778874366497736e-06, | |
| "loss": 0.6741, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 2.711927981995499, | |
| "grad_norm": 11.246193885803223, | |
| "learning_rate": 2.0512136569751936e-06, | |
| "loss": 0.6073, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 2.7156789197299327, | |
| "grad_norm": 9.01452350616455, | |
| "learning_rate": 2.0245398773006137e-06, | |
| "loss": 0.7363, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 2.719429857464366, | |
| "grad_norm": 22.3641414642334, | |
| "learning_rate": 1.9978660976260337e-06, | |
| "loss": 0.6278, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.7231807951988, | |
| "grad_norm": 14.206088066101074, | |
| "learning_rate": 1.9711923179514537e-06, | |
| "loss": 0.6676, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 2.726931732933233, | |
| "grad_norm": 14.623751640319824, | |
| "learning_rate": 1.9445185382768738e-06, | |
| "loss": 0.6629, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 2.730682670667667, | |
| "grad_norm": 15.682950019836426, | |
| "learning_rate": 1.9178447586022942e-06, | |
| "loss": 0.8008, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 2.7344336084021004, | |
| "grad_norm": 16.56915855407715, | |
| "learning_rate": 1.891170978927714e-06, | |
| "loss": 0.8421, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 2.738184546136534, | |
| "grad_norm": 20.514009475708008, | |
| "learning_rate": 1.8644971992531343e-06, | |
| "loss": 0.6755, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.741935483870968, | |
| "grad_norm": 15.838664054870605, | |
| "learning_rate": 1.8378234195785544e-06, | |
| "loss": 0.6463, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 2.7456864216054013, | |
| "grad_norm": 30.3530330657959, | |
| "learning_rate": 1.8111496399039746e-06, | |
| "loss": 0.6295, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 2.7494373593398347, | |
| "grad_norm": 8.959320068359375, | |
| "learning_rate": 1.7844758602293946e-06, | |
| "loss": 0.6443, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 2.7531882970742685, | |
| "grad_norm": 11.156110763549805, | |
| "learning_rate": 1.757802080554815e-06, | |
| "loss": 0.5971, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 2.7569392348087023, | |
| "grad_norm": 21.744304656982422, | |
| "learning_rate": 1.731128300880235e-06, | |
| "loss": 0.5818, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.7606901725431356, | |
| "grad_norm": 23.995845794677734, | |
| "learning_rate": 1.7044545212056548e-06, | |
| "loss": 0.6885, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 2.7644411102775694, | |
| "grad_norm": 13.629135131835938, | |
| "learning_rate": 1.677780741531075e-06, | |
| "loss": 0.771, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 2.7681920480120032, | |
| "grad_norm": 6.805270671844482, | |
| "learning_rate": 1.651106961856495e-06, | |
| "loss": 0.687, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 2.7719429857464366, | |
| "grad_norm": 21.93046760559082, | |
| "learning_rate": 1.6244331821819153e-06, | |
| "loss": 0.5681, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 2.77569392348087, | |
| "grad_norm": 22.271133422851562, | |
| "learning_rate": 1.5977594025073353e-06, | |
| "loss": 0.7504, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.7794448612153038, | |
| "grad_norm": 19.411861419677734, | |
| "learning_rate": 1.5710856228327556e-06, | |
| "loss": 0.7141, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 2.7831957989497376, | |
| "grad_norm": 21.990013122558594, | |
| "learning_rate": 1.5444118431581756e-06, | |
| "loss": 0.7941, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 2.786946736684171, | |
| "grad_norm": 26.875274658203125, | |
| "learning_rate": 1.5177380634835959e-06, | |
| "loss": 0.7077, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 2.7906976744186047, | |
| "grad_norm": 17.144861221313477, | |
| "learning_rate": 1.491064283809016e-06, | |
| "loss": 0.6153, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 2.7944486121530385, | |
| "grad_norm": 18.100868225097656, | |
| "learning_rate": 1.4643905041344357e-06, | |
| "loss": 0.635, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.798199549887472, | |
| "grad_norm": 17.497039794921875, | |
| "learning_rate": 1.437716724459856e-06, | |
| "loss": 0.7681, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 2.8019504876219052, | |
| "grad_norm": 11.748749732971191, | |
| "learning_rate": 1.411042944785276e-06, | |
| "loss": 0.7916, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 2.805701425356339, | |
| "grad_norm": 17.71030616760254, | |
| "learning_rate": 1.3843691651106963e-06, | |
| "loss": 0.6826, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 2.809452363090773, | |
| "grad_norm": 15.269068717956543, | |
| "learning_rate": 1.3576953854361163e-06, | |
| "loss": 0.6008, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 2.813203300825206, | |
| "grad_norm": 16.148839950561523, | |
| "learning_rate": 1.3310216057615366e-06, | |
| "loss": 0.7581, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.81695423855964, | |
| "grad_norm": 7.341813564300537, | |
| "learning_rate": 1.3043478260869566e-06, | |
| "loss": 0.4343, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 2.8207051762940734, | |
| "grad_norm": 11.722135543823242, | |
| "learning_rate": 1.2776740464123769e-06, | |
| "loss": 0.5226, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 2.824456114028507, | |
| "grad_norm": 17.107776641845703, | |
| "learning_rate": 1.251000266737797e-06, | |
| "loss": 0.6466, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 2.8282070517629405, | |
| "grad_norm": 15.833941459655762, | |
| "learning_rate": 1.224326487063217e-06, | |
| "loss": 0.7703, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 2.8319579894973743, | |
| "grad_norm": 19.610742568969727, | |
| "learning_rate": 1.197652707388637e-06, | |
| "loss": 0.6359, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.835708927231808, | |
| "grad_norm": 12.620158195495605, | |
| "learning_rate": 1.1709789277140572e-06, | |
| "loss": 0.6561, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 2.8394598649662415, | |
| "grad_norm": 20.80132293701172, | |
| "learning_rate": 1.1443051480394773e-06, | |
| "loss": 0.7668, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 2.8432108027006753, | |
| "grad_norm": 9.778907775878906, | |
| "learning_rate": 1.1176313683648973e-06, | |
| "loss": 0.7067, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 2.8469617404351086, | |
| "grad_norm": 11.224839210510254, | |
| "learning_rate": 1.0909575886903174e-06, | |
| "loss": 0.5963, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 2.8507126781695424, | |
| "grad_norm": 11.957784652709961, | |
| "learning_rate": 1.0642838090157376e-06, | |
| "loss": 0.7127, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.854463615903976, | |
| "grad_norm": 17.465967178344727, | |
| "learning_rate": 1.0376100293411576e-06, | |
| "loss": 0.5896, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 2.8582145536384096, | |
| "grad_norm": 22.074583053588867, | |
| "learning_rate": 1.010936249666578e-06, | |
| "loss": 0.664, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 2.8619654913728434, | |
| "grad_norm": 45.1811408996582, | |
| "learning_rate": 9.84262469991998e-07, | |
| "loss": 0.6788, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 2.8657164291072768, | |
| "grad_norm": 12.519074440002441, | |
| "learning_rate": 9.57588690317418e-07, | |
| "loss": 0.5208, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 2.8694673668417106, | |
| "grad_norm": 14.533720016479492, | |
| "learning_rate": 9.309149106428382e-07, | |
| "loss": 0.6403, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 2.873218304576144, | |
| "grad_norm": 6.502141952514648, | |
| "learning_rate": 9.042411309682584e-07, | |
| "loss": 0.6661, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 2.8769692423105777, | |
| "grad_norm": 16.4246826171875, | |
| "learning_rate": 8.775673512936784e-07, | |
| "loss": 0.6631, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 2.880720180045011, | |
| "grad_norm": 20.435749053955078, | |
| "learning_rate": 8.508935716190984e-07, | |
| "loss": 0.769, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 2.884471117779445, | |
| "grad_norm": 9.382180213928223, | |
| "learning_rate": 8.242197919445186e-07, | |
| "loss": 0.5407, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 2.8882220555138787, | |
| "grad_norm": 12.802393913269043, | |
| "learning_rate": 7.975460122699387e-07, | |
| "loss": 0.636, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.891972993248312, | |
| "grad_norm": 5.997576713562012, | |
| "learning_rate": 7.708722325953588e-07, | |
| "loss": 0.561, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 2.895723930982746, | |
| "grad_norm": 8.369012832641602, | |
| "learning_rate": 7.441984529207789e-07, | |
| "loss": 0.5721, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 2.899474868717179, | |
| "grad_norm": 19.990249633789062, | |
| "learning_rate": 7.175246732461991e-07, | |
| "loss": 0.7267, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 2.903225806451613, | |
| "grad_norm": 19.364540100097656, | |
| "learning_rate": 6.908508935716192e-07, | |
| "loss": 0.6867, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 2.9069767441860463, | |
| "grad_norm": 10.638273239135742, | |
| "learning_rate": 6.641771138970394e-07, | |
| "loss": 0.5809, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 2.91072768192048, | |
| "grad_norm": 24.913246154785156, | |
| "learning_rate": 6.375033342224594e-07, | |
| "loss": 0.5658, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 2.914478619654914, | |
| "grad_norm": 6.1255412101745605, | |
| "learning_rate": 6.108295545478795e-07, | |
| "loss": 0.4796, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 2.9182295573893473, | |
| "grad_norm": 13.97762680053711, | |
| "learning_rate": 5.841557748732996e-07, | |
| "loss": 0.6201, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 2.921980495123781, | |
| "grad_norm": 24.56553840637207, | |
| "learning_rate": 5.574819951987197e-07, | |
| "loss": 0.5206, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 2.9257314328582145, | |
| "grad_norm": 20.081579208374023, | |
| "learning_rate": 5.308082155241398e-07, | |
| "loss": 0.5697, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.9294823705926483, | |
| "grad_norm": 11.358619689941406, | |
| "learning_rate": 5.041344358495599e-07, | |
| "loss": 0.6268, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 2.9332333083270816, | |
| "grad_norm": 11.016149520874023, | |
| "learning_rate": 4.7746065617498e-07, | |
| "loss": 0.5753, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 2.9369842460615154, | |
| "grad_norm": 17.64615249633789, | |
| "learning_rate": 4.507868765004002e-07, | |
| "loss": 0.7584, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 2.9407351837959492, | |
| "grad_norm": 17.292207717895508, | |
| "learning_rate": 4.2411309682582024e-07, | |
| "loss": 0.6361, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 2.9444861215303826, | |
| "grad_norm": 17.94815444946289, | |
| "learning_rate": 3.974393171512404e-07, | |
| "loss": 0.7208, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 2.948237059264816, | |
| "grad_norm": 13.073601722717285, | |
| "learning_rate": 3.7076553747666047e-07, | |
| "loss": 0.7179, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 2.9519879969992497, | |
| "grad_norm": 7.956513404846191, | |
| "learning_rate": 3.440917578020806e-07, | |
| "loss": 0.6109, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 2.9557389347336835, | |
| "grad_norm": 18.16693687438965, | |
| "learning_rate": 3.1741797812750066e-07, | |
| "loss": 0.6499, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 2.959489872468117, | |
| "grad_norm": 25.006132125854492, | |
| "learning_rate": 2.907441984529208e-07, | |
| "loss": 0.5358, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 2.9632408102025507, | |
| "grad_norm": 20.937856674194336, | |
| "learning_rate": 2.640704187783409e-07, | |
| "loss": 0.6364, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 2.9669917479369845, | |
| "grad_norm": 12.37922477722168, | |
| "learning_rate": 2.3739663910376104e-07, | |
| "loss": 0.4916, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 2.970742685671418, | |
| "grad_norm": 8.240549087524414, | |
| "learning_rate": 2.1072285942918113e-07, | |
| "loss": 0.6811, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 2.974493623405851, | |
| "grad_norm": 9.405010223388672, | |
| "learning_rate": 1.8404907975460125e-07, | |
| "loss": 0.5338, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 2.978244561140285, | |
| "grad_norm": 13.773921966552734, | |
| "learning_rate": 1.5737530008002134e-07, | |
| "loss": 0.6314, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 2.981995498874719, | |
| "grad_norm": 12.41072940826416, | |
| "learning_rate": 1.3070152040544146e-07, | |
| "loss": 0.5497, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 2.985746436609152, | |
| "grad_norm": 17.232473373413086, | |
| "learning_rate": 1.0402774073086158e-07, | |
| "loss": 0.588, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 2.989497374343586, | |
| "grad_norm": 27.516319274902344, | |
| "learning_rate": 7.735396105628168e-08, | |
| "loss": 0.7673, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 2.99324831207802, | |
| "grad_norm": 16.864728927612305, | |
| "learning_rate": 5.0680181381701795e-08, | |
| "loss": 0.6883, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 2.996999249812453, | |
| "grad_norm": 16.803760528564453, | |
| "learning_rate": 2.40064017071219e-08, | |
| "loss": 0.6337, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.5919831223628692, | |
| "eval_f1_macro": 0.5904844573730711, | |
| "eval_f1_weighted": 0.5917816930917, | |
| "eval_loss": 1.0033386945724487, | |
| "eval_precision_macro": 0.5945695673493336, | |
| "eval_precision_weighted": 0.5926704635628428, | |
| "eval_recall_macro": 0.5877159391363334, | |
| "eval_recall_weighted": 0.5919831223628692, | |
| "eval_runtime": 4.8573, | |
| "eval_samples_per_second": 487.928, | |
| "eval_steps_per_second": 61.145, | |
| "step": 7998 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7998, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.68326808991488e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |