{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9996003729852138,
  "eval_steps": 500,
  "global_step": 1876,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005328360197149328,
      "grad_norm": 0.3923943299533747,
      "learning_rate": 1.0638297872340427e-06,
      "loss": 1.5282,
      "step": 1
    },
    {
      "epoch": 0.0026641800985746636,
      "grad_norm": 0.39997680726308193,
      "learning_rate": 5.319148936170213e-06,
      "loss": 1.5368,
      "step": 5
    },
    {
      "epoch": 0.005328360197149327,
      "grad_norm": 0.42655574017759024,
      "learning_rate": 1.0638297872340426e-05,
      "loss": 1.5771,
      "step": 10
    },
    {
      "epoch": 0.007992540295723992,
      "grad_norm": 0.4800547793020182,
      "learning_rate": 1.595744680851064e-05,
      "loss": 1.5624,
      "step": 15
    },
    {
      "epoch": 0.010656720394298654,
      "grad_norm": 0.36870917165643946,
      "learning_rate": 2.1276595744680852e-05,
      "loss": 1.5192,
      "step": 20
    },
    {
      "epoch": 0.013320900492873319,
      "grad_norm": 0.23219684210183744,
      "learning_rate": 2.6595744680851064e-05,
      "loss": 1.5223,
      "step": 25
    },
    {
      "epoch": 0.015985080591447983,
      "grad_norm": 0.2091606557073564,
      "learning_rate": 3.191489361702128e-05,
      "loss": 1.4527,
      "step": 30
    },
    {
      "epoch": 0.018649260690022644,
      "grad_norm": 0.18998890324763035,
      "learning_rate": 3.723404255319149e-05,
      "loss": 1.4828,
      "step": 35
    },
    {
      "epoch": 0.02131344078859731,
      "grad_norm": 0.14996191641971635,
      "learning_rate": 4.2553191489361704e-05,
      "loss": 1.4084,
      "step": 40
    },
    {
      "epoch": 0.023977620887171973,
      "grad_norm": 0.16249029940771254,
      "learning_rate": 4.787234042553192e-05,
      "loss": 1.4001,
      "step": 45
    },
    {
      "epoch": 0.026641800985746637,
      "grad_norm": 0.1460528370416976,
      "learning_rate": 5.319148936170213e-05,
      "loss": 1.3865,
      "step": 50
    },
    {
      "epoch": 0.0293059810843213,
      "grad_norm": 0.12748888043071394,
      "learning_rate": 5.851063829787234e-05,
      "loss": 1.3832,
      "step": 55
    },
    {
      "epoch": 0.031970161182895966,
      "grad_norm": 0.1047553716550612,
      "learning_rate": 6.382978723404256e-05,
      "loss": 1.3383,
      "step": 60
    },
    {
      "epoch": 0.03463434128147063,
      "grad_norm": 1.0945957696792898,
      "learning_rate": 6.914893617021277e-05,
      "loss": 1.3216,
      "step": 65
    },
    {
      "epoch": 0.03729852138004529,
      "grad_norm": 0.0891343071370869,
      "learning_rate": 7.446808510638298e-05,
      "loss": 1.3098,
      "step": 70
    },
    {
      "epoch": 0.03996270147861995,
      "grad_norm": 0.07870697792715504,
      "learning_rate": 7.978723404255319e-05,
      "loss": 1.3138,
      "step": 75
    },
    {
      "epoch": 0.04262688157719462,
      "grad_norm": 0.08760750591416677,
      "learning_rate": 8.510638297872341e-05,
      "loss": 1.3006,
      "step": 80
    },
    {
      "epoch": 0.04529106167576928,
      "grad_norm": 0.08120742351067671,
      "learning_rate": 9.042553191489363e-05,
      "loss": 1.2385,
      "step": 85
    },
    {
      "epoch": 0.047955241774343946,
      "grad_norm": 0.08142820910966997,
      "learning_rate": 9.574468085106384e-05,
      "loss": 1.2945,
      "step": 90
    },
    {
      "epoch": 0.05061942187291861,
      "grad_norm": 0.09177922715037878,
      "learning_rate": 0.00010106382978723406,
      "loss": 1.2761,
      "step": 95
    },
    {
      "epoch": 0.053283601971493275,
      "grad_norm": 0.07677835076593886,
      "learning_rate": 0.00010638297872340425,
      "loss": 1.244,
      "step": 100
    },
    {
      "epoch": 0.05594778207006794,
      "grad_norm": 0.08525332172522379,
      "learning_rate": 0.00011170212765957446,
      "loss": 1.2838,
      "step": 105
    },
    {
      "epoch": 0.0586119621686426,
      "grad_norm": 0.10003274873557398,
      "learning_rate": 0.00011702127659574468,
      "loss": 1.2489,
      "step": 110
    },
    {
      "epoch": 0.06127614226721726,
      "grad_norm": 0.09434455492112725,
      "learning_rate": 0.0001223404255319149,
      "loss": 1.2503,
      "step": 115
    },
    {
      "epoch": 0.06394032236579193,
      "grad_norm": 0.0960230638906738,
      "learning_rate": 0.00012765957446808513,
      "loss": 1.2205,
      "step": 120
    },
    {
      "epoch": 0.0666045024643666,
      "grad_norm": 0.09721981845211174,
      "learning_rate": 0.00013297872340425532,
      "loss": 1.2624,
      "step": 125
    },
    {
      "epoch": 0.06926868256294126,
      "grad_norm": 0.08809066774892928,
      "learning_rate": 0.00013829787234042554,
      "loss": 1.2545,
      "step": 130
    },
    {
      "epoch": 0.07193286266151591,
      "grad_norm": 0.26229245154975894,
      "learning_rate": 0.00014361702127659576,
      "loss": 1.2408,
      "step": 135
    },
    {
      "epoch": 0.07459704276009058,
      "grad_norm": 0.10552899642439768,
      "learning_rate": 0.00014893617021276596,
      "loss": 1.2392,
      "step": 140
    },
    {
      "epoch": 0.07726122285866524,
      "grad_norm": 0.10911221765360271,
      "learning_rate": 0.00015425531914893618,
      "loss": 1.2148,
      "step": 145
    },
    {
      "epoch": 0.0799254029572399,
      "grad_norm": 0.11632059103832315,
      "learning_rate": 0.00015957446808510637,
      "loss": 1.2382,
      "step": 150
    },
    {
      "epoch": 0.08258958305581457,
      "grad_norm": 0.10281933721760748,
      "learning_rate": 0.00016489361702127662,
      "loss": 1.226,
      "step": 155
    },
    {
      "epoch": 0.08525376315438923,
      "grad_norm": 0.10561194502249595,
      "learning_rate": 0.00017021276595744682,
      "loss": 1.2531,
      "step": 160
    },
    {
      "epoch": 0.0879179432529639,
      "grad_norm": 0.10407844313384682,
      "learning_rate": 0.000175531914893617,
      "loss": 1.2428,
      "step": 165
    },
    {
      "epoch": 0.09058212335153856,
      "grad_norm": 0.08952286824052161,
      "learning_rate": 0.00018085106382978726,
      "loss": 1.2176,
      "step": 170
    },
    {
      "epoch": 0.09324630345011323,
      "grad_norm": 0.0938821785588311,
      "learning_rate": 0.00018617021276595746,
      "loss": 1.2307,
      "step": 175
    },
    {
      "epoch": 0.09591048354868789,
      "grad_norm": 0.1162063476232978,
      "learning_rate": 0.00019148936170212768,
      "loss": 1.2276,
      "step": 180
    },
    {
      "epoch": 0.09857466364726256,
      "grad_norm": 0.09632362372953375,
      "learning_rate": 0.00019680851063829787,
      "loss": 1.213,
      "step": 185
    },
    {
      "epoch": 0.10123884374583722,
      "grad_norm": 0.0984821085857903,
      "learning_rate": 0.00019999930723752516,
      "loss": 1.2093,
      "step": 190
    },
    {
      "epoch": 0.10390302384441188,
      "grad_norm": 0.09843324070313318,
      "learning_rate": 0.00019999151376991434,
      "loss": 1.2405,
      "step": 195
    },
    {
      "epoch": 0.10656720394298655,
      "grad_norm": 0.09038802438280195,
      "learning_rate": 0.00019997506155872244,
      "loss": 1.2226,
      "step": 200
    },
    {
      "epoch": 0.10923138404156121,
      "grad_norm": 0.09204563702733434,
      "learning_rate": 0.00019994995202862512,
      "loss": 1.1841,
      "step": 205
    },
    {
      "epoch": 0.11189556414013588,
      "grad_norm": 0.09153264803236888,
      "learning_rate": 0.00019991618735397672,
      "loss": 1.1963,
      "step": 210
    },
    {
      "epoch": 0.11455974423871054,
      "grad_norm": 0.10122556405254433,
      "learning_rate": 0.00019987377045862202,
      "loss": 1.1912,
      "step": 215
    },
    {
      "epoch": 0.1172239243372852,
      "grad_norm": 0.10731171538200276,
      "learning_rate": 0.00019982270501564284,
      "loss": 1.2206,
      "step": 220
    },
    {
      "epoch": 0.11988810443585986,
      "grad_norm": 0.10261300609074893,
      "learning_rate": 0.00019976299544704026,
      "loss": 1.2063,
      "step": 225
    },
    {
      "epoch": 0.12255228453443452,
      "grad_norm": 0.10220846662233612,
      "learning_rate": 0.00019969464692335152,
      "loss": 1.2176,
      "step": 230
    },
    {
      "epoch": 0.1252164646330092,
      "grad_norm": 0.106711961435993,
      "learning_rate": 0.00019961766536320225,
      "loss": 1.2338,
      "step": 235
    },
    {
      "epoch": 0.12788064473158386,
      "grad_norm": 0.08642302944089619,
      "learning_rate": 0.0001995320574327941,
      "loss": 1.1834,
      "step": 240
    },
    {
      "epoch": 0.13054482483015853,
      "grad_norm": 0.10984213170710444,
      "learning_rate": 0.00019943783054532732,
      "loss": 1.2157,
      "step": 245
    },
    {
      "epoch": 0.1332090049287332,
      "grad_norm": 0.09485880486202997,
      "learning_rate": 0.00019933499286035894,
      "loss": 1.204,
      "step": 250
    },
    {
      "epoch": 0.13587318502730786,
      "grad_norm": 0.10445439668189137,
      "learning_rate": 0.0001992235532830961,
      "loss": 1.2193,
      "step": 255
    },
    {
      "epoch": 0.13853736512588252,
      "grad_norm": 0.08363317544398702,
      "learning_rate": 0.00019910352146362497,
      "loss": 1.1989,
      "step": 260
    },
    {
      "epoch": 0.1412015452244572,
      "grad_norm": 0.13246579577470613,
      "learning_rate": 0.00019897490779607514,
      "loss": 1.1942,
      "step": 265
    },
    {
      "epoch": 0.14386572532303182,
      "grad_norm": 0.11140593579855256,
      "learning_rate": 0.00019883772341771936,
      "loss": 1.226,
      "step": 270
    },
    {
      "epoch": 0.1465299054216065,
      "grad_norm": 0.09879562730310984,
      "learning_rate": 0.0001986919802080093,
      "loss": 1.2206,
      "step": 275
    },
    {
      "epoch": 0.14919408552018115,
      "grad_norm": 0.09576696495195093,
      "learning_rate": 0.00019853769078754686,
      "loss": 1.2156,
      "step": 280
    },
    {
      "epoch": 0.15185826561875582,
      "grad_norm": 0.09339602143232201,
      "learning_rate": 0.00019837486851699104,
      "loss": 1.2136,
      "step": 285
    },
    {
      "epoch": 0.15452244571733048,
      "grad_norm": 0.1015702269360243,
      "learning_rate": 0.0001982035274959014,
      "loss": 1.228,
      "step": 290
    },
    {
      "epoch": 0.15718662581590515,
      "grad_norm": 0.10731429321620178,
      "learning_rate": 0.0001980236825615166,
      "loss": 1.2084,
      "step": 295
    },
    {
      "epoch": 0.1598508059144798,
      "grad_norm": 0.11015660781012064,
      "learning_rate": 0.00019783534928747006,
      "loss": 1.233,
      "step": 300
    },
    {
      "epoch": 0.16251498601305447,
      "grad_norm": 0.09317505295867419,
      "learning_rate": 0.000197638543982441,
      "loss": 1.1949,
      "step": 305
    },
    {
      "epoch": 0.16517916611162914,
      "grad_norm": 0.10444172159792364,
      "learning_rate": 0.00019743328368874237,
      "loss": 1.2077,
      "step": 310
    },
    {
      "epoch": 0.1678433462102038,
      "grad_norm": 0.08551570812304626,
      "learning_rate": 0.00019721958618084507,
      "loss": 1.206,
      "step": 315
    },
    {
      "epoch": 0.17050752630877847,
      "grad_norm": 0.08486005829321344,
      "learning_rate": 0.00019699746996383878,
      "loss": 1.2162,
      "step": 320
    },
    {
      "epoch": 0.17317170640735313,
      "grad_norm": 0.08572790019112694,
      "learning_rate": 0.00019676695427182938,
      "loss": 1.1866,
      "step": 325
    },
    {
      "epoch": 0.1758358865059278,
      "grad_norm": 0.0948088094425236,
      "learning_rate": 0.00019652805906627356,
      "loss": 1.1903,
      "step": 330
    },
    {
      "epoch": 0.17850006660450246,
      "grad_norm": 0.09120766035226856,
      "learning_rate": 0.00019628080503425013,
      "loss": 1.2231,
      "step": 335
    },
    {
      "epoch": 0.18116424670307713,
      "grad_norm": 0.10099249661711791,
      "learning_rate": 0.0001960252135866687,
      "loss": 1.192,
      "step": 340
    },
    {
      "epoch": 0.1838284268016518,
      "grad_norm": 0.08142630900860537,
      "learning_rate": 0.0001957613068564156,
      "loss": 1.2093,
      "step": 345
    },
    {
      "epoch": 0.18649260690022645,
      "grad_norm": 0.08243035183498343,
      "learning_rate": 0.00019548910769643722,
      "loss": 1.2232,
      "step": 350
    },
    {
      "epoch": 0.18915678699880112,
      "grad_norm": 0.08113246622113654,
      "learning_rate": 0.00019520863967776116,
      "loss": 1.1773,
      "step": 355
    },
    {
      "epoch": 0.19182096709737578,
      "grad_norm": 0.08217032990907831,
      "learning_rate": 0.000194919927087455,
      "loss": 1.1909,
      "step": 360
    },
    {
      "epoch": 0.19448514719595045,
      "grad_norm": 0.08936356448152723,
      "learning_rate": 0.00019462299492652336,
      "loss": 1.1768,
      "step": 365
    },
    {
      "epoch": 0.1971493272945251,
      "grad_norm": 0.09171279408988006,
      "learning_rate": 0.00019431786890774264,
      "loss": 1.1899,
      "step": 370
    },
    {
      "epoch": 0.19981350739309978,
      "grad_norm": 0.08313643695167447,
      "learning_rate": 0.00019400457545343464,
      "loss": 1.224,
      "step": 375
    },
    {
      "epoch": 0.20247768749167444,
      "grad_norm": 0.08891928174737382,
      "learning_rate": 0.00019368314169317856,
      "loss": 1.1723,
      "step": 380
    },
    {
      "epoch": 0.2051418675902491,
      "grad_norm": 0.08770933776267686,
      "learning_rate": 0.00019335359546146156,
      "loss": 1.2028,
      "step": 385
    },
    {
      "epoch": 0.20780604768882377,
      "grad_norm": 0.08301528792067146,
      "learning_rate": 0.00019301596529526854,
      "loss": 1.2056,
      "step": 390
    },
    {
      "epoch": 0.21047022778739843,
      "grad_norm": 0.08890281842327881,
      "learning_rate": 0.00019267028043161094,
      "loss": 1.2138,
      "step": 395
    },
    {
      "epoch": 0.2131344078859731,
      "grad_norm": 0.0900002819215321,
      "learning_rate": 0.0001923165708049951,
      "loss": 1.2051,
      "step": 400
    },
    {
      "epoch": 0.21579858798454776,
      "grad_norm": 0.10504748978346506,
      "learning_rate": 0.00019195486704482977,
      "loss": 1.1954,
      "step": 405
    },
    {
      "epoch": 0.21846276808312243,
      "grad_norm": 0.09718704604170597,
      "learning_rate": 0.0001915852004727742,
      "loss": 1.1639,
      "step": 410
    },
    {
      "epoch": 0.2211269481816971,
      "grad_norm": 0.08996864675224003,
      "learning_rate": 0.00019120760310002545,
      "loss": 1.2265,
      "step": 415
    },
    {
      "epoch": 0.22379112828027176,
      "grad_norm": 0.07943107198536507,
      "learning_rate": 0.0001908221076245466,
      "loss": 1.2169,
      "step": 420
    },
    {
      "epoch": 0.22645530837884642,
      "grad_norm": 0.08284744366625013,
      "learning_rate": 0.0001904287474282353,
      "loss": 1.1828,
      "step": 425
    },
    {
      "epoch": 0.22911948847742108,
      "grad_norm": 0.07736916644319017,
      "learning_rate": 0.00019002755657403298,
      "loss": 1.1979,
      "step": 430
    },
    {
      "epoch": 0.23178366857599575,
      "grad_norm": 0.08280858698864835,
      "learning_rate": 0.00018961856980297513,
      "loss": 1.191,
      "step": 435
    },
    {
      "epoch": 0.2344478486745704,
      "grad_norm": 0.07937399115370877,
      "learning_rate": 0.0001892018225311831,
      "loss": 1.2173,
      "step": 440
    },
    {
      "epoch": 0.23711202877314508,
      "grad_norm": 0.08409239653616714,
      "learning_rate": 0.00018877735084679693,
      "loss": 1.1903,
      "step": 445
    },
    {
      "epoch": 0.23977620887171971,
      "grad_norm": 0.0825557060792781,
      "learning_rate": 0.00018834519150685071,
      "loss": 1.1985,
      "step": 450
    },
    {
      "epoch": 0.24244038897029438,
      "grad_norm": 0.07982212340626871,
      "learning_rate": 0.00018790538193408937,
      "loss": 1.2038,
      "step": 455
    },
    {
      "epoch": 0.24510456906886904,
      "grad_norm": 0.0774659417434534,
      "learning_rate": 0.000187457960213728,
      "loss": 1.1788,
      "step": 460
    },
    {
      "epoch": 0.2477687491674437,
      "grad_norm": 0.08395126176587492,
      "learning_rate": 0.00018700296509015406,
      "loss": 1.1862,
      "step": 465
    },
    {
      "epoch": 0.2504329292660184,
      "grad_norm": 0.09287329697747186,
      "learning_rate": 0.00018654043596357217,
      "loss": 1.2092,
      "step": 470
    },
    {
      "epoch": 0.25309710936459306,
      "grad_norm": 0.09110234430419713,
      "learning_rate": 0.00018607041288659236,
      "loss": 1.1974,
      "step": 475
    },
    {
      "epoch": 0.25576128946316773,
      "grad_norm": 0.08308651742822264,
      "learning_rate": 0.00018559293656076166,
      "loss": 1.1869,
      "step": 480
    },
    {
      "epoch": 0.2584254695617424,
      "grad_norm": 0.08214254142545631,
      "learning_rate": 0.0001851080483330396,
      "loss": 1.1831,
      "step": 485
    },
    {
      "epoch": 0.26108964966031706,
      "grad_norm": 0.08701064777773787,
      "learning_rate": 0.00018461579019221774,
      "loss": 1.1879,
      "step": 490
    },
    {
      "epoch": 0.2637538297588917,
      "grad_norm": 0.08680397859688439,
      "learning_rate": 0.00018411620476528362,
      "loss": 1.1929,
      "step": 495
    },
    {
      "epoch": 0.2664180098574664,
      "grad_norm": 0.09030897168085561,
      "learning_rate": 0.0001836093353137297,
      "loss": 1.1902,
      "step": 500
    },
    {
      "epoch": 0.26908218995604105,
      "grad_norm": 0.08473182672968443,
      "learning_rate": 0.00018309522572980673,
      "loss": 1.2044,
      "step": 505
    },
    {
      "epoch": 0.2717463700546157,
      "grad_norm": 0.08536458790900281,
      "learning_rate": 0.00018257392053272345,
      "loss": 1.2259,
      "step": 510
    },
    {
      "epoch": 0.2744105501531904,
      "grad_norm": 0.081003787470878,
      "learning_rate": 0.00018204546486479096,
      "loss": 1.213,
      "step": 515
    },
    {
      "epoch": 0.27707473025176504,
      "grad_norm": 0.089272857126091,
      "learning_rate": 0.00018150990448751394,
      "loss": 1.1791,
      "step": 520
    },
    {
      "epoch": 0.2797389103503397,
      "grad_norm": 0.08621840551694437,
      "learning_rate": 0.0001809672857776278,
      "loss": 1.2024,
      "step": 525
    },
    {
      "epoch": 0.2824030904489144,
      "grad_norm": 0.07933042265020962,
      "learning_rate": 0.00018041765572308278,
      "loss": 1.2025,
      "step": 530
    },
    {
      "epoch": 0.28506727054748904,
      "grad_norm": 0.0850752493351441,
      "learning_rate": 0.00017986106191897493,
      "loss": 1.1994,
      "step": 535
    },
    {
      "epoch": 0.28773145064606365,
      "grad_norm": 0.07823580475459424,
      "learning_rate": 0.00017929755256342479,
      "loss": 1.2139,
      "step": 540
    },
    {
      "epoch": 0.2903956307446383,
      "grad_norm": 0.07706877273113974,
      "learning_rate": 0.0001787271764534035,
      "loss": 1.1909,
      "step": 545
    },
    {
      "epoch": 0.293059810843213,
      "grad_norm": 0.07946013242507238,
      "learning_rate": 0.00017814998298050743,
      "loss": 1.1795,
      "step": 550
    },
    {
      "epoch": 0.29572399094178764,
      "grad_norm": 0.08180878258450536,
      "learning_rate": 0.00017756602212668082,
      "loss": 1.1906,
      "step": 555
    },
    {
      "epoch": 0.2983881710403623,
      "grad_norm": 0.09250927603458256,
      "learning_rate": 0.00017697534445988803,
      "loss": 1.1779,
      "step": 560
    },
    {
      "epoch": 0.30105235113893697,
      "grad_norm": 0.09177332060381718,
      "learning_rate": 0.00017637800112973428,
      "loss": 1.1723,
      "step": 565
    },
    {
      "epoch": 0.30371653123751163,
      "grad_norm": 0.0837886550110598,
      "learning_rate": 0.00017577404386303645,
      "loss": 1.1954,
      "step": 570
    },
    {
      "epoch": 0.3063807113360863,
      "grad_norm": 0.09291852958714042,
      "learning_rate": 0.0001751635249593439,
      "loss": 1.1913,
      "step": 575
    },
    {
      "epoch": 0.30904489143466096,
      "grad_norm": 0.0751186613096299,
      "learning_rate": 0.00017454649728640943,
      "loss": 1.1884,
      "step": 580
    },
    {
      "epoch": 0.3117090715332356,
      "grad_norm": 0.08335342725723932,
      "learning_rate": 0.00017392301427561146,
      "loss": 1.2182,
      "step": 585
    },
    {
      "epoch": 0.3143732516318103,
      "grad_norm": 0.10512490135127568,
      "learning_rate": 0.00017329312991732688,
      "loss": 1.2022,
      "step": 590
    },
    {
      "epoch": 0.31703743173038496,
      "grad_norm": 0.08736469147276345,
      "learning_rate": 0.00017265689875625587,
      "loss": 1.2034,
      "step": 595
    },
    {
      "epoch": 0.3197016118289596,
      "grad_norm": 0.08181172036849664,
      "learning_rate": 0.00017201437588669878,
      "loss": 1.1734,
      "step": 600
    },
    {
      "epoch": 0.3223657919275343,
      "grad_norm": 0.10161889430894355,
      "learning_rate": 0.0001713656169477849,
      "loss": 1.1819,
      "step": 605
    },
    {
      "epoch": 0.32502997202610895,
      "grad_norm": 0.08576387885641809,
      "learning_rate": 0.00017071067811865476,
      "loss": 1.2189,
      "step": 610
    },
    {
      "epoch": 0.3276941521246836,
      "grad_norm": 0.07896254402827822,
      "learning_rate": 0.00017004961611359506,
      "loss": 1.1975,
      "step": 615
    },
    {
      "epoch": 0.3303583322232583,
      "grad_norm": 0.0843725771070502,
      "learning_rate": 0.00016938248817712767,
      "loss": 1.2049,
      "step": 620
    },
    {
      "epoch": 0.33302251232183294,
      "grad_norm": 0.08922430755828152,
      "learning_rate": 0.0001687093520790524,
      "loss": 1.2,
      "step": 625
    },
    {
      "epoch": 0.3356866924204076,
      "grad_norm": 0.09060345427129264,
      "learning_rate": 0.00016803026610944462,
      "loss": 1.2019,
      "step": 630
    },
    {
      "epoch": 0.33835087251898227,
      "grad_norm": 0.08487192271777715,
      "learning_rate": 0.0001673452890736074,
      "loss": 1.2101,
      "step": 635
    },
    {
      "epoch": 0.34101505261755694,
      "grad_norm": 0.10237244952453312,
      "learning_rate": 0.00016665448028697961,
      "loss": 1.1917,
      "step": 640
    },
    {
      "epoch": 0.3436792327161316,
      "grad_norm": 0.08297722193267411,
      "learning_rate": 0.0001659578995699991,
      "loss": 1.1714,
      "step": 645
    },
    {
      "epoch": 0.34634341281470626,
      "grad_norm": 0.08034790490692274,
      "learning_rate": 0.00016525560724292305,
      "loss": 1.2135,
      "step": 650
    },
    {
      "epoch": 0.34900759291328093,
      "grad_norm": 0.07638178286632234,
      "learning_rate": 0.00016454766412060402,
      "loss": 1.1725,
      "step": 655
    },
    {
      "epoch": 0.3516717730118556,
      "grad_norm": 0.08455753089941971,
      "learning_rate": 0.00016383413150722415,
      "loss": 1.1842,
      "step": 660
    },
    {
      "epoch": 0.35433595311043026,
      "grad_norm": 0.07805666953810234,
      "learning_rate": 0.00016311507119098627,
      "loss": 1.2005,
      "step": 665
    },
    {
      "epoch": 0.3570001332090049,
      "grad_norm": 0.07613396423899182,
      "learning_rate": 0.00016239054543876343,
      "loss": 1.1971,
      "step": 670
    },
    {
      "epoch": 0.3596643133075796,
      "grad_norm": 0.07964041743179047,
      "learning_rate": 0.00016166061699070703,
      "loss": 1.2247,
      "step": 675
    },
    {
      "epoch": 0.36232849340615425,
      "grad_norm": 0.07545427492471256,
      "learning_rate": 0.00016092534905481367,
      "loss": 1.1965,
      "step": 680
    },
    {
      "epoch": 0.3649926735047289,
      "grad_norm": 0.07935466685302715,
      "learning_rate": 0.00016018480530145175,
      "loss": 1.1777,
      "step": 685
    },
    {
      "epoch": 0.3676568536033036,
      "grad_norm": 0.07697591776205723,
      "learning_rate": 0.00015943904985784796,
      "loss": 1.2043,
      "step": 690
    },
    {
      "epoch": 0.37032103370187824,
      "grad_norm": 0.08353307335035773,
      "learning_rate": 0.00015868814730253422,
      "loss": 1.2016,
      "step": 695
    },
    {
      "epoch": 0.3729852138004529,
      "grad_norm": 0.12083203321167697,
      "learning_rate": 0.00015793216265975538,
      "loss": 1.2039,
      "step": 700
    },
    {
      "epoch": 0.3756493938990276,
      "grad_norm": 0.08002384831591002,
      "learning_rate": 0.0001571711613938387,
      "loss": 1.2018,
      "step": 705
    },
    {
      "epoch": 0.37831357399760224,
      "grad_norm": 0.07416696808155057,
      "learning_rate": 0.00015640520940352474,
      "loss": 1.1761,
      "step": 710
    },
    {
      "epoch": 0.3809777540961769,
      "grad_norm": 0.07477584114343706,
      "learning_rate": 0.00015563437301626095,
      "loss": 1.1691,
      "step": 715
    },
    {
      "epoch": 0.38364193419475157,
      "grad_norm": 0.08021029434297425,
      "learning_rate": 0.00015485871898245822,
      "loss": 1.1879,
      "step": 720
    },
    {
      "epoch": 0.38630611429332623,
      "grad_norm": 0.07655243996754035,
      "learning_rate": 0.0001540783144697103,
      "loss": 1.1895,
      "step": 725
    },
    {
      "epoch": 0.3889702943919009,
      "grad_norm": 0.07480027547778711,
      "learning_rate": 0.0001532932270569778,
      "loss": 1.1975,
      "step": 730
    },
    {
      "epoch": 0.39163447449047556,
      "grad_norm": 0.0777051603884027,
      "learning_rate": 0.00015250352472873603,
      "loss": 1.1676,
      "step": 735
    },
    {
      "epoch": 0.3942986545890502,
      "grad_norm": 0.09274122893001194,
      "learning_rate": 0.00015170927586908786,
      "loss": 1.185,
      "step": 740
    },
    {
      "epoch": 0.3969628346876249,
      "grad_norm": 0.08369763065847857,
      "learning_rate": 0.00015091054925584204,
      "loss": 1.1839,
      "step": 745
    },
    {
      "epoch": 0.39962701478619955,
      "grad_norm": 0.0785954093676615,
      "learning_rate": 0.0001501074140545575,
      "loss": 1.195,
      "step": 750
    },
    {
      "epoch": 0.4022911948847742,
      "grad_norm": 0.08125955697905107,
      "learning_rate": 0.00014929993981255388,
      "loss": 1.167,
      "step": 755
    },
    {
      "epoch": 0.4049553749833489,
      "grad_norm": 0.0795318945758808,
      "learning_rate": 0.00014848819645288915,
      "loss": 1.1809,
      "step": 760
    },
    {
      "epoch": 0.40761955508192355,
      "grad_norm": 0.08215872783779934,
      "learning_rate": 0.0001476722542683045,
      "loss": 1.1982,
      "step": 765
    },
    {
      "epoch": 0.4102837351804982,
      "grad_norm": 0.0902569586423544,
      "learning_rate": 0.0001468521839151375,
      "loss": 1.2058,
      "step": 770
    },
    {
      "epoch": 0.4129479152790729,
      "grad_norm": 0.08977396388549971,
      "learning_rate": 0.00014602805640720373,
      "loss": 1.2046,
      "step": 775
    },
    {
      "epoch": 0.41561209537764754,
      "grad_norm": 0.08797468277446341,
      "learning_rate": 0.00014519994310964698,
      "loss": 1.1767,
      "step": 780
    },
    {
      "epoch": 0.4182762754762222,
      "grad_norm": 0.08154618218534682,
      "learning_rate": 0.0001443679157327598,
      "loss": 1.2016,
      "step": 785
    },
    {
      "epoch": 0.42094045557479687,
      "grad_norm": 0.08123872265319151,
      "learning_rate": 0.00014353204632577352,
      "loss": 1.1836,
      "step": 790
    },
    {
      "epoch": 0.42360463567337153,
      "grad_norm": 0.09034057444914761,
      "learning_rate": 0.00014269240727061928,
      "loss": 1.1984,
      "step": 795
    },
    {
      "epoch": 0.4262688157719462,
      "grad_norm": 0.07751594264193425,
      "learning_rate": 0.00014184907127566006,
      "loss": 1.1721,
      "step": 800
    },
    {
      "epoch": 0.42893299587052086,
      "grad_norm": 0.07817067745827988,
      "learning_rate": 0.00014100211136939457,
      "loss": 1.2066,
      "step": 805
    },
    {
      "epoch": 0.4315971759690955,
      "grad_norm": 0.07666379004066795,
      "learning_rate": 0.00014015160089413331,
      "loss": 1.201,
      "step": 810
    },
    {
      "epoch": 0.4342613560676702,
      "grad_norm": 0.07977661703965382,
      "learning_rate": 0.00013929761349964755,
      "loss": 1.1986,
      "step": 815
    },
    {
      "epoch": 0.43692553616624485,
      "grad_norm": 0.07515238252186936,
      "learning_rate": 0.00013844022313679166,
      "loss": 1.1673,
      "step": 820
    },
    {
      "epoch": 0.4395897162648195,
      "grad_norm": 0.08668062284259438,
      "learning_rate": 0.00013757950405109926,
      "loss": 1.2055,
      "step": 825
    },
    {
      "epoch": 0.4422538963633942,
      "grad_norm": 0.07958110157591258,
      "learning_rate": 0.00013671553077635403,
      "loss": 1.2052,
      "step": 830
    },
    {
      "epoch": 0.44491807646196885,
      "grad_norm": 0.07801662094774196,
      "learning_rate": 0.00013584837812813554,
      "loss": 1.191,
      "step": 835
    },
    {
      "epoch": 0.4475822565605435,
      "grad_norm": 0.0766973195738648,
      "learning_rate": 0.00013497812119734037,
      "loss": 1.1918,
      "step": 840
    },
    {
      "epoch": 0.4502464366591182,
      "grad_norm": 0.07475449320976814,
      "learning_rate": 0.00013410483534367988,
      "loss": 1.1837,
      "step": 845
    },
    {
      "epoch": 0.45291061675769284,
      "grad_norm": 0.07991050463902843,
      "learning_rate": 0.0001332285961891543,
      "loss": 1.1941,
      "step": 850
    },
    {
      "epoch": 0.4555747968562675,
      "grad_norm": 0.08254589522833469,
      "learning_rate": 0.00013234947961150438,
      "loss": 1.1782,
      "step": 855
    },
    {
      "epoch": 0.45823897695484217,
      "grad_norm": 0.07717793943049406,
      "learning_rate": 0.0001314675617376406,
      "loss": 1.1773,
      "step": 860
    },
    {
      "epoch": 0.46090315705341683,
      "grad_norm": 0.07981760014460237,
      "learning_rate": 0.00013058291893705123,
      "loss": 1.1587,
      "step": 865
    },
    {
      "epoch": 0.4635673371519915,
      "grad_norm": 0.0746596071448608,
      "learning_rate": 0.00012969562781518884,
      "loss": 1.1672,
      "step": 870
    },
    {
      "epoch": 0.46623151725056616,
      "grad_norm": 0.07546467676535663,
      "learning_rate": 0.00012880576520683687,
      "loss": 1.188,
      "step": 875
    },
    {
      "epoch": 0.4688956973491408,
      "grad_norm": 0.0822763180376801,
      "learning_rate": 0.00012791340816945609,
      "loss": 1.1773,
      "step": 880
    },
    {
      "epoch": 0.4715598774477155,
      "grad_norm": 0.08094564516346787,
      "learning_rate": 0.00012701863397651176,
      "loss": 1.1692,
      "step": 885
    },
    {
      "epoch": 0.47422405754629016,
      "grad_norm": 0.08080906995900732,
      "learning_rate": 0.00012612152011078233,
      "loss": 1.1923,
      "step": 890
    },
    {
      "epoch": 0.47688823764486477,
      "grad_norm": 0.07799247485478207,
      "learning_rate": 0.00012522214425764953,
      "loss": 1.1762,
      "step": 895
    },
    {
      "epoch": 0.47955241774343943,
      "grad_norm": 0.07890128228587008,
      "learning_rate": 0.00012432058429837152,
      "loss": 1.1872,
      "step": 900
    },
    {
      "epoch": 0.4822165978420141,
      "grad_norm": 0.07930485805581218,
      "learning_rate": 0.00012341691830333867,
      "loss": 1.1801,
      "step": 905
    },
    {
      "epoch": 0.48488077794058876,
      "grad_norm": 0.07523574346910737,
      "learning_rate": 0.000122511224525313,
      "loss": 1.1705,
      "step": 910
    },
    {
      "epoch": 0.4875449580391634,
      "grad_norm": 0.08019564137195122,
      "learning_rate": 0.00012160358139265202,
      "loss": 1.1968,
      "step": 915
    },
    {
      "epoch": 0.4902091381377381,
      "grad_norm": 0.07799871453882441,
      "learning_rate": 0.00012069406750251713,
      "loss": 1.2037,
      "step": 920
    },
    {
      "epoch": 0.49287331823631275,
      "grad_norm": 0.07616406333142303,
      "learning_rate": 0.00011978276161406756,
      "loss": 1.1771,
      "step": 925
    },
    {
      "epoch": 0.4955374983348874,
      "grad_norm": 0.08287250388641591,
      "learning_rate": 0.00011886974264164037,
      "loss": 1.1817,
      "step": 930
    },
    {
      "epoch": 0.4982016784334621,
      "grad_norm": 0.07709252333335805,
      "learning_rate": 0.00011795508964791659,
      "loss": 1.1837,
      "step": 935
    },
    {
      "epoch": 0.5008658585320368,
      "grad_norm": 0.07774534231906956,
| "learning_rate": 0.00011703888183707512, | |
| "loss": 1.1801, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5035300386306114, | |
| "grad_norm": 0.07836641922774835, | |
| "learning_rate": 0.00011612119854793377, | |
| "loss": 1.1928, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.5061942187291861, | |
| "grad_norm": 0.0855819667886154, | |
| "learning_rate": 0.00011520211924707917, | |
| "loss": 1.2115, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5088583988277607, | |
| "grad_norm": 0.08249753825043267, | |
| "learning_rate": 0.00011428172352198534, | |
| "loss": 1.1902, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.5115225789263355, | |
| "grad_norm": 0.07960165957450928, | |
| "learning_rate": 0.00011336009107412162, | |
| "loss": 1.1846, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5141867590249101, | |
| "grad_norm": 0.0844585882351588, | |
| "learning_rate": 0.00011243730171205118, | |
| "loss": 1.1546, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.5168509391234848, | |
| "grad_norm": 0.07615164060165457, | |
| "learning_rate": 0.00011151343534451994, | |
| "loss": 1.1909, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5195151192220594, | |
| "grad_norm": 0.08628193709778877, | |
| "learning_rate": 0.00011058857197353683, | |
| "loss": 1.1832, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.5221792993206341, | |
| "grad_norm": 0.08464663568633256, | |
| "learning_rate": 0.0001096627916874461, | |
| "loss": 1.19, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5248434794192087, | |
| "grad_norm": 0.07335066380801168, | |
| "learning_rate": 0.00010873617465399209, | |
| "loss": 1.1962, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.5275076595177834, | |
| "grad_norm": 0.07968663002138815, | |
| "learning_rate": 0.00010780880111337703, | |
| "loss": 1.1882, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.530171839616358, | |
| "grad_norm": 0.08264162145143913, | |
| "learning_rate": 0.00010688075137131282, | |
| "loss": 1.1731, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.5328360197149328, | |
| "grad_norm": 0.0801021939451612, | |
| "learning_rate": 0.00010595210579206676, | |
| "loss": 1.1947, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5355001998135074, | |
| "grad_norm": 0.08017946990238331, | |
| "learning_rate": 0.0001050229447915027, | |
| "loss": 1.2001, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.5381643799120821, | |
| "grad_norm": 0.07658836800590599, | |
| "learning_rate": 0.0001040933488301171, | |
| "loss": 1.2002, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5408285600106567, | |
| "grad_norm": 0.07982866205360158, | |
| "learning_rate": 0.00010316339840607194, | |
| "loss": 1.1836, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.5434927401092314, | |
| "grad_norm": 0.07960723865086924, | |
| "learning_rate": 0.0001022331740482237, | |
| "loss": 1.1777, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.546156920207806, | |
| "grad_norm": 0.0748679705543507, | |
| "learning_rate": 0.00010130275630915009, | |
| "loss": 1.1921, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.5488211003063808, | |
| "grad_norm": 0.0878356815626402, | |
| "learning_rate": 0.00010037222575817475, | |
| "loss": 1.1709, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5514852804049554, | |
| "grad_norm": 0.07848930190109027, | |
| "learning_rate": 9.944166297439011e-05, | |
| "loss": 1.1896, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.5541494605035301, | |
| "grad_norm": 0.07598028143410929, | |
| "learning_rate": 9.85111485396798e-05, | |
| "loss": 1.1671, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5568136406021047, | |
| "grad_norm": 0.07455678199305199, | |
| "learning_rate": 9.758076303174082e-05, | |
| "loss": 1.1879, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.5594778207006794, | |
| "grad_norm": 0.07933883340689663, | |
| "learning_rate": 9.665058701710561e-05, | |
| "loss": 1.1906, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.562142000799254, | |
| "grad_norm": 0.0782311193459855, | |
| "learning_rate": 9.572070104416566e-05, | |
| "loss": 1.1814, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.5648061808978287, | |
| "grad_norm": 0.07728561745740628, | |
| "learning_rate": 9.479118563619636e-05, | |
| "loss": 1.179, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5674703609964034, | |
| "grad_norm": 0.08077639691991045, | |
| "learning_rate": 9.386212128438412e-05, | |
| "loss": 1.1957, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.5701345410949781, | |
| "grad_norm": 0.07773225366778684, | |
| "learning_rate": 9.29335884408562e-05, | |
| "loss": 1.221, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5727987211935527, | |
| "grad_norm": 0.07773099884244754, | |
| "learning_rate": 9.2005667511714e-05, | |
| "loss": 1.158, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.5754629012921273, | |
| "grad_norm": 0.07725515386954404, | |
| "learning_rate": 9.107843885007042e-05, | |
| "loss": 1.1699, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.578127081390702, | |
| "grad_norm": 0.08162033290693702, | |
| "learning_rate": 9.015198274909151e-05, | |
| "loss": 1.1885, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.5807912614892766, | |
| "grad_norm": 0.07774378164612243, | |
| "learning_rate": 8.922637943504361e-05, | |
| "loss": 1.1924, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5834554415878513, | |
| "grad_norm": 0.08627267331483346, | |
| "learning_rate": 8.830170906034625e-05, | |
| "loss": 1.1971, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.586119621686426, | |
| "grad_norm": 0.07645366300993464, | |
| "learning_rate": 8.737805169663114e-05, | |
| "loss": 1.1807, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5887838017850007, | |
| "grad_norm": 0.08443619410444902, | |
| "learning_rate": 8.645548732780864e-05, | |
| "loss": 1.1761, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.5914479818835753, | |
| "grad_norm": 0.07706639361213953, | |
| "learning_rate": 8.553409584314138e-05, | |
| "loss": 1.1902, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.59411216198215, | |
| "grad_norm": 0.08249092376191036, | |
| "learning_rate": 8.461395703032638e-05, | |
| "loss": 1.1839, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.5967763420807246, | |
| "grad_norm": 0.08065147422026245, | |
| "learning_rate": 8.369515056858575e-05, | |
| "loss": 1.1731, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5994405221792993, | |
| "grad_norm": 0.07848658439918688, | |
| "learning_rate": 8.277775602176702e-05, | |
| "loss": 1.177, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.6021047022778739, | |
| "grad_norm": 0.07816482146783796, | |
| "learning_rate": 8.186185283145325e-05, | |
| "loss": 1.1625, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.6047688823764487, | |
| "grad_norm": 0.07727396916592474, | |
| "learning_rate": 8.094752031008371e-05, | |
| "loss": 1.2127, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.6074330624750233, | |
| "grad_norm": 0.0789877501243841, | |
| "learning_rate": 8.003483763408603e-05, | |
| "loss": 1.1685, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.610097242573598, | |
| "grad_norm": 0.07854117414343613, | |
| "learning_rate": 7.912388383701982e-05, | |
| "loss": 1.1826, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.6127614226721726, | |
| "grad_norm": 0.07974248826415456, | |
| "learning_rate": 7.821473780273279e-05, | |
| "loss": 1.1867, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6154256027707473, | |
| "grad_norm": 0.08234486503543673, | |
| "learning_rate": 7.730747825852975e-05, | |
| "loss": 1.1928, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.6180897828693219, | |
| "grad_norm": 0.08113984284337296, | |
| "learning_rate": 7.64021837683554e-05, | |
| "loss": 1.2018, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.6207539629678966, | |
| "grad_norm": 0.07823635237833673, | |
| "learning_rate": 7.549893272599098e-05, | |
| "loss": 1.1756, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.6234181430664713, | |
| "grad_norm": 0.07948360741609674, | |
| "learning_rate": 7.459780334826578e-05, | |
| "loss": 1.2052, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.626082323165046, | |
| "grad_norm": 0.07981232728150925, | |
| "learning_rate": 7.369887366828405e-05, | |
| "loss": 1.1935, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.6287465032636206, | |
| "grad_norm": 0.07772028594630517, | |
| "learning_rate": 7.28022215286676e-05, | |
| "loss": 1.1742, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6314106833621953, | |
| "grad_norm": 0.07942822498880386, | |
| "learning_rate": 7.190792457481526e-05, | |
| "loss": 1.2044, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.6340748634607699, | |
| "grad_norm": 0.08072671416547043, | |
| "learning_rate": 7.101606024817888e-05, | |
| "loss": 1.2139, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6367390435593446, | |
| "grad_norm": 0.07657851251411404, | |
| "learning_rate": 7.01267057795577e-05, | |
| "loss": 1.1771, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.6394032236579192, | |
| "grad_norm": 0.07629161839506614, | |
| "learning_rate": 6.923993818241013e-05, | |
| "loss": 1.1878, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.642067403756494, | |
| "grad_norm": 0.07750592384625017, | |
| "learning_rate": 6.83558342461851e-05, | |
| "loss": 1.1965, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.6447315838550686, | |
| "grad_norm": 0.08074061148056243, | |
| "learning_rate": 6.747447052967246e-05, | |
| "loss": 1.1598, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.6473957639536433, | |
| "grad_norm": 0.08114992875515604, | |
| "learning_rate": 6.659592335437321e-05, | |
| "loss": 1.1863, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.6500599440522179, | |
| "grad_norm": 0.07837122688644742, | |
| "learning_rate": 6.572026879789064e-05, | |
| "loss": 1.1789, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.6527241241507926, | |
| "grad_norm": 0.07904963655043487, | |
| "learning_rate": 6.484758268734226e-05, | |
| "loss": 1.1988, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.6553883042493672, | |
| "grad_norm": 0.07794516218687547, | |
| "learning_rate": 6.397794059279376e-05, | |
| "loss": 1.1797, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.658052484347942, | |
| "grad_norm": 0.0782549564009468, | |
| "learning_rate": 6.311141782071486e-05, | |
| "loss": 1.1861, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.6607166644465166, | |
| "grad_norm": 0.08113466467014144, | |
| "learning_rate": 6.224808940745814e-05, | |
| "loss": 1.1812, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6633808445450913, | |
| "grad_norm": 0.0751998700186739, | |
| "learning_rate": 6.138803011276157e-05, | |
| "loss": 1.1903, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.6660450246436659, | |
| "grad_norm": 0.08263899166467202, | |
| "learning_rate": 6.0531314413274306e-05, | |
| "loss": 1.1652, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6687092047422406, | |
| "grad_norm": 0.07705824954536489, | |
| "learning_rate": 5.9678016496107737e-05, | |
| "loss": 1.1811, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.6713733848408152, | |
| "grad_norm": 0.08540183694555796, | |
| "learning_rate": 5.8828210252410995e-05, | |
| "loss": 1.1896, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.6740375649393899, | |
| "grad_norm": 0.0742001043082849, | |
| "learning_rate": 5.798196927097259e-05, | |
| "loss": 1.1709, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.6767017450379645, | |
| "grad_norm": 0.2483647268345394, | |
| "learning_rate": 5.7139366831847955e-05, | |
| "loss": 1.1841, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6793659251365393, | |
| "grad_norm": 0.07966799417452507, | |
| "learning_rate": 5.63004759000136e-05, | |
| "loss": 1.1739, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.6820301052351139, | |
| "grad_norm": 0.07969094718480424, | |
| "learning_rate": 5.546536911904896e-05, | |
| "loss": 1.1903, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6846942853336886, | |
| "grad_norm": 0.0828241197230709, | |
| "learning_rate": 5.463411880484577e-05, | |
| "loss": 1.1802, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.6873584654322632, | |
| "grad_norm": 0.09372413723978523, | |
| "learning_rate": 5.3806796939345685e-05, | |
| "loss": 1.1786, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.6900226455308379, | |
| "grad_norm": 0.07631033783827931, | |
| "learning_rate": 5.298347516430748e-05, | |
| "loss": 1.1895, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.6926868256294125, | |
| "grad_norm": 0.07690415214661994, | |
| "learning_rate": 5.216422477510267e-05, | |
| "loss": 1.1913, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6953510057279872, | |
| "grad_norm": 0.0767097655138434, | |
| "learning_rate": 5.1349116714542144e-05, | |
| "loss": 1.1685, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.6980151858265619, | |
| "grad_norm": 0.07609380641851764, | |
| "learning_rate": 5.053822156673276e-05, | |
| "loss": 1.1907, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.7006793659251366, | |
| "grad_norm": 0.07956518870952646, | |
| "learning_rate": 4.973160955096496e-05, | |
| "loss": 1.1668, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.7033435460237112, | |
| "grad_norm": 0.0781847554601962, | |
| "learning_rate": 4.892935051563242e-05, | |
| "loss": 1.1898, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.7060077261222859, | |
| "grad_norm": 0.07968044970093434, | |
| "learning_rate": 4.8131513932183415e-05, | |
| "loss": 1.2072, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.7086719062208605, | |
| "grad_norm": 0.07875194128774458, | |
| "learning_rate": 4.733816888910483e-05, | |
| "loss": 1.178, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.7113360863194352, | |
| "grad_norm": 0.08181988531463072, | |
| "learning_rate": 4.654938408593974e-05, | |
| "loss": 1.1679, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.7140002664180098, | |
| "grad_norm": 0.08458887612797164, | |
| "learning_rate": 4.576522782733802e-05, | |
| "loss": 1.1925, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.7166644465165846, | |
| "grad_norm": 0.07929665848737272, | |
| "learning_rate": 4.4985768017142014e-05, | |
| "loss": 1.1942, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.7193286266151592, | |
| "grad_norm": 0.0793190517544045, | |
| "learning_rate": 4.421107215250586e-05, | |
| "loss": 1.1504, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.7219928067137339, | |
| "grad_norm": 0.0793615584488964, | |
| "learning_rate": 4.3441207318051005e-05, | |
| "loss": 1.1704, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.7246569868123085, | |
| "grad_norm": 0.08055131022376696, | |
| "learning_rate": 4.2676240180056856e-05, | |
| "loss": 1.1937, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.7273211669108832, | |
| "grad_norm": 0.08173806907452158, | |
| "learning_rate": 4.191623698068778e-05, | |
| "loss": 1.1779, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.7299853470094578, | |
| "grad_norm": 0.07773805545181321, | |
| "learning_rate": 4.116126353225703e-05, | |
| "loss": 1.1846, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.7326495271080325, | |
| "grad_norm": 0.07667327254766519, | |
| "learning_rate": 4.0411385211527684e-05, | |
| "loss": 1.2095, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.7353137072066072, | |
| "grad_norm": 0.08098668895333083, | |
| "learning_rate": 3.96666669540512e-05, | |
| "loss": 1.1682, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7379778873051819, | |
| "grad_norm": 0.07984137500350058, | |
| "learning_rate": 3.892717324854459e-05, | |
| "loss": 1.1729, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.7406420674037565, | |
| "grad_norm": 0.07922048060290626, | |
| "learning_rate": 3.8192968131305886e-05, | |
| "loss": 1.1775, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7433062475023312, | |
| "grad_norm": 0.07652665688687964, | |
| "learning_rate": 3.746411518066894e-05, | |
| "loss": 1.1621, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.7459704276009058, | |
| "grad_norm": 0.0798026979694231, | |
| "learning_rate": 3.674067751149796e-05, | |
| "loss": 1.1702, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7486346076994805, | |
| "grad_norm": 0.08300115098412487, | |
| "learning_rate": 3.602271776972188e-05, | |
| "loss": 1.1533, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.7512987877980551, | |
| "grad_norm": 0.08575297897696614, | |
| "learning_rate": 3.5310298126909816e-05, | |
| "loss": 1.2051, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.7539629678966299, | |
| "grad_norm": 0.07861668329891834, | |
| "learning_rate": 3.46034802748872e-05, | |
| "loss": 1.1804, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.7566271479952045, | |
| "grad_norm": 0.07590825262005231, | |
| "learning_rate": 3.390232542039352e-05, | |
| "loss": 1.1846, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.7592913280937792, | |
| "grad_norm": 0.07760055527146281, | |
| "learning_rate": 3.320689427978232e-05, | |
| "loss": 1.174, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.7619555081923538, | |
| "grad_norm": 0.07989565355982597, | |
| "learning_rate": 3.251724707376324e-05, | |
| "loss": 1.1696, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7646196882909284, | |
| "grad_norm": 0.07764798745610466, | |
| "learning_rate": 3.1833443522187454e-05, | |
| "loss": 1.1761, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.7672838683895031, | |
| "grad_norm": 0.07976913879065081, | |
| "learning_rate": 3.115554283887614e-05, | |
| "loss": 1.1909, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.7699480484880777, | |
| "grad_norm": 0.08144781158937257, | |
| "learning_rate": 3.0483603726492836e-05, | |
| "loss": 1.1718, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.7726122285866525, | |
| "grad_norm": 0.07748040591215276, | |
| "learning_rate": 2.9817684371460153e-05, | |
| "loss": 1.1867, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7752764086852271, | |
| "grad_norm": 0.07690798090395808, | |
| "learning_rate": 2.9157842438921047e-05, | |
| "loss": 1.201, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.7779405887838018, | |
| "grad_norm": 0.07507016568699426, | |
| "learning_rate": 2.8504135067745464e-05, | |
| "loss": 1.1881, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.7806047688823764, | |
| "grad_norm": 0.07900678246787794, | |
| "learning_rate": 2.7856618865582318e-05, | |
| "loss": 1.1734, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.7832689489809511, | |
| "grad_norm": 0.08067040264461905, | |
| "learning_rate": 2.721534990395752e-05, | |
| "loss": 1.2003, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.7859331290795257, | |
| "grad_norm": 0.08005195988665038, | |
| "learning_rate": 2.658038371341859e-05, | |
| "loss": 1.1898, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.7885973091781004, | |
| "grad_norm": 0.07899201399633156, | |
| "learning_rate": 2.5951775278725955e-05, | |
| "loss": 1.1934, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.7912614892766751, | |
| "grad_norm": 0.07642642805468737, | |
| "learning_rate": 2.5329579034091455e-05, | |
| "loss": 1.2073, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.7939256693752498, | |
| "grad_norm": 0.08151590645902157, | |
| "learning_rate": 2.4713848858464817e-05, | |
| "loss": 1.181, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.7965898494738244, | |
| "grad_norm": 0.07944386459666944, | |
| "learning_rate": 2.410463807086786e-05, | |
| "loss": 1.1955, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.7992540295723991, | |
| "grad_norm": 0.07711356209897446, | |
| "learning_rate": 2.3501999425777432e-05, | |
| "loss": 1.1891, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8019182096709737, | |
| "grad_norm": 0.08004988252971652, | |
| "learning_rate": 2.2905985108557114e-05, | |
| "loss": 1.1851, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.8045823897695484, | |
| "grad_norm": 0.08534573467737261, | |
| "learning_rate": 2.2316646730938196e-05, | |
| "loss": 1.1721, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.807246569868123, | |
| "grad_norm": 0.07920097969121198, | |
| "learning_rate": 2.173403532655046e-05, | |
| "loss": 1.1694, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.8099107499666978, | |
| "grad_norm": 0.07511500294571079, | |
| "learning_rate": 2.1158201346502926e-05, | |
| "loss": 1.1746, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.8125749300652724, | |
| "grad_norm": 0.07737975818247868, | |
| "learning_rate": 2.0589194655014898e-05, | |
| "loss": 1.185, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.8152391101638471, | |
| "grad_norm": 0.08190267832244168, | |
| "learning_rate": 2.0027064525098236e-05, | |
| "loss": 1.195, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.8179032902624217, | |
| "grad_norm": 0.08131306910775678, | |
| "learning_rate": 1.9471859634290336e-05, | |
| "loss": 1.1742, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.8205674703609964, | |
| "grad_norm": 0.0858733149118693, | |
| "learning_rate": 1.8923628060439036e-05, | |
| "loss": 1.1898, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.823231650459571, | |
| "grad_norm": 0.07675405619600406, | |
| "learning_rate": 1.838241727753931e-05, | |
| "loss": 1.1881, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.8258958305581457, | |
| "grad_norm": 0.07682847096326867, | |
| "learning_rate": 1.7848274151622234e-05, | |
| "loss": 1.1805, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.8285600106567204, | |
| "grad_norm": 0.08364774954218579, | |
| "learning_rate": 1.732124493669671e-05, | |
| "loss": 1.192, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.8312241907552951, | |
| "grad_norm": 0.07984768371406777, | |
| "learning_rate": 1.6801375270743924e-05, | |
| "loss": 1.1858, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.8338883708538697, | |
| "grad_norm": 0.08106959134364375, | |
| "learning_rate": 1.6288710171765576e-05, | |
| "loss": 1.1813, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.8365525509524444, | |
| "grad_norm": 0.07836491305718539, | |
| "learning_rate": 1.578329403388541e-05, | |
| "loss": 1.1881, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.839216731051019, | |
| "grad_norm": 0.0758115839482243, | |
| "learning_rate": 1.528517062350492e-05, | |
| "loss": 1.1889, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.8418809111495937, | |
| "grad_norm": 0.07588248787185455, | |
| "learning_rate": 1.4794383075513452e-05, | |
| "loss": 1.1768, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8445450912481683, | |
| "grad_norm": 0.08067020299948538, | |
| "learning_rate": 1.431097388955297e-05, | |
| "loss": 1.2063, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.8472092713467431, | |
| "grad_norm": 0.07642268182718946, | |
| "learning_rate": 1.3834984926337657e-05, | |
| "loss": 1.1589, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.8498734514453177, | |
| "grad_norm": 0.07651179067647247, | |
| "learning_rate": 1.3366457404029275e-05, | |
| "loss": 1.2185, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.8525376315438924, | |
| "grad_norm": 0.07900907023829833, | |
| "learning_rate": 1.2905431894667553e-05, | |
| "loss": 1.163, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.855201811642467, | |
| "grad_norm": 0.07852531932632778, | |
| "learning_rate": 1.2451948320657114e-05, | |
| "loss": 1.1827, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.8578659917410417, | |
| "grad_norm": 0.08065290367816753, | |
| "learning_rate": 1.200604595131033e-05, | |
| "loss": 1.1723, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.8605301718396163, | |
| "grad_norm": 0.0779423491553262, | |
| "learning_rate": 1.1567763399446718e-05, | |
| "loss": 1.1636, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.863194351938191, | |
| "grad_norm": 0.07899604520177515, | |
| "learning_rate": 1.1137138618049404e-05, | |
| "loss": 1.2024, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.8658585320367657, | |
| "grad_norm": 0.08253045338915561, | |
| "learning_rate": 1.0714208896978484e-05, | |
| "loss": 1.1735, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.8685227121353404, | |
| "grad_norm": 0.07870321328036661, | |
| "learning_rate": 1.0299010859742009e-05, | |
| "loss": 1.1731, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.871186892233915, | |
| "grad_norm": 0.08272725502586431, | |
| "learning_rate": 9.891580460324523e-06, | |
| "loss": 1.1929, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.8738510723324897, | |
| "grad_norm": 0.07935943504829367, | |
| "learning_rate": 9.491952980073604e-06, | |
| "loss": 1.1709, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.8765152524310643, | |
| "grad_norm": 0.07895683534188976, | |
| "learning_rate": 9.100163024644815e-06, | |
| "loss": 1.1712, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.879179432529639, | |
| "grad_norm": 0.08082741880893497, | |
| "learning_rate": 8.716244521004846e-06, | |
| "loss": 1.1698, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8818436126282136, | |
| "grad_norm": 0.07991694698538618, | |
| "learning_rate": 8.34023071449378e-06, | |
| "loss": 1.1789, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.8845077927267884, | |
| "grad_norm": 0.08214765689148273, | |
| "learning_rate": 7.972154165946155e-06, | |
| "loss": 1.1845, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.887171972825363, | |
| "grad_norm": 0.08176590536508709, | |
| "learning_rate": 7.612046748871327e-06, | |
| "loss": 1.1771, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.8898361529239377, | |
| "grad_norm": 0.0808798899357427, | |
| "learning_rate": 7.25993964669347e-06, | |
| "loss": 1.2, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.8925003330225123, | |
| "grad_norm": 0.07776019044347303, | |
| "learning_rate": 6.915863350051199e-06, | |
| "loss": 1.204, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.895164513121087, | |
| "grad_norm": 0.07706209163365262, | |
| "learning_rate": 6.579847654157234e-06, | |
| "loss": 1.1972, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.8978286932196616, | |
| "grad_norm": 0.07653638119319102, | |
| "learning_rate": 6.2519216562183516e-06, | |
| "loss": 1.1623, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.9004928733182364, | |
| "grad_norm": 0.07709508406908554, | |
| "learning_rate": 5.932113752915658e-06, | |
| "loss": 1.165, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.903157053416811, | |
| "grad_norm": 0.07922295657062284, | |
| "learning_rate": 5.620451637945567e-06, | |
| "loss": 1.19, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.9058212335153857, | |
| "grad_norm": 0.07654014983869019, | |
| "learning_rate": 5.316962299621808e-06, | |
| "loss": 1.1708, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.9084854136139603, | |
| "grad_norm": 0.07678567172116282, | |
| "learning_rate": 5.0216720185381595e-06, | |
| "loss": 1.1873, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.911149593712535, | |
| "grad_norm": 0.07752534581690265, | |
| "learning_rate": 4.734606365292871e-06, | |
| "loss": 1.175, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.9138137738111096, | |
| "grad_norm": 0.07543079231709852, | |
| "learning_rate": 4.4557901982743345e-06, | |
| "loss": 1.1718, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.9164779539096843, | |
| "grad_norm": 0.0847130046966339, | |
| "learning_rate": 4.185247661508396e-06, | |
| "loss": 1.1853, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.919142134008259, | |
| "grad_norm": 0.07672126462506362, | |
| "learning_rate": 3.923002182567737e-06, | |
| "loss": 1.1528, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.9218063141068337, | |
| "grad_norm": 0.08103275321155728, | |
| "learning_rate": 3.6690764705430537e-06, | |
| "loss": 1.1925, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.9244704942054083, | |
| "grad_norm": 0.07625095781555866, | |
| "learning_rate": 3.423492514076654e-06, | |
| "loss": 1.1466, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.927134674303983, | |
| "grad_norm": 0.07795348864062035, | |
| "learning_rate": 3.186271579458333e-06, | |
| "loss": 1.1804, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.9297988544025576, | |
| "grad_norm": 0.0798431550705619, | |
| "learning_rate": 2.9574342087837382e-06, | |
| "loss": 1.1948, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.9324630345011323, | |
| "grad_norm": 0.07723730498120181, | |
| "learning_rate": 2.7370002181757114e-06, | |
| "loss": 1.194, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.9351272145997069, | |
| "grad_norm": 0.076024197686587, | |
| "learning_rate": 2.52498869606812e-06, | |
| "loss": 1.1553, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.9377913946982817, | |
| "grad_norm": 0.07525767304214721, | |
| "learning_rate": 2.3214180015530218e-06, | |
| "loss": 1.1717, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.9404555747968563, | |
| "grad_norm": 0.07866979015778834, | |
| "learning_rate": 2.1263057627908478e-06, | |
| "loss": 1.1877, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.943119754895431, | |
| "grad_norm": 0.07918446703667696, | |
| "learning_rate": 1.9396688754838355e-06, | |
| "loss": 1.1825, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9457839349940056, | |
| "grad_norm": 0.07807410852748545, | |
| "learning_rate": 1.7615235014130205e-06, | |
| "loss": 1.1597, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.9484481150925803, | |
| "grad_norm": 0.07773597995281895, | |
| "learning_rate": 1.5918850670386676e-06, | |
| "loss": 1.1736, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9511122951911549, | |
| "grad_norm": 0.08101968421115548, | |
| "learning_rate": 1.4307682621644392e-06, | |
| "loss": 1.1726, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.9537764752897295, | |
| "grad_norm": 0.07640809417651317, | |
| "learning_rate": 1.2781870386653017e-06, | |
| "loss": 1.176, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.9564406553883043, | |
| "grad_norm": 0.07624826302993815, | |
| "learning_rate": 1.1341546092794475e-06, | |
| "loss": 1.1712, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.9591048354868789, | |
| "grad_norm": 0.07644512408140866, | |
| "learning_rate": 9.986834464640328e-07, | |
| "loss": 1.1804, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.9617690155854536, | |
| "grad_norm": 0.078535810502388, | |
| "learning_rate": 8.717852813152073e-07, | |
| "loss": 1.1634, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.9644331956840282, | |
| "grad_norm": 0.07806596561964084, | |
| "learning_rate": 7.534711025522167e-07, | |
| "loss": 1.1685, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.9670973757826029, | |
| "grad_norm": 0.07845914770402948, | |
| "learning_rate": 6.437511555658748e-07, | |
| "loss": 1.1704, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.9697615558811775, | |
| "grad_norm": 0.07631630513554058, | |
| "learning_rate": 5.426349415313503e-07, | |
| "loss": 1.167, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.9724257359797522, | |
| "grad_norm": 0.07644461634231602, | |
| "learning_rate": 4.5013121658538107e-07, | |
| "loss": 1.1952, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.9750899160783268, | |
| "grad_norm": 0.07797081390243328, | |
| "learning_rate": 3.662479910681027e-07, | |
| "loss": 1.1587, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.9777540961769016, | |
| "grad_norm": 0.07746705967903876, | |
| "learning_rate": 2.909925288293369e-07, | |
| "loss": 1.1729, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.9804182762754762, | |
| "grad_norm": 0.08118653860364235, | |
| "learning_rate": 2.2437134659962778e-07, | |
| "loss": 1.1637, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.9830824563740509, | |
| "grad_norm": 0.07709302387499754, | |
| "learning_rate": 1.6639021342588213e-07, | |
| "loss": 1.1714, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.9857466364726255, | |
| "grad_norm": 0.07514251112734556, | |
| "learning_rate": 1.1705415017183585e-07, | |
| "loss": 1.1826, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.9884108165712002, | |
| "grad_norm": 0.07785113536208763, | |
| "learning_rate": 7.636742908324613e-08, | |
| "loss": 1.1894, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.9910749966697748, | |
| "grad_norm": 0.08355649548742282, | |
| "learning_rate": 4.4333573417953967e-08, | |
| "loss": 1.1833, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.9937391767683496, | |
| "grad_norm": 0.07679966879567424, | |
| "learning_rate": 2.0955357140783893e-08, | |
| "loss": 1.1922, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.9964033568669242, | |
| "grad_norm": 0.07806796015090577, | |
| "learning_rate": 6.234804683336038e-09, | |
| "loss": 1.1612, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.9990675369654989, | |
| "grad_norm": 0.07653772348938681, | |
| "learning_rate": 1.7319076868194117e-10, | |
| "loss": 1.1679, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.9996003729852138, | |
| "eval_loss": 1.1644140481948853, | |
| "eval_runtime": 1556.6302, | |
| "eval_samples_per_second": 8.594, | |
| "eval_steps_per_second": 0.538, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.9996003729852138, | |
| "step": 1876, | |
| "total_flos": 2.3594858912415744e+16, | |
| "train_loss": 1.2015958401694227, | |
| "train_runtime": 40566.6998, | |
| "train_samples_per_second": 2.961, | |
| "train_steps_per_second": 0.046 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1876, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3594858912415744e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |