| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 62447, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0016013579515429084, | |
| "grad_norm": 19.96757698059082, | |
| "learning_rate": 4.003442960946414e-08, | |
| "loss": 6.6824, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0032027159030858167, | |
| "grad_norm": 20.086483001708984, | |
| "learning_rate": 8.006885921892828e-08, | |
| "loss": 6.6684, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.004804073854628725, | |
| "grad_norm": 15.201075553894043, | |
| "learning_rate": 1.2010328882839244e-07, | |
| "loss": 6.5818, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.006405431806171633, | |
| "grad_norm": 12.548062324523926, | |
| "learning_rate": 1.6013771843785657e-07, | |
| "loss": 6.409, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.008006789757714542, | |
| "grad_norm": 10.694886207580566, | |
| "learning_rate": 2.0017214804732072e-07, | |
| "loss": 6.1352, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.00960814770925745, | |
| "grad_norm": 8.358333587646484, | |
| "learning_rate": 2.402065776567849e-07, | |
| "loss": 5.7786, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.011209505660800359, | |
| "grad_norm": 6.130667686462402, | |
| "learning_rate": 2.80241007266249e-07, | |
| "loss": 5.3755, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.012810863612343267, | |
| "grad_norm": 3.611398935317993, | |
| "learning_rate": 3.2027543687571313e-07, | |
| "loss": 5.1389, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.014412221563886175, | |
| "grad_norm": 2.8347551822662354, | |
| "learning_rate": 3.603098664851773e-07, | |
| "loss": 4.9266, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.016013579515429085, | |
| "grad_norm": 21.159616470336914, | |
| "learning_rate": 4.0034429609464144e-07, | |
| "loss": 4.8398, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.017614937466971993, | |
| "grad_norm": 6.242217540740967, | |
| "learning_rate": 4.4037872570410557e-07, | |
| "loss": 4.7241, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.0192162954185149, | |
| "grad_norm": 3.52362322807312, | |
| "learning_rate": 4.804131553135698e-07, | |
| "loss": 4.6735, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.02081765337005781, | |
| "grad_norm": 2.831575393676758, | |
| "learning_rate": 5.204475849230339e-07, | |
| "loss": 4.5398, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.022419011321600717, | |
| "grad_norm": 11.5364351272583, | |
| "learning_rate": 5.60482014532498e-07, | |
| "loss": 4.5072, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.024020369273143626, | |
| "grad_norm": 3.1574575901031494, | |
| "learning_rate": 6.005164441419621e-07, | |
| "loss": 4.4694, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.025621727224686534, | |
| "grad_norm": 2.988778829574585, | |
| "learning_rate": 6.405508737514263e-07, | |
| "loss": 4.451, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.027223085176229442, | |
| "grad_norm": 2.388536214828491, | |
| "learning_rate": 6.805853033608904e-07, | |
| "loss": 4.3784, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.02882444312777235, | |
| "grad_norm": 8.523998260498047, | |
| "learning_rate": 7.206197329703546e-07, | |
| "loss": 4.3341, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.030425801079315258, | |
| "grad_norm": 5.110918045043945, | |
| "learning_rate": 7.606541625798188e-07, | |
| "loss": 4.2974, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.03202715903085817, | |
| "grad_norm": 2.1784770488739014, | |
| "learning_rate": 8.006885921892829e-07, | |
| "loss": 4.2403, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.033628516982401074, | |
| "grad_norm": 2.2727229595184326, | |
| "learning_rate": 8.407230217987469e-07, | |
| "loss": 4.1771, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.035229874933943986, | |
| "grad_norm": 2.410856008529663, | |
| "learning_rate": 8.807574514082111e-07, | |
| "loss": 4.1723, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.03683123288548689, | |
| "grad_norm": 7.233943462371826, | |
| "learning_rate": 9.207918810176753e-07, | |
| "loss": 4.1031, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.0384325908370298, | |
| "grad_norm": 9.451576232910156, | |
| "learning_rate": 9.608263106271395e-07, | |
| "loss": 4.0296, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.04003394878857271, | |
| "grad_norm": 5.198200225830078, | |
| "learning_rate": 1.0008607402366035e-06, | |
| "loss": 3.9371, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.04163530674011562, | |
| "grad_norm": 11.912164688110352, | |
| "learning_rate": 1.0408951698460678e-06, | |
| "loss": 3.8349, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.04323666469165852, | |
| "grad_norm": 6.008382320404053, | |
| "learning_rate": 1.0809295994555318e-06, | |
| "loss": 3.7505, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.044838022643201435, | |
| "grad_norm": 3.3153979778289795, | |
| "learning_rate": 1.120964029064996e-06, | |
| "loss": 3.6149, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.046439380594744346, | |
| "grad_norm": 8.011855125427246, | |
| "learning_rate": 1.16099845867446e-06, | |
| "loss": 3.5414, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.04804073854628725, | |
| "grad_norm": 3.550476312637329, | |
| "learning_rate": 1.2010328882839243e-06, | |
| "loss": 3.4248, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.04964209649783016, | |
| "grad_norm": 3.9144866466522217, | |
| "learning_rate": 1.2410673178933883e-06, | |
| "loss": 3.3224, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.05124345444937307, | |
| "grad_norm": 3.6054248809814453, | |
| "learning_rate": 1.2811017475028525e-06, | |
| "loss": 3.2983, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.05284481240091598, | |
| "grad_norm": 4.165266990661621, | |
| "learning_rate": 1.3211361771123166e-06, | |
| "loss": 3.1677, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.054446170352458884, | |
| "grad_norm": 4.654821872711182, | |
| "learning_rate": 1.3611706067217808e-06, | |
| "loss": 3.14, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.056047528304001795, | |
| "grad_norm": 3.641819715499878, | |
| "learning_rate": 1.4012050363312448e-06, | |
| "loss": 3.0439, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.0576488862555447, | |
| "grad_norm": 3.61091947555542, | |
| "learning_rate": 1.4412394659407093e-06, | |
| "loss": 2.9522, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.05925024420708761, | |
| "grad_norm": 22.04112434387207, | |
| "learning_rate": 1.4812738955501733e-06, | |
| "loss": 2.9255, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.060851602158630516, | |
| "grad_norm": 5.0808892250061035, | |
| "learning_rate": 1.5213083251596375e-06, | |
| "loss": 2.8402, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.06245296011017343, | |
| "grad_norm": 9.055444717407227, | |
| "learning_rate": 1.5613427547691015e-06, | |
| "loss": 2.8354, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.06405431806171634, | |
| "grad_norm": 3.44482684135437, | |
| "learning_rate": 1.6013771843785658e-06, | |
| "loss": 2.7592, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.06565567601325924, | |
| "grad_norm": 2.7728819847106934, | |
| "learning_rate": 1.6414116139880298e-06, | |
| "loss": 2.7746, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.06725703396480215, | |
| "grad_norm": 1.9306970834732056, | |
| "learning_rate": 1.6814460435974938e-06, | |
| "loss": 2.7233, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.06885839191634506, | |
| "grad_norm": 1.8614246845245361, | |
| "learning_rate": 1.7214804732069583e-06, | |
| "loss": 2.7021, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.07045974986788797, | |
| "grad_norm": 3.224013566970825, | |
| "learning_rate": 1.7615149028164223e-06, | |
| "loss": 2.6586, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.07206110781943088, | |
| "grad_norm": 4.159784317016602, | |
| "learning_rate": 1.8015493324258865e-06, | |
| "loss": 2.6666, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.07366246577097378, | |
| "grad_norm": 2.2219038009643555, | |
| "learning_rate": 1.8415837620353505e-06, | |
| "loss": 2.6465, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.07526382372251669, | |
| "grad_norm": 14.757235527038574, | |
| "learning_rate": 1.8816181916448148e-06, | |
| "loss": 2.6125, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.0768651816740596, | |
| "grad_norm": 1.881609559059143, | |
| "learning_rate": 1.921652621254279e-06, | |
| "loss": 2.5652, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.07846653962560252, | |
| "grad_norm": 1.9000244140625, | |
| "learning_rate": 1.9616870508637432e-06, | |
| "loss": 2.5676, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.08006789757714541, | |
| "grad_norm": 3.4342846870422363, | |
| "learning_rate": 2.001721480473207e-06, | |
| "loss": 2.5934, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.08166925552868833, | |
| "grad_norm": 3.2394461631774902, | |
| "learning_rate": 2.0417559100826713e-06, | |
| "loss": 2.5371, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.08327061348023124, | |
| "grad_norm": 2.726757287979126, | |
| "learning_rate": 2.0817903396921355e-06, | |
| "loss": 2.5211, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.08487197143177415, | |
| "grad_norm": 1.8385337591171265, | |
| "learning_rate": 2.1218247693015993e-06, | |
| "loss": 2.5449, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.08647332938331705, | |
| "grad_norm": 1.7317003011703491, | |
| "learning_rate": 2.1618591989110636e-06, | |
| "loss": 2.5368, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.08807468733485996, | |
| "grad_norm": 1.8202093839645386, | |
| "learning_rate": 2.201893628520528e-06, | |
| "loss": 2.4703, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.08967604528640287, | |
| "grad_norm": 1.627389669418335, | |
| "learning_rate": 2.241928058129992e-06, | |
| "loss": 2.4741, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.09127740323794578, | |
| "grad_norm": 3.039496660232544, | |
| "learning_rate": 2.2819624877394563e-06, | |
| "loss": 2.4966, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.09287876118948869, | |
| "grad_norm": 5.223389148712158, | |
| "learning_rate": 2.32199691734892e-06, | |
| "loss": 2.4383, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.09448011914103159, | |
| "grad_norm": 1.7681688070297241, | |
| "learning_rate": 2.3620313469583843e-06, | |
| "loss": 2.4656, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.0960814770925745, | |
| "grad_norm": 4.00803804397583, | |
| "learning_rate": 2.4020657765678486e-06, | |
| "loss": 2.481, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.09768283504411741, | |
| "grad_norm": 14.015419960021973, | |
| "learning_rate": 2.4421002061773128e-06, | |
| "loss": 2.4758, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.09928419299566033, | |
| "grad_norm": 3.860048294067383, | |
| "learning_rate": 2.4821346357867766e-06, | |
| "loss": 2.4548, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.10088555094720322, | |
| "grad_norm": 1.4068512916564941, | |
| "learning_rate": 2.5221690653962413e-06, | |
| "loss": 2.4428, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.10248690889874613, | |
| "grad_norm": 3.721557855606079, | |
| "learning_rate": 2.562203495005705e-06, | |
| "loss": 2.3956, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.10408826685028905, | |
| "grad_norm": 2.806149482727051, | |
| "learning_rate": 2.6022379246151693e-06, | |
| "loss": 2.3903, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.10568962480183196, | |
| "grad_norm": 2.8240647315979004, | |
| "learning_rate": 2.642272354224633e-06, | |
| "loss": 2.395, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.10729098275337486, | |
| "grad_norm": 1.7092350721359253, | |
| "learning_rate": 2.6823067838340978e-06, | |
| "loss": 2.4076, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.10889234070491777, | |
| "grad_norm": 1.814175009727478, | |
| "learning_rate": 2.7223412134435616e-06, | |
| "loss": 2.4055, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.11049369865646068, | |
| "grad_norm": 2.08941650390625, | |
| "learning_rate": 2.762375643053026e-06, | |
| "loss": 2.4097, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.11209505660800359, | |
| "grad_norm": 2.0335028171539307, | |
| "learning_rate": 2.8024100726624896e-06, | |
| "loss": 2.3769, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.1136964145595465, | |
| "grad_norm": 14.262283325195312, | |
| "learning_rate": 2.8424445022719543e-06, | |
| "loss": 2.3706, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.1152977725110894, | |
| "grad_norm": 2.324890375137329, | |
| "learning_rate": 2.8824789318814185e-06, | |
| "loss": 2.3688, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.11689913046263231, | |
| "grad_norm": 2.6902220249176025, | |
| "learning_rate": 2.9225133614908823e-06, | |
| "loss": 2.3829, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.11850048841417522, | |
| "grad_norm": 3.410318613052368, | |
| "learning_rate": 2.9625477911003466e-06, | |
| "loss": 2.3687, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.12010184636571813, | |
| "grad_norm": 1.4391207695007324, | |
| "learning_rate": 3.0025822207098104e-06, | |
| "loss": 2.3909, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.12170320431726103, | |
| "grad_norm": 11.690342903137207, | |
| "learning_rate": 3.042616650319275e-06, | |
| "loss": 2.3387, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.12330456226880394, | |
| "grad_norm": 1.5653709173202515, | |
| "learning_rate": 3.082651079928739e-06, | |
| "loss": 2.3451, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.12490592022034686, | |
| "grad_norm": 3.124866247177124, | |
| "learning_rate": 3.122685509538203e-06, | |
| "loss": 2.322, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.12650727817188975, | |
| "grad_norm": 12.413910865783691, | |
| "learning_rate": 3.162719939147667e-06, | |
| "loss": 2.3182, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.12810863612343268, | |
| "grad_norm": 1.7550314664840698, | |
| "learning_rate": 3.2027543687571315e-06, | |
| "loss": 2.3099, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.12970999407497558, | |
| "grad_norm": 1.9001699686050415, | |
| "learning_rate": 3.2427887983665958e-06, | |
| "loss": 2.3299, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.13131135202651847, | |
| "grad_norm": 1.4118369817733765, | |
| "learning_rate": 3.2828232279760596e-06, | |
| "loss": 2.3003, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.1329127099780614, | |
| "grad_norm": 3.046459913253784, | |
| "learning_rate": 3.322857657585524e-06, | |
| "loss": 2.3053, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.1345140679296043, | |
| "grad_norm": 6.424179553985596, | |
| "learning_rate": 3.3628920871949876e-06, | |
| "loss": 2.2845, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.13611542588114722, | |
| "grad_norm": 3.9462482929229736, | |
| "learning_rate": 3.4029265168044523e-06, | |
| "loss": 2.2821, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.13771678383269012, | |
| "grad_norm": 2.464116096496582, | |
| "learning_rate": 3.4429609464139165e-06, | |
| "loss": 2.3144, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.13931814178423302, | |
| "grad_norm": 17.63976287841797, | |
| "learning_rate": 3.4829953760233803e-06, | |
| "loss": 2.2811, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.14091949973577594, | |
| "grad_norm": 3.135732650756836, | |
| "learning_rate": 3.5230298056328446e-06, | |
| "loss": 2.2953, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.14252085768731884, | |
| "grad_norm": 4.162137031555176, | |
| "learning_rate": 3.563064235242309e-06, | |
| "loss": 2.2692, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.14412221563886177, | |
| "grad_norm": 6.429003715515137, | |
| "learning_rate": 3.603098664851773e-06, | |
| "loss": 2.2819, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.14572357359040466, | |
| "grad_norm": 6.803035736083984, | |
| "learning_rate": 3.643133094461237e-06, | |
| "loss": 2.2672, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.14732493154194756, | |
| "grad_norm": 15.847606658935547, | |
| "learning_rate": 3.683167524070701e-06, | |
| "loss": 2.26, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.1489262894934905, | |
| "grad_norm": 3.1911871433258057, | |
| "learning_rate": 3.723201953680165e-06, | |
| "loss": 2.2355, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.15052764744503339, | |
| "grad_norm": 1.6060032844543457, | |
| "learning_rate": 3.7632363832896296e-06, | |
| "loss": 2.2608, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.15212900539657628, | |
| "grad_norm": 1.5236974954605103, | |
| "learning_rate": 3.8032708128990938e-06, | |
| "loss": 2.2507, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.1537303633481192, | |
| "grad_norm": 8.704015731811523, | |
| "learning_rate": 3.843305242508558e-06, | |
| "loss": 2.2457, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.1553317212996621, | |
| "grad_norm": 4.1284918785095215, | |
| "learning_rate": 3.883339672118022e-06, | |
| "loss": 2.2321, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.15693307925120503, | |
| "grad_norm": 8.519213676452637, | |
| "learning_rate": 3.9233741017274865e-06, | |
| "loss": 2.2356, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.15853443720274793, | |
| "grad_norm": 6.228696823120117, | |
| "learning_rate": 3.96340853133695e-06, | |
| "loss": 2.2243, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.16013579515429083, | |
| "grad_norm": 2.693775177001953, | |
| "learning_rate": 4.003442960946414e-06, | |
| "loss": 2.2288, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.16173715310583375, | |
| "grad_norm": 8.416048049926758, | |
| "learning_rate": 4.043477390555878e-06, | |
| "loss": 2.2311, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.16333851105737665, | |
| "grad_norm": 1.5264601707458496, | |
| "learning_rate": 4.083511820165343e-06, | |
| "loss": 2.2186, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.16493986900891958, | |
| "grad_norm": 1.7846661806106567, | |
| "learning_rate": 4.123546249774807e-06, | |
| "loss": 2.2132, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.16654122696046247, | |
| "grad_norm": 3.9117202758789062, | |
| "learning_rate": 4.163580679384271e-06, | |
| "loss": 2.228, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.16814258491200537, | |
| "grad_norm": 4.531779766082764, | |
| "learning_rate": 4.203615108993735e-06, | |
| "loss": 2.2066, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.1697439428635483, | |
| "grad_norm": 2.1657228469848633, | |
| "learning_rate": 4.243649538603199e-06, | |
| "loss": 2.1929, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.1713453008150912, | |
| "grad_norm": 2.9067344665527344, | |
| "learning_rate": 4.283683968212663e-06, | |
| "loss": 2.2093, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.1729466587666341, | |
| "grad_norm": 3.7661423683166504, | |
| "learning_rate": 4.323718397822127e-06, | |
| "loss": 2.1919, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.17454801671817702, | |
| "grad_norm": 2.9169373512268066, | |
| "learning_rate": 4.363752827431592e-06, | |
| "loss": 2.2099, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.17614937466971992, | |
| "grad_norm": 2.1810638904571533, | |
| "learning_rate": 4.403787257041056e-06, | |
| "loss": 2.1923, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.17775073262126284, | |
| "grad_norm": 8.174213409423828, | |
| "learning_rate": 4.443821686650519e-06, | |
| "loss": 2.1886, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.17935209057280574, | |
| "grad_norm": 2.431321382522583, | |
| "learning_rate": 4.483856116259984e-06, | |
| "loss": 2.1991, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.18095344852434864, | |
| "grad_norm": 3.7426862716674805, | |
| "learning_rate": 4.523890545869448e-06, | |
| "loss": 2.1763, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.18255480647589156, | |
| "grad_norm": 2.5155022144317627, | |
| "learning_rate": 4.5639249754789125e-06, | |
| "loss": 2.1906, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.18415616442743446, | |
| "grad_norm": 1.7059454917907715, | |
| "learning_rate": 4.603959405088376e-06, | |
| "loss": 2.1872, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.18575752237897739, | |
| "grad_norm": 5.253864765167236, | |
| "learning_rate": 4.64399383469784e-06, | |
| "loss": 2.1889, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.18735888033052028, | |
| "grad_norm": 1.5918197631835938, | |
| "learning_rate": 4.684028264307305e-06, | |
| "loss": 2.1746, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.18896023828206318, | |
| "grad_norm": 10.147111892700195, | |
| "learning_rate": 4.724062693916769e-06, | |
| "loss": 2.1712, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.1905615962336061, | |
| "grad_norm": 4.3356781005859375, | |
| "learning_rate": 4.764097123526233e-06, | |
| "loss": 2.1815, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.192162954185149, | |
| "grad_norm": 10.20026683807373, | |
| "learning_rate": 4.804131553135697e-06, | |
| "loss": 2.176, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.1937643121366919, | |
| "grad_norm": 1.9123090505599976, | |
| "learning_rate": 4.844165982745162e-06, | |
| "loss": 2.1807, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.19536567008823483, | |
| "grad_norm": 1.6245704889297485, | |
| "learning_rate": 4.8842004123546256e-06, | |
| "loss": 2.1637, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.19696702803977773, | |
| "grad_norm": 5.880768299102783, | |
| "learning_rate": 4.924234841964089e-06, | |
| "loss": 2.1735, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.19856838599132065, | |
| "grad_norm": 5.809731960296631, | |
| "learning_rate": 4.964269271573553e-06, | |
| "loss": 2.1523, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.20016974394286355, | |
| "grad_norm": 1.827416181564331, | |
| "learning_rate": 5.004303701183018e-06, | |
| "loss": 2.1485, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.20177110189440645, | |
| "grad_norm": 2.386488437652588, | |
| "learning_rate": 5.0443381307924825e-06, | |
| "loss": 2.1641, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.20337245984594937, | |
| "grad_norm": 5.080982208251953, | |
| "learning_rate": 5.0843725604019455e-06, | |
| "loss": 2.1706, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.20497381779749227, | |
| "grad_norm": 6.828605651855469, | |
| "learning_rate": 5.12440699001141e-06, | |
| "loss": 2.1736, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.2065751757490352, | |
| "grad_norm": 2.243302822113037, | |
| "learning_rate": 5.164441419620875e-06, | |
| "loss": 2.1238, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.2081765337005781, | |
| "grad_norm": 3.8954567909240723, | |
| "learning_rate": 5.204475849230339e-06, | |
| "loss": 2.1461, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.209777891652121, | |
| "grad_norm": 3.563438653945923, | |
| "learning_rate": 5.244510278839802e-06, | |
| "loss": 2.1492, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.21137924960366392, | |
| "grad_norm": 2.1851043701171875, | |
| "learning_rate": 5.284544708449266e-06, | |
| "loss": 2.1407, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.2129806075552068, | |
| "grad_norm": 4.8792524337768555, | |
| "learning_rate": 5.324579138058731e-06, | |
| "loss": 2.1403, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.2145819655067497, | |
| "grad_norm": 4.021134376525879, | |
| "learning_rate": 5.3646135676681955e-06, | |
| "loss": 2.1628, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.21618332345829264, | |
| "grad_norm": 3.8988146781921387, | |
| "learning_rate": 5.4046479972776585e-06, | |
| "loss": 2.1425, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.21778468140983553, | |
| "grad_norm": 6.337070941925049, | |
| "learning_rate": 5.444682426887123e-06, | |
| "loss": 2.1493, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.21938603936137846, | |
| "grad_norm": 2.077366828918457, | |
| "learning_rate": 5.484716856496588e-06, | |
| "loss": 2.1264, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.22098739731292136, | |
| "grad_norm": 1.3507400751113892, | |
| "learning_rate": 5.524751286106052e-06, | |
| "loss": 2.1306, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.22258875526446426, | |
| "grad_norm": 1.5656003952026367, | |
| "learning_rate": 5.564785715715516e-06, | |
| "loss": 2.135, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.22419011321600718, | |
| "grad_norm": 3.315119981765747, | |
| "learning_rate": 5.604820145324979e-06, | |
| "loss": 2.1449, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.22579147116755008, | |
| "grad_norm": 1.677067518234253, | |
| "learning_rate": 5.644854574934444e-06, | |
| "loss": 2.1126, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.227392829119093, | |
| "grad_norm": 1.3107109069824219, | |
| "learning_rate": 5.6848890045439086e-06, | |
| "loss": 2.1415, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.2289941870706359, | |
| "grad_norm": 1.887251853942871, | |
| "learning_rate": 5.724923434153372e-06, | |
| "loss": 2.1312, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.2305955450221788, | |
| "grad_norm": 4.706649303436279, | |
| "learning_rate": 5.764957863762837e-06, | |
| "loss": 2.1417, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.23219690297372172, | |
| "grad_norm": 4.202969074249268, | |
| "learning_rate": 5.8049922933723e-06, | |
| "loss": 2.1403, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.23379826092526462, | |
| "grad_norm": 2.2349281311035156, | |
| "learning_rate": 5.845026722981765e-06, | |
| "loss": 2.1164, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.23539961887680752, | |
| "grad_norm": 1.7390815019607544, | |
| "learning_rate": 5.885061152591229e-06, | |
| "loss": 2.1313, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.23700097682835045, | |
| "grad_norm": 1.9534856081008911, | |
| "learning_rate": 5.925095582200693e-06, | |
| "loss": 2.1252, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.23860233477989334, | |
| "grad_norm": 1.7701072692871094, | |
| "learning_rate": 5.965130011810158e-06, | |
| "loss": 2.1207, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.24020369273143627, | |
| "grad_norm": 6.166327953338623, | |
| "learning_rate": 6.005164441419621e-06, | |
| "loss": 2.1079, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.24180505068297917, | |
| "grad_norm": 2.4361186027526855, | |
| "learning_rate": 6.045198871029085e-06, | |
| "loss": 2.114, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.24340640863452206, | |
| "grad_norm": 2.536973714828491, | |
| "learning_rate": 6.08523330063855e-06, | |
| "loss": 2.109, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.245007766586065, | |
| "grad_norm": 3.394212484359741, | |
| "learning_rate": 6.125267730248014e-06, | |
| "loss": 2.1193, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.2466091245376079, | |
| "grad_norm": 1.725258708000183, | |
| "learning_rate": 6.165302159857478e-06, | |
| "loss": 2.1238, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.2482104824891508, | |
| "grad_norm": 2.9132273197174072, | |
| "learning_rate": 6.205336589466942e-06, | |
| "loss": 2.115, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.2498118404406937, | |
| "grad_norm": 1.6105629205703735, | |
| "learning_rate": 6.245371019076406e-06, | |
| "loss": 2.1103, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.25141319839223664, | |
| "grad_norm": 1.4759615659713745, | |
| "learning_rate": 6.285405448685871e-06, | |
| "loss": 2.1018, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.2530145563437795, | |
| "grad_norm": 6.175992488861084, | |
| "learning_rate": 6.325439878295334e-06, | |
| "loss": 2.1052, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.25461591429532243, | |
| "grad_norm": 1.496497631072998, | |
| "learning_rate": 6.3654743079047984e-06, | |
| "loss": 2.1098, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.25621727224686536, | |
| "grad_norm": 2.9353444576263428, | |
| "learning_rate": 6.405508737514263e-06, | |
| "loss": 2.1142, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.2578186301984082, | |
| "grad_norm": 3.003761053085327, | |
| "learning_rate": 6.445543167123727e-06, | |
| "loss": 2.1096, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.25941998814995115, | |
| "grad_norm": 1.8897191286087036, | |
| "learning_rate": 6.4855775967331916e-06, | |
| "loss": 2.0977, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.2610213461014941, | |
| "grad_norm": 1.1225190162658691, | |
| "learning_rate": 6.5256120263426545e-06, | |
| "loss": 2.1022, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.26262270405303695, | |
| "grad_norm": 5.252044200897217, | |
| "learning_rate": 6.565646455952119e-06, | |
| "loss": 2.1068, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.2642240620045799, | |
| "grad_norm": 1.9852492809295654, | |
| "learning_rate": 6.605680885561584e-06, | |
| "loss": 2.0882, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.2658254199561228, | |
| "grad_norm": 1.1616008281707764, | |
| "learning_rate": 6.645715315171048e-06, | |
| "loss": 2.0944, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.2674267779076657, | |
| "grad_norm": 2.1226704120635986, | |
| "learning_rate": 6.685749744780512e-06, | |
| "loss": 2.0927, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.2690281358592086, | |
| "grad_norm": 1.4191474914550781, | |
| "learning_rate": 6.725784174389975e-06, | |
| "loss": 2.0998, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.2706294938107515, | |
| "grad_norm": 2.283435106277466, | |
| "learning_rate": 6.76581860399944e-06, | |
| "loss": 2.1157, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.27223085176229445, | |
| "grad_norm": 1.6899996995925903, | |
| "learning_rate": 6.805853033608905e-06, | |
| "loss": 2.0937, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.2738322097138373, | |
| "grad_norm": 1.3105698823928833, | |
| "learning_rate": 6.845887463218368e-06, | |
| "loss": 2.0545, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.27543356766538024, | |
| "grad_norm": 1.1776176691055298, | |
| "learning_rate": 6.885921892827833e-06, | |
| "loss": 2.0984, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.27703492561692317, | |
| "grad_norm": 1.651307225227356, | |
| "learning_rate": 6.925956322437296e-06, | |
| "loss": 2.0959, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.27863628356846604, | |
| "grad_norm": 1.7482041120529175, | |
| "learning_rate": 6.965990752046761e-06, | |
| "loss": 2.0636, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.28023764152000896, | |
| "grad_norm": 3.625835418701172, | |
| "learning_rate": 7.006025181656225e-06, | |
| "loss": 2.085, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.2818389994715519, | |
| "grad_norm": 1.6532440185546875, | |
| "learning_rate": 7.046059611265689e-06, | |
| "loss": 2.0883, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.28344035742309476, | |
| "grad_norm": 1.331597924232483, | |
| "learning_rate": 7.086094040875153e-06, | |
| "loss": 2.1034, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.2850417153746377, | |
| "grad_norm": 3.6023612022399902, | |
| "learning_rate": 7.126128470484618e-06, | |
| "loss": 2.0991, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.2866430733261806, | |
| "grad_norm": 1.4167087078094482, | |
| "learning_rate": 7.166162900094081e-06, | |
| "loss": 2.1057, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.28824443127772353, | |
| "grad_norm": 6.183845520019531, | |
| "learning_rate": 7.206197329703546e-06, | |
| "loss": 2.0951, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.2898457892292664, | |
| "grad_norm": 1.5191693305969238, | |
| "learning_rate": 7.246231759313009e-06, | |
| "loss": 2.062, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.29144714718080933, | |
| "grad_norm": 1.5019919872283936, | |
| "learning_rate": 7.286266188922474e-06, | |
| "loss": 2.073, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.29304850513235225, | |
| "grad_norm": 2.338139533996582, | |
| "learning_rate": 7.326300618531938e-06, | |
| "loss": 2.0935, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.2946498630838951, | |
| "grad_norm": 3.6389622688293457, | |
| "learning_rate": 7.366335048141402e-06, | |
| "loss": 2.0907, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.29625122103543805, | |
| "grad_norm": 1.3060230016708374, | |
| "learning_rate": 7.406369477750867e-06, | |
| "loss": 2.0691, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.297852578986981, | |
| "grad_norm": 2.181640863418579, | |
| "learning_rate": 7.44640390736033e-06, | |
| "loss": 2.0668, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.29945393693852385, | |
| "grad_norm": 2.1645591259002686, | |
| "learning_rate": 7.4864383369697944e-06, | |
| "loss": 2.0589, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.30105529489006677, | |
| "grad_norm": 2.522383451461792, | |
| "learning_rate": 7.526472766579259e-06, | |
| "loss": 2.0624, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.3026566528416097, | |
| "grad_norm": 2.0438318252563477, | |
| "learning_rate": 7.566507196188723e-06, | |
| "loss": 2.0756, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.30425801079315257, | |
| "grad_norm": 1.5602883100509644, | |
| "learning_rate": 7.6065416257981876e-06, | |
| "loss": 2.0539, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.3058593687446955, | |
| "grad_norm": 1.2384752035140991, | |
| "learning_rate": 7.64657605540765e-06, | |
| "loss": 2.0698, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.3074607266962384, | |
| "grad_norm": 3.290865659713745, | |
| "learning_rate": 7.686610485017116e-06, | |
| "loss": 2.0538, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.30906208464778134, | |
| "grad_norm": 1.9636443853378296, | |
| "learning_rate": 7.72664491462658e-06, | |
| "loss": 2.0679, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.3106634425993242, | |
| "grad_norm": 2.1679654121398926, | |
| "learning_rate": 7.766679344236044e-06, | |
| "loss": 2.0734, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.31226480055086714, | |
| "grad_norm": 2.441173553466797, | |
| "learning_rate": 7.806713773845507e-06, | |
| "loss": 2.0475, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.31386615850241006, | |
| "grad_norm": 1.2764122486114502, | |
| "learning_rate": 7.846748203454973e-06, | |
| "loss": 2.0773, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.31546751645395293, | |
| "grad_norm": 1.106123685836792, | |
| "learning_rate": 7.886782633064435e-06, | |
| "loss": 2.0645, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.31706887440549586, | |
| "grad_norm": 1.025707721710205, | |
| "learning_rate": 7.9268170626739e-06, | |
| "loss": 2.0643, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.3186702323570388, | |
| "grad_norm": 1.2565511465072632, | |
| "learning_rate": 7.966851492283364e-06, | |
| "loss": 2.0666, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.32027159030858166, | |
| "grad_norm": 1.2378392219543457, | |
| "learning_rate": 8.006885921892828e-06, | |
| "loss": 2.0601, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.3218729482601246, | |
| "grad_norm": 1.9206656217575073, | |
| "learning_rate": 8.046920351502294e-06, | |
| "loss": 2.0576, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.3234743062116675, | |
| "grad_norm": 1.6953002214431763, | |
| "learning_rate": 8.086954781111756e-06, | |
| "loss": 2.0502, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.3250756641632104, | |
| "grad_norm": 1.6600971221923828, | |
| "learning_rate": 8.126989210721221e-06, | |
| "loss": 2.0589, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.3266770221147533, | |
| "grad_norm": 2.360778331756592, | |
| "learning_rate": 8.167023640330685e-06, | |
| "loss": 2.0591, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.3282783800662962, | |
| "grad_norm": 1.5475653409957886, | |
| "learning_rate": 8.207058069940149e-06, | |
| "loss": 2.0703, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.32987973801783915, | |
| "grad_norm": 1.2999683618545532, | |
| "learning_rate": 8.247092499549614e-06, | |
| "loss": 2.0651, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.331481095969382, | |
| "grad_norm": 3.301884889602661, | |
| "learning_rate": 8.287126929159077e-06, | |
| "loss": 2.0485, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.33308245392092495, | |
| "grad_norm": 3.200942277908325, | |
| "learning_rate": 8.327161358768542e-06, | |
| "loss": 2.0444, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.3346838118724679, | |
| "grad_norm": 1.2649630308151245, | |
| "learning_rate": 8.367195788378006e-06, | |
| "loss": 2.0526, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.33628516982401074, | |
| "grad_norm": 1.187700867652893, | |
| "learning_rate": 8.40723021798747e-06, | |
| "loss": 2.0651, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.33788652777555367, | |
| "grad_norm": 1.5766338109970093, | |
| "learning_rate": 8.447264647596935e-06, | |
| "loss": 2.0575, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.3394878857270966, | |
| "grad_norm": 1.1678153276443481, | |
| "learning_rate": 8.487299077206397e-06, | |
| "loss": 2.0394, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.34108924367863946, | |
| "grad_norm": 1.978745698928833, | |
| "learning_rate": 8.527333506815863e-06, | |
| "loss": 2.0434, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.3426906016301824, | |
| "grad_norm": 1.311265230178833, | |
| "learning_rate": 8.567367936425327e-06, | |
| "loss": 2.0423, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.3442919595817253, | |
| "grad_norm": 1.4099359512329102, | |
| "learning_rate": 8.60740236603479e-06, | |
| "loss": 2.0375, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.3458933175332682, | |
| "grad_norm": 1.2521507740020752, | |
| "learning_rate": 8.647436795644254e-06, | |
| "loss": 2.0355, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.3474946754848111, | |
| "grad_norm": 2.544433832168579, | |
| "learning_rate": 8.687471225253718e-06, | |
| "loss": 2.0351, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.34909603343635404, | |
| "grad_norm": 1.6786710023880005, | |
| "learning_rate": 8.727505654863184e-06, | |
| "loss": 2.0544, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.35069739138789696, | |
| "grad_norm": 1.224026083946228, | |
| "learning_rate": 8.767540084472647e-06, | |
| "loss": 2.0406, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.35229874933943983, | |
| "grad_norm": 7.5012431144714355, | |
| "learning_rate": 8.807574514082111e-06, | |
| "loss": 2.0355, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.35390010729098276, | |
| "grad_norm": 1.4292916059494019, | |
| "learning_rate": 8.847608943691575e-06, | |
| "loss": 2.0445, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.3555014652425257, | |
| "grad_norm": 1.1762036085128784, | |
| "learning_rate": 8.887643373301039e-06, | |
| "loss": 2.0358, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.35710282319406855, | |
| "grad_norm": 1.1497453451156616, | |
| "learning_rate": 8.927677802910504e-06, | |
| "loss": 2.0411, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.3587041811456115, | |
| "grad_norm": 1.7819931507110596, | |
| "learning_rate": 8.967712232519968e-06, | |
| "loss": 2.0414, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.3603055390971544, | |
| "grad_norm": 4.624775409698486, | |
| "learning_rate": 9.007746662129432e-06, | |
| "loss": 2.0309, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.3619068970486973, | |
| "grad_norm": 1.5174845457077026, | |
| "learning_rate": 9.047781091738896e-06, | |
| "loss": 2.0494, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.3635082550002402, | |
| "grad_norm": 2.5349197387695312, | |
| "learning_rate": 9.08781552134836e-06, | |
| "loss": 2.0199, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.3651096129517831, | |
| "grad_norm": 1.4281384944915771, | |
| "learning_rate": 9.127849950957825e-06, | |
| "loss": 2.0461, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.366710970903326, | |
| "grad_norm": 1.4501956701278687, | |
| "learning_rate": 9.167884380567289e-06, | |
| "loss": 2.0275, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.3683123288548689, | |
| "grad_norm": 1.7848312854766846, | |
| "learning_rate": 9.207918810176753e-06, | |
| "loss": 2.0459, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.36991368680641185, | |
| "grad_norm": 1.2266578674316406, | |
| "learning_rate": 9.247953239786217e-06, | |
| "loss": 2.0382, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.37151504475795477, | |
| "grad_norm": 2.917593002319336, | |
| "learning_rate": 9.28798766939568e-06, | |
| "loss": 2.0338, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.37311640270949764, | |
| "grad_norm": 1.7669585943222046, | |
| "learning_rate": 9.328022099005146e-06, | |
| "loss": 2.0098, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.37471776066104057, | |
| "grad_norm": 1.3076069355010986, | |
| "learning_rate": 9.36805652861461e-06, | |
| "loss": 2.0259, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.3763191186125835, | |
| "grad_norm": 1.26585054397583, | |
| "learning_rate": 9.408090958224073e-06, | |
| "loss": 2.0096, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.37792047656412636, | |
| "grad_norm": 1.330881953239441, | |
| "learning_rate": 9.448125387833537e-06, | |
| "loss": 2.0141, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.3795218345156693, | |
| "grad_norm": 1.3129397630691528, | |
| "learning_rate": 9.488159817443003e-06, | |
| "loss": 2.0351, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.3811231924672122, | |
| "grad_norm": 2.2104837894439697, | |
| "learning_rate": 9.528194247052467e-06, | |
| "loss": 2.0458, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.3827245504187551, | |
| "grad_norm": 4.37896728515625, | |
| "learning_rate": 9.56822867666193e-06, | |
| "loss": 2.0432, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.384325908370298, | |
| "grad_norm": 1.4323294162750244, | |
| "learning_rate": 9.608263106271394e-06, | |
| "loss": 2.028, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.38592726632184093, | |
| "grad_norm": 2.277630567550659, | |
| "learning_rate": 9.648297535880858e-06, | |
| "loss": 2.012, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.3875286242733838, | |
| "grad_norm": 1.0068135261535645, | |
| "learning_rate": 9.688331965490324e-06, | |
| "loss": 2.0153, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.38912998222492673, | |
| "grad_norm": 1.464872121810913, | |
| "learning_rate": 9.728366395099786e-06, | |
| "loss": 2.0255, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.39073134017646965, | |
| "grad_norm": 1.6919342279434204, | |
| "learning_rate": 9.768400824709251e-06, | |
| "loss": 2.0146, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.3923326981280126, | |
| "grad_norm": 1.4236170053482056, | |
| "learning_rate": 9.808435254318715e-06, | |
| "loss": 2.0235, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.39393405607955545, | |
| "grad_norm": 1.2634207010269165, | |
| "learning_rate": 9.848469683928179e-06, | |
| "loss": 2.0067, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.3955354140310984, | |
| "grad_norm": 1.185770034790039, | |
| "learning_rate": 9.888504113537644e-06, | |
| "loss": 2.0249, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.3971367719826413, | |
| "grad_norm": 1.6554452180862427, | |
| "learning_rate": 9.928538543147106e-06, | |
| "loss": 2.0224, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.39873812993418417, | |
| "grad_norm": 1.7017241716384888, | |
| "learning_rate": 9.968572972756572e-06, | |
| "loss": 2.0138, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.4003394878857271, | |
| "grad_norm": 1.0250684022903442, | |
| "learning_rate": 1.0008607402366036e-05, | |
| "loss": 2.0082, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.40194084583727, | |
| "grad_norm": 1.3391590118408203, | |
| "learning_rate": 1.0048641831975501e-05, | |
| "loss": 2.008, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.4035422037888129, | |
| "grad_norm": 1.0555273294448853, | |
| "learning_rate": 1.0088676261584965e-05, | |
| "loss": 2.0199, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.4051435617403558, | |
| "grad_norm": 2.1245908737182617, | |
| "learning_rate": 1.0128710691194427e-05, | |
| "loss": 2.0141, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.40674491969189874, | |
| "grad_norm": 1.1639268398284912, | |
| "learning_rate": 1.0168745120803891e-05, | |
| "loss": 2.0274, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.4083462776434416, | |
| "grad_norm": 1.75816011428833, | |
| "learning_rate": 1.0208779550413356e-05, | |
| "loss": 2.0065, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.40994763559498454, | |
| "grad_norm": 3.2224700450897217, | |
| "learning_rate": 1.024881398002282e-05, | |
| "loss": 2.0036, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.41154899354652746, | |
| "grad_norm": 1.0586453676223755, | |
| "learning_rate": 1.0288848409632284e-05, | |
| "loss": 2.0216, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.4131503514980704, | |
| "grad_norm": 1.5636674165725708, | |
| "learning_rate": 1.032888283924175e-05, | |
| "loss": 2.0035, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.41475170944961326, | |
| "grad_norm": 1.287876009941101, | |
| "learning_rate": 1.0368917268851213e-05, | |
| "loss": 2.0033, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.4163530674011562, | |
| "grad_norm": 1.1676390171051025, | |
| "learning_rate": 1.0408951698460677e-05, | |
| "loss": 1.9948, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.4179544253526991, | |
| "grad_norm": 2.230921506881714, | |
| "learning_rate": 1.0448986128070143e-05, | |
| "loss": 1.9747, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.419555783304242, | |
| "grad_norm": 1.1102570295333862, | |
| "learning_rate": 1.0489020557679605e-05, | |
| "loss": 2.002, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.4211571412557849, | |
| "grad_norm": 12.577959060668945, | |
| "learning_rate": 1.0529054987289069e-05, | |
| "loss": 1.9873, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.42275849920732783, | |
| "grad_norm": 1.0285041332244873, | |
| "learning_rate": 1.0569089416898532e-05, | |
| "loss": 2.0182, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.4243598571588707, | |
| "grad_norm": 2.1250357627868652, | |
| "learning_rate": 1.0609123846507998e-05, | |
| "loss": 1.9949, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.4259612151104136, | |
| "grad_norm": 0.90369713306427, | |
| "learning_rate": 1.0649158276117462e-05, | |
| "loss": 2.0081, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.42756257306195655, | |
| "grad_norm": 1.0429993867874146, | |
| "learning_rate": 1.0689192705726926e-05, | |
| "loss": 2.0188, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.4291639310134994, | |
| "grad_norm": 1.2060284614562988, | |
| "learning_rate": 1.0729227135336391e-05, | |
| "loss": 1.9747, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.43076528896504235, | |
| "grad_norm": 1.7947618961334229, | |
| "learning_rate": 1.0769261564945855e-05, | |
| "loss": 1.9963, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.4323666469165853, | |
| "grad_norm": 0.970507025718689, | |
| "learning_rate": 1.0809295994555317e-05, | |
| "loss": 2.0089, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.4339680048681282, | |
| "grad_norm": 1.038913607597351, | |
| "learning_rate": 1.0849330424164784e-05, | |
| "loss": 1.9827, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.43556936281967107, | |
| "grad_norm": 2.165769100189209, | |
| "learning_rate": 1.0889364853774246e-05, | |
| "loss": 1.9961, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.437170720771214, | |
| "grad_norm": 1.196454644203186, | |
| "learning_rate": 1.092939928338371e-05, | |
| "loss": 2.0035, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.4387720787227569, | |
| "grad_norm": 0.956650972366333, | |
| "learning_rate": 1.0969433712993176e-05, | |
| "loss": 1.9841, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.4403734366742998, | |
| "grad_norm": 1.084486961364746, | |
| "learning_rate": 1.100946814260264e-05, | |
| "loss": 1.9789, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.4419747946258427, | |
| "grad_norm": 0.9682411551475525, | |
| "learning_rate": 1.1049502572212103e-05, | |
| "loss": 1.9999, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.44357615257738564, | |
| "grad_norm": 2.1347734928131104, | |
| "learning_rate": 1.1089537001821567e-05, | |
| "loss": 2.0091, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.4451775105289285, | |
| "grad_norm": 4.513906478881836, | |
| "learning_rate": 1.1129571431431033e-05, | |
| "loss": 1.9896, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.44677886848047144, | |
| "grad_norm": 1.6367132663726807, | |
| "learning_rate": 1.1169605861040496e-05, | |
| "loss": 1.9781, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.44838022643201436, | |
| "grad_norm": 1.0168904066085815, | |
| "learning_rate": 1.1209640290649958e-05, | |
| "loss": 1.9924, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.44998158438355723, | |
| "grad_norm": 1.7051305770874023, | |
| "learning_rate": 1.1249674720259424e-05, | |
| "loss": 1.9872, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.45158294233510016, | |
| "grad_norm": 0.9768884778022766, | |
| "learning_rate": 1.1289709149868888e-05, | |
| "loss": 1.9809, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.4531843002866431, | |
| "grad_norm": 1.0439552068710327, | |
| "learning_rate": 1.1329743579478352e-05, | |
| "loss": 1.999, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.454785658238186, | |
| "grad_norm": 0.9658423066139221, | |
| "learning_rate": 1.1369778009087817e-05, | |
| "loss": 2.0104, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.4563870161897289, | |
| "grad_norm": 0.9558666944503784, | |
| "learning_rate": 1.1409812438697281e-05, | |
| "loss": 2.0104, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.4579883741412718, | |
| "grad_norm": 1.591242790222168, | |
| "learning_rate": 1.1449846868306745e-05, | |
| "loss": 1.9888, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.45958973209281473, | |
| "grad_norm": 1.8828788995742798, | |
| "learning_rate": 1.148988129791621e-05, | |
| "loss": 1.9951, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.4611910900443576, | |
| "grad_norm": 1.1350332498550415, | |
| "learning_rate": 1.1529915727525674e-05, | |
| "loss": 1.9842, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.4627924479959005, | |
| "grad_norm": 1.6506210565567017, | |
| "learning_rate": 1.1569950157135136e-05, | |
| "loss": 1.9927, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.46439380594744345, | |
| "grad_norm": 1.0234204530715942, | |
| "learning_rate": 1.16099845867446e-05, | |
| "loss": 1.9981, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.4659951638989863, | |
| "grad_norm": 0.9220559597015381, | |
| "learning_rate": 1.1650019016354065e-05, | |
| "loss": 1.9772, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.46759652185052925, | |
| "grad_norm": 1.008548617362976, | |
| "learning_rate": 1.169005344596353e-05, | |
| "loss": 1.9885, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.46919787980207217, | |
| "grad_norm": 1.0374430418014526, | |
| "learning_rate": 1.1730087875572993e-05, | |
| "loss": 1.9901, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.47079923775361504, | |
| "grad_norm": 1.4683129787445068, | |
| "learning_rate": 1.1770122305182459e-05, | |
| "loss": 1.9905, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.47240059570515797, | |
| "grad_norm": 2.1045260429382324, | |
| "learning_rate": 1.1810156734791922e-05, | |
| "loss": 1.9764, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.4740019536567009, | |
| "grad_norm": 0.9143902063369751, | |
| "learning_rate": 1.1850191164401386e-05, | |
| "loss": 1.9914, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.4756033116082438, | |
| "grad_norm": 1.0126798152923584, | |
| "learning_rate": 1.1890225594010852e-05, | |
| "loss": 1.9559, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.4772046695597867, | |
| "grad_norm": 1.282818078994751, | |
| "learning_rate": 1.1930260023620316e-05, | |
| "loss": 1.9927, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.4788060275113296, | |
| "grad_norm": 1.2307484149932861, | |
| "learning_rate": 1.1970294453229778e-05, | |
| "loss": 1.9825, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.48040738546287254, | |
| "grad_norm": 1.429739236831665, | |
| "learning_rate": 1.2010328882839241e-05, | |
| "loss": 1.9616, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.4820087434144154, | |
| "grad_norm": 1.5777498483657837, | |
| "learning_rate": 1.2050363312448707e-05, | |
| "loss": 1.9821, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.48361010136595833, | |
| "grad_norm": 1.1172056198120117, | |
| "learning_rate": 1.209039774205817e-05, | |
| "loss": 1.9669, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.48521145931750126, | |
| "grad_norm": 1.8118427991867065, | |
| "learning_rate": 1.2130432171667635e-05, | |
| "loss": 1.9555, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.48681281726904413, | |
| "grad_norm": 5.031758785247803, | |
| "learning_rate": 1.21704666012771e-05, | |
| "loss": 1.958, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.48841417522058705, | |
| "grad_norm": 1.171064853668213, | |
| "learning_rate": 1.2210501030886564e-05, | |
| "loss": 1.9697, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.49001553317213, | |
| "grad_norm": 1.6317328214645386, | |
| "learning_rate": 1.2250535460496028e-05, | |
| "loss": 1.9722, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.49161689112367285, | |
| "grad_norm": 0.9671623110771179, | |
| "learning_rate": 1.2290569890105493e-05, | |
| "loss": 1.9659, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.4932182490752158, | |
| "grad_norm": 1.0588128566741943, | |
| "learning_rate": 1.2330604319714955e-05, | |
| "loss": 1.9534, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.4948196070267587, | |
| "grad_norm": 1.1236603260040283, | |
| "learning_rate": 1.237063874932442e-05, | |
| "loss": 1.9505, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.4964209649783016, | |
| "grad_norm": 1.175752878189087, | |
| "learning_rate": 1.2410673178933885e-05, | |
| "loss": 1.9712, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.4980223229298445, | |
| "grad_norm": 1.0395989418029785, | |
| "learning_rate": 1.2450707608543348e-05, | |
| "loss": 1.9493, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.4996236808813874, | |
| "grad_norm": 0.9693764448165894, | |
| "learning_rate": 1.2490742038152812e-05, | |
| "loss": 1.9581, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.5012250388329303, | |
| "grad_norm": 1.100197434425354, | |
| "learning_rate": 1.2530776467762276e-05, | |
| "loss": 1.955, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.5028263967844733, | |
| "grad_norm": 1.3823459148406982, | |
| "learning_rate": 1.2570810897371742e-05, | |
| "loss": 1.9734, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.5044277547360161, | |
| "grad_norm": 0.9062979221343994, | |
| "learning_rate": 1.2610845326981205e-05, | |
| "loss": 1.9612, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.506029112687559, | |
| "grad_norm": 1.0007665157318115, | |
| "learning_rate": 1.2650879756590668e-05, | |
| "loss": 1.9664, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.5076304706391019, | |
| "grad_norm": 0.9745628833770752, | |
| "learning_rate": 1.2690914186200135e-05, | |
| "loss": 1.9648, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.5092318285906449, | |
| "grad_norm": 1.407834768295288, | |
| "learning_rate": 1.2730948615809597e-05, | |
| "loss": 1.9562, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.5108331865421878, | |
| "grad_norm": 1.207322597503662, | |
| "learning_rate": 1.277098304541906e-05, | |
| "loss": 1.9696, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.5124345444937307, | |
| "grad_norm": 1.4670792818069458, | |
| "learning_rate": 1.2811017475028526e-05, | |
| "loss": 1.9524, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.5140359024452736, | |
| "grad_norm": 1.023777961730957, | |
| "learning_rate": 1.285105190463799e-05, | |
| "loss": 1.97, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.5156372603968165, | |
| "grad_norm": 0.9778289198875427, | |
| "learning_rate": 1.2891086334247454e-05, | |
| "loss": 1.9494, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.5172386183483594, | |
| "grad_norm": 0.8971097469329834, | |
| "learning_rate": 1.2931120763856918e-05, | |
| "loss": 1.9628, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.5188399762999023, | |
| "grad_norm": 1.8562573194503784, | |
| "learning_rate": 1.2971155193466383e-05, | |
| "loss": 1.9543, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.5204413342514452, | |
| "grad_norm": 1.7294055223464966, | |
| "learning_rate": 1.3011189623075847e-05, | |
| "loss": 1.9519, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.5220426922029882, | |
| "grad_norm": 1.2172763347625732, | |
| "learning_rate": 1.3051224052685309e-05, | |
| "loss": 1.9758, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.5236440501545311, | |
| "grad_norm": 1.144281268119812, | |
| "learning_rate": 1.3091258482294775e-05, | |
| "loss": 1.9589, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.5252454081060739, | |
| "grad_norm": 1.057813048362732, | |
| "learning_rate": 1.3131292911904238e-05, | |
| "loss": 1.9443, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.5268467660576168, | |
| "grad_norm": 1.297404170036316, | |
| "learning_rate": 1.3171327341513702e-05, | |
| "loss": 1.9614, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.5284481240091597, | |
| "grad_norm": 1.0840290784835815, | |
| "learning_rate": 1.3211361771123168e-05, | |
| "loss": 1.9633, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.5300494819607027, | |
| "grad_norm": 1.0041546821594238, | |
| "learning_rate": 1.3251396200732631e-05, | |
| "loss": 1.9484, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.5316508399122456, | |
| "grad_norm": 1.780435562133789, | |
| "learning_rate": 1.3291430630342095e-05, | |
| "loss": 1.9438, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.5332521978637885, | |
| "grad_norm": 0.9901188015937805, | |
| "learning_rate": 1.333146505995156e-05, | |
| "loss": 1.9384, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.5348535558153314, | |
| "grad_norm": 0.9118313789367676, | |
| "learning_rate": 1.3371499489561025e-05, | |
| "loss": 1.9507, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.5364549137668743, | |
| "grad_norm": 1.0270628929138184, | |
| "learning_rate": 1.3411533919170487e-05, | |
| "loss": 1.9685, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.5380562717184172, | |
| "grad_norm": 2.4503536224365234, | |
| "learning_rate": 1.345156834877995e-05, | |
| "loss": 1.9481, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.5396576296699601, | |
| "grad_norm": 1.1191452741622925, | |
| "learning_rate": 1.3491602778389416e-05, | |
| "loss": 1.9529, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.541258987621503, | |
| "grad_norm": 0.8804434537887573, | |
| "learning_rate": 1.353163720799888e-05, | |
| "loss": 1.9591, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.542860345573046, | |
| "grad_norm": 1.1734013557434082, | |
| "learning_rate": 1.3571671637608344e-05, | |
| "loss": 1.9643, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.5444617035245889, | |
| "grad_norm": 0.9487005472183228, | |
| "learning_rate": 1.361170606721781e-05, | |
| "loss": 1.9408, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.5460630614761317, | |
| "grad_norm": 1.025894045829773, | |
| "learning_rate": 1.3651740496827273e-05, | |
| "loss": 1.9682, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.5476644194276746, | |
| "grad_norm": 1.3745815753936768, | |
| "learning_rate": 1.3691774926436737e-05, | |
| "loss": 1.9441, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.5492657773792176, | |
| "grad_norm": 0.9772420525550842, | |
| "learning_rate": 1.3731809356046202e-05, | |
| "loss": 1.9593, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.5508671353307605, | |
| "grad_norm": 0.8825002908706665, | |
| "learning_rate": 1.3771843785655666e-05, | |
| "loss": 1.9413, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.5524684932823034, | |
| "grad_norm": 2.0654349327087402, | |
| "learning_rate": 1.3811878215265128e-05, | |
| "loss": 1.9478, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.5540698512338463, | |
| "grad_norm": 0.9932202696800232, | |
| "learning_rate": 1.3851912644874592e-05, | |
| "loss": 1.9529, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.5556712091853893, | |
| "grad_norm": 0.923985481262207, | |
| "learning_rate": 1.3891947074484058e-05, | |
| "loss": 1.9542, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.5572725671369321, | |
| "grad_norm": 1.2756383419036865, | |
| "learning_rate": 1.3931981504093521e-05, | |
| "loss": 1.9437, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.558873925088475, | |
| "grad_norm": 1.025530457496643, | |
| "learning_rate": 1.3972015933702985e-05, | |
| "loss": 1.9479, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.5604752830400179, | |
| "grad_norm": 0.9658239483833313, | |
| "learning_rate": 1.401205036331245e-05, | |
| "loss": 1.9392, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.5620766409915608, | |
| "grad_norm": 1.0094221830368042, | |
| "learning_rate": 1.4052084792921914e-05, | |
| "loss": 1.9311, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.5636779989431038, | |
| "grad_norm": 0.933716893196106, | |
| "learning_rate": 1.4092119222531378e-05, | |
| "loss": 1.9605, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.5652793568946467, | |
| "grad_norm": 1.0568841695785522, | |
| "learning_rate": 1.4132153652140844e-05, | |
| "loss": 1.9453, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.5668807148461895, | |
| "grad_norm": 0.9029392004013062, | |
| "learning_rate": 1.4172188081750306e-05, | |
| "loss": 1.9327, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.5684820727977324, | |
| "grad_norm": 0.9875580668449402, | |
| "learning_rate": 1.421222251135977e-05, | |
| "loss": 1.9405, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.5700834307492754, | |
| "grad_norm": 0.9351832270622253, | |
| "learning_rate": 1.4252256940969235e-05, | |
| "loss": 1.9527, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.5716847887008183, | |
| "grad_norm": 1.1400425434112549, | |
| "learning_rate": 1.4292291370578699e-05, | |
| "loss": 1.9451, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.5732861466523612, | |
| "grad_norm": 0.971022367477417, | |
| "learning_rate": 1.4332325800188163e-05, | |
| "loss": 1.9336, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.5748875046039041, | |
| "grad_norm": 0.8905283808708191, | |
| "learning_rate": 1.4372360229797627e-05, | |
| "loss": 1.9518, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.5764888625554471, | |
| "grad_norm": 1.2511688470840454, | |
| "learning_rate": 1.4412394659407092e-05, | |
| "loss": 1.9276, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.5780902205069899, | |
| "grad_norm": 1.2555015087127686, | |
| "learning_rate": 1.4452429089016556e-05, | |
| "loss": 1.9306, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.5796915784585328, | |
| "grad_norm": 2.5456793308258057, | |
| "learning_rate": 1.4492463518626018e-05, | |
| "loss": 1.9212, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.5812929364100757, | |
| "grad_norm": 5.189430236816406, | |
| "learning_rate": 1.4532497948235485e-05, | |
| "loss": 1.9298, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.5828942943616187, | |
| "grad_norm": 0.8082601428031921, | |
| "learning_rate": 1.4572532377844947e-05, | |
| "loss": 1.9289, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.5844956523131616, | |
| "grad_norm": 1.2962714433670044, | |
| "learning_rate": 1.4612566807454411e-05, | |
| "loss": 1.9303, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.5860970102647045, | |
| "grad_norm": 1.9360517263412476, | |
| "learning_rate": 1.4652601237063877e-05, | |
| "loss": 1.9155, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.5876983682162473, | |
| "grad_norm": 1.16732919216156, | |
| "learning_rate": 1.469263566667334e-05, | |
| "loss": 1.9132, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.5892997261677902, | |
| "grad_norm": 0.8907911777496338, | |
| "learning_rate": 1.4732670096282804e-05, | |
| "loss": 1.9312, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.5909010841193332, | |
| "grad_norm": 0.9275608062744141, | |
| "learning_rate": 1.477270452589227e-05, | |
| "loss": 1.9638, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.5925024420708761, | |
| "grad_norm": 1.2977879047393799, | |
| "learning_rate": 1.4812738955501734e-05, | |
| "loss": 1.9372, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.594103800022419, | |
| "grad_norm": 1.1967015266418457, | |
| "learning_rate": 1.4852773385111196e-05, | |
| "loss": 1.9319, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.595705157973962, | |
| "grad_norm": 1.0788534879684448, | |
| "learning_rate": 1.489280781472066e-05, | |
| "loss": 1.9326, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.5973065159255049, | |
| "grad_norm": 0.8467668890953064, | |
| "learning_rate": 1.4932842244330125e-05, | |
| "loss": 1.9238, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.5989078738770477, | |
| "grad_norm": 0.8952154517173767, | |
| "learning_rate": 1.4972876673939589e-05, | |
| "loss": 1.926, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.6005092318285906, | |
| "grad_norm": 0.8892629742622375, | |
| "learning_rate": 1.5012911103549053e-05, | |
| "loss": 1.943, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.6021105897801335, | |
| "grad_norm": 0.8832671642303467, | |
| "learning_rate": 1.5052945533158518e-05, | |
| "loss": 1.9035, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.6037119477316765, | |
| "grad_norm": 1.0101639032363892, | |
| "learning_rate": 1.5092979962767982e-05, | |
| "loss": 1.9282, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.6053133056832194, | |
| "grad_norm": 0.9980772733688354, | |
| "learning_rate": 1.5133014392377446e-05, | |
| "loss": 1.9334, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.6069146636347623, | |
| "grad_norm": 0.9352878332138062, | |
| "learning_rate": 1.5173048821986911e-05, | |
| "loss": 1.9286, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.6085160215863051, | |
| "grad_norm": 0.9329906105995178, | |
| "learning_rate": 1.5213083251596375e-05, | |
| "loss": 1.9133, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.6101173795378481, | |
| "grad_norm": 1.0744600296020508, | |
| "learning_rate": 1.5253117681205837e-05, | |
| "loss": 1.9431, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.611718737489391, | |
| "grad_norm": 1.1284574270248413, | |
| "learning_rate": 1.52931521108153e-05, | |
| "loss": 1.9236, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.6133200954409339, | |
| "grad_norm": 0.7931867241859436, | |
| "learning_rate": 1.5333186540424767e-05, | |
| "loss": 1.9239, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.6149214533924768, | |
| "grad_norm": 0.9535111784934998, | |
| "learning_rate": 1.5373220970034232e-05, | |
| "loss": 1.933, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.6165228113440198, | |
| "grad_norm": 1.1604766845703125, | |
| "learning_rate": 1.5413255399643694e-05, | |
| "loss": 1.9118, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.6181241692955627, | |
| "grad_norm": 0.9939236640930176, | |
| "learning_rate": 1.545328982925316e-05, | |
| "loss": 1.9004, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.6197255272471055, | |
| "grad_norm": 0.901757538318634, | |
| "learning_rate": 1.5493324258862622e-05, | |
| "loss": 1.9198, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.6213268851986484, | |
| "grad_norm": 1.034832239151001, | |
| "learning_rate": 1.5533358688472087e-05, | |
| "loss": 1.9175, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.6229282431501914, | |
| "grad_norm": 0.8186530470848083, | |
| "learning_rate": 1.5573393118081553e-05, | |
| "loss": 1.9098, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.6245296011017343, | |
| "grad_norm": 1.0724900960922241, | |
| "learning_rate": 1.5613427547691015e-05, | |
| "loss": 1.9143, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.6261309590532772, | |
| "grad_norm": 0.9440537691116333, | |
| "learning_rate": 1.565346197730048e-05, | |
| "loss": 1.9327, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.6277323170048201, | |
| "grad_norm": 0.9175347089767456, | |
| "learning_rate": 1.5693496406909946e-05, | |
| "loss": 1.9097, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.6293336749563629, | |
| "grad_norm": 1.075506567955017, | |
| "learning_rate": 1.5733530836519408e-05, | |
| "loss": 1.9148, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.6309350329079059, | |
| "grad_norm": 1.156162142753601, | |
| "learning_rate": 1.577356526612887e-05, | |
| "loss": 1.928, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.6325363908594488, | |
| "grad_norm": 1.2199561595916748, | |
| "learning_rate": 1.5813599695738336e-05, | |
| "loss": 1.9212, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.6341377488109917, | |
| "grad_norm": 1.088230848312378, | |
| "learning_rate": 1.58536341253478e-05, | |
| "loss": 1.9147, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.6357391067625346, | |
| "grad_norm": 0.911685049533844, | |
| "learning_rate": 1.5893668554957263e-05, | |
| "loss": 1.914, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.6373404647140776, | |
| "grad_norm": 0.8977714776992798, | |
| "learning_rate": 1.593370298456673e-05, | |
| "loss": 1.9212, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.6389418226656205, | |
| "grad_norm": 0.9816354513168335, | |
| "learning_rate": 1.5973737414176194e-05, | |
| "loss": 1.9046, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.6405431806171633, | |
| "grad_norm": 0.88201904296875, | |
| "learning_rate": 1.6013771843785656e-05, | |
| "loss": 1.9359, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.6421445385687062, | |
| "grad_norm": 0.9104109406471252, | |
| "learning_rate": 1.6053806273395122e-05, | |
| "loss": 1.9302, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.6437458965202492, | |
| "grad_norm": 1.5256859064102173, | |
| "learning_rate": 1.6093840703004587e-05, | |
| "loss": 1.9226, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.6453472544717921, | |
| "grad_norm": 0.8858827948570251, | |
| "learning_rate": 1.613387513261405e-05, | |
| "loss": 1.9143, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.646948612423335, | |
| "grad_norm": 1.480420470237732, | |
| "learning_rate": 1.617390956222351e-05, | |
| "loss": 1.9171, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.6485499703748779, | |
| "grad_norm": 0.9443252682685852, | |
| "learning_rate": 1.6213943991832977e-05, | |
| "loss": 1.9104, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.6501513283264208, | |
| "grad_norm": 1.4180731773376465, | |
| "learning_rate": 1.6253978421442443e-05, | |
| "loss": 1.9015, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.6517526862779637, | |
| "grad_norm": 1.0369699001312256, | |
| "learning_rate": 1.6294012851051905e-05, | |
| "loss": 1.9085, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.6533540442295066, | |
| "grad_norm": 1.0155749320983887, | |
| "learning_rate": 1.633404728066137e-05, | |
| "loss": 1.8968, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.6549554021810495, | |
| "grad_norm": 1.0214248895645142, | |
| "learning_rate": 1.6374081710270836e-05, | |
| "loss": 1.9109, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.6565567601325925, | |
| "grad_norm": 1.2233892679214478, | |
| "learning_rate": 1.6414116139880298e-05, | |
| "loss": 1.8968, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.6581581180841354, | |
| "grad_norm": 0.8677876591682434, | |
| "learning_rate": 1.6454150569489763e-05, | |
| "loss": 1.9121, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.6597594760356783, | |
| "grad_norm": 0.8257797956466675, | |
| "learning_rate": 1.649418499909923e-05, | |
| "loss": 1.9329, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.6613608339872211, | |
| "grad_norm": 0.904925525188446, | |
| "learning_rate": 1.653421942870869e-05, | |
| "loss": 1.8934, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.662962191938764, | |
| "grad_norm": 0.8754270672798157, | |
| "learning_rate": 1.6574253858318153e-05, | |
| "loss": 1.8885, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.664563549890307, | |
| "grad_norm": 0.9102962613105774, | |
| "learning_rate": 1.661428828792762e-05, | |
| "loss": 1.9046, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.6661649078418499, | |
| "grad_norm": 0.9199568033218384, | |
| "learning_rate": 1.6654322717537084e-05, | |
| "loss": 1.9122, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.6677662657933928, | |
| "grad_norm": 0.9582586288452148, | |
| "learning_rate": 1.6694357147146546e-05, | |
| "loss": 1.8959, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.6693676237449357, | |
| "grad_norm": 0.8151847124099731, | |
| "learning_rate": 1.6734391576756012e-05, | |
| "loss": 1.887, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.6709689816964786, | |
| "grad_norm": 0.9953237771987915, | |
| "learning_rate": 1.6774426006365477e-05, | |
| "loss": 1.9236, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.6725703396480215, | |
| "grad_norm": 1.465527057647705, | |
| "learning_rate": 1.681446043597494e-05, | |
| "loss": 1.9136, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.6741716975995644, | |
| "grad_norm": 0.9603108763694763, | |
| "learning_rate": 1.68544948655844e-05, | |
| "loss": 1.8941, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.6757730555511073, | |
| "grad_norm": 0.8624867796897888, | |
| "learning_rate": 1.689452929519387e-05, | |
| "loss": 1.905, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.6773744135026503, | |
| "grad_norm": 0.9774655699729919, | |
| "learning_rate": 1.6934563724803333e-05, | |
| "loss": 1.9156, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.6789757714541932, | |
| "grad_norm": 2.9199743270874023, | |
| "learning_rate": 1.6974598154412795e-05, | |
| "loss": 1.9126, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.6805771294057361, | |
| "grad_norm": 1.2201206684112549, | |
| "learning_rate": 1.701463258402226e-05, | |
| "loss": 1.8976, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.6821784873572789, | |
| "grad_norm": 1.0182702541351318, | |
| "learning_rate": 1.7054667013631726e-05, | |
| "loss": 1.8968, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.6837798453088219, | |
| "grad_norm": 1.134906530380249, | |
| "learning_rate": 1.7094701443241188e-05, | |
| "loss": 1.9361, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.6853812032603648, | |
| "grad_norm": 1.635399341583252, | |
| "learning_rate": 1.7134735872850653e-05, | |
| "loss": 1.919, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.6869825612119077, | |
| "grad_norm": 0.8835542798042297, | |
| "learning_rate": 1.717477030246012e-05, | |
| "loss": 1.8776, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.6885839191634506, | |
| "grad_norm": 0.9510149955749512, | |
| "learning_rate": 1.721480473206958e-05, | |
| "loss": 1.9036, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.6901852771149936, | |
| "grad_norm": 0.8410897850990295, | |
| "learning_rate": 1.7254839161679043e-05, | |
| "loss": 1.9091, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.6917866350665364, | |
| "grad_norm": 1.4297950267791748, | |
| "learning_rate": 1.729487359128851e-05, | |
| "loss": 1.8934, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.6933879930180793, | |
| "grad_norm": 0.9010776877403259, | |
| "learning_rate": 1.7334908020897974e-05, | |
| "loss": 1.8997, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.6949893509696222, | |
| "grad_norm": 0.8833039999008179, | |
| "learning_rate": 1.7374942450507436e-05, | |
| "loss": 1.892, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.6965907089211651, | |
| "grad_norm": 0.9560312032699585, | |
| "learning_rate": 1.74149768801169e-05, | |
| "loss": 1.8977, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.6981920668727081, | |
| "grad_norm": 0.8603575825691223, | |
| "learning_rate": 1.7455011309726367e-05, | |
| "loss": 1.8881, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.699793424824251, | |
| "grad_norm": 0.8545820116996765, | |
| "learning_rate": 1.749504573933583e-05, | |
| "loss": 1.8992, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.7013947827757939, | |
| "grad_norm": 1.6138850450515747, | |
| "learning_rate": 1.7535080168945295e-05, | |
| "loss": 1.8823, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.7029961407273367, | |
| "grad_norm": 1.339882731437683, | |
| "learning_rate": 1.757511459855476e-05, | |
| "loss": 1.8942, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.7045974986788797, | |
| "grad_norm": 0.8209664225578308, | |
| "learning_rate": 1.7615149028164222e-05, | |
| "loss": 1.885, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.7061988566304226, | |
| "grad_norm": 0.8096824884414673, | |
| "learning_rate": 1.7655183457773685e-05, | |
| "loss": 1.8738, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.7078002145819655, | |
| "grad_norm": 1.0560259819030762, | |
| "learning_rate": 1.769521788738315e-05, | |
| "loss": 1.8941, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.7094015725335084, | |
| "grad_norm": 1.1268258094787598, | |
| "learning_rate": 1.7735252316992616e-05, | |
| "loss": 1.8974, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.7110029304850514, | |
| "grad_norm": 0.9307839274406433, | |
| "learning_rate": 1.7775286746602078e-05, | |
| "loss": 1.9122, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.7126042884365942, | |
| "grad_norm": 1.0069445371627808, | |
| "learning_rate": 1.7815321176211543e-05, | |
| "loss": 1.8597, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.7142056463881371, | |
| "grad_norm": 1.2771753072738647, | |
| "learning_rate": 1.785535560582101e-05, | |
| "loss": 1.8819, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.71580700433968, | |
| "grad_norm": 0.7819973230361938, | |
| "learning_rate": 1.789539003543047e-05, | |
| "loss": 1.8722, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.717408362291223, | |
| "grad_norm": 0.8193828463554382, | |
| "learning_rate": 1.7935424465039936e-05, | |
| "loss": 1.8745, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.7190097202427659, | |
| "grad_norm": 0.7969743609428406, | |
| "learning_rate": 1.7975458894649402e-05, | |
| "loss": 1.911, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.7206110781943088, | |
| "grad_norm": 1.4411369562149048, | |
| "learning_rate": 1.8015493324258864e-05, | |
| "loss": 1.8763, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.7222124361458517, | |
| "grad_norm": 1.0016000270843506, | |
| "learning_rate": 1.805552775386833e-05, | |
| "loss": 1.8875, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.7238137940973945, | |
| "grad_norm": 0.8997382521629333, | |
| "learning_rate": 1.809556218347779e-05, | |
| "loss": 1.8766, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.7254151520489375, | |
| "grad_norm": 0.8878375291824341, | |
| "learning_rate": 1.8135596613087257e-05, | |
| "loss": 1.8803, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.7270165100004804, | |
| "grad_norm": 0.8563076853752136, | |
| "learning_rate": 1.817563104269672e-05, | |
| "loss": 1.8855, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.7286178679520233, | |
| "grad_norm": 1.020241618156433, | |
| "learning_rate": 1.8215665472306185e-05, | |
| "loss": 1.8583, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.7302192259035662, | |
| "grad_norm": 0.8296322822570801, | |
| "learning_rate": 1.825569990191565e-05, | |
| "loss": 1.8692, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.7318205838551092, | |
| "grad_norm": 1.9434692859649658, | |
| "learning_rate": 1.8295734331525112e-05, | |
| "loss": 1.8831, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.733421941806652, | |
| "grad_norm": 0.9088252782821655, | |
| "learning_rate": 1.8335768761134578e-05, | |
| "loss": 1.8868, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.7350232997581949, | |
| "grad_norm": 1.7532590627670288, | |
| "learning_rate": 1.837580319074404e-05, | |
| "loss": 1.8734, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.7366246577097378, | |
| "grad_norm": 0.9662244319915771, | |
| "learning_rate": 1.8415837620353505e-05, | |
| "loss": 1.888, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.7382260156612808, | |
| "grad_norm": 3.4851512908935547, | |
| "learning_rate": 1.845587204996297e-05, | |
| "loss": 1.8829, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.7398273736128237, | |
| "grad_norm": 0.9157941341400146, | |
| "learning_rate": 1.8495906479572433e-05, | |
| "loss": 1.9091, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.7414287315643666, | |
| "grad_norm": 0.8992369771003723, | |
| "learning_rate": 1.85359409091819e-05, | |
| "loss": 1.8932, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.7430300895159095, | |
| "grad_norm": 0.8611487150192261, | |
| "learning_rate": 1.857597533879136e-05, | |
| "loss": 1.8672, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.7446314474674524, | |
| "grad_norm": 1.0629839897155762, | |
| "learning_rate": 1.8616009768400826e-05, | |
| "loss": 1.8819, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.7462328054189953, | |
| "grad_norm": 0.8317407369613647, | |
| "learning_rate": 1.865604419801029e-05, | |
| "loss": 1.8684, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.7478341633705382, | |
| "grad_norm": 0.8102233409881592, | |
| "learning_rate": 1.8696078627619754e-05, | |
| "loss": 1.8796, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.7494355213220811, | |
| "grad_norm": 0.8077260255813599, | |
| "learning_rate": 1.873611305722922e-05, | |
| "loss": 1.8872, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.7510368792736241, | |
| "grad_norm": 0.9285743236541748, | |
| "learning_rate": 1.877614748683868e-05, | |
| "loss": 1.8828, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.752638237225167, | |
| "grad_norm": 0.835612416267395, | |
| "learning_rate": 1.8816181916448147e-05, | |
| "loss": 1.8685, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.7542395951767098, | |
| "grad_norm": 1.5960347652435303, | |
| "learning_rate": 1.8856216346057612e-05, | |
| "loss": 1.8705, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.7558409531282527, | |
| "grad_norm": 0.7755472660064697, | |
| "learning_rate": 1.8896250775667075e-05, | |
| "loss": 1.8851, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.7574423110797956, | |
| "grad_norm": 1.0042415857315063, | |
| "learning_rate": 1.893628520527654e-05, | |
| "loss": 1.855, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.7590436690313386, | |
| "grad_norm": 0.8991414904594421, | |
| "learning_rate": 1.8976319634886006e-05, | |
| "loss": 1.8569, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.7606450269828815, | |
| "grad_norm": 0.813565194606781, | |
| "learning_rate": 1.9016354064495468e-05, | |
| "loss": 1.8578, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.7622463849344244, | |
| "grad_norm": 0.7883344292640686, | |
| "learning_rate": 1.9056388494104933e-05, | |
| "loss": 1.8968, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.7638477428859674, | |
| "grad_norm": 1.0632473230361938, | |
| "learning_rate": 1.9096422923714395e-05, | |
| "loss": 1.879, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.7654491008375102, | |
| "grad_norm": 0.8479236364364624, | |
| "learning_rate": 1.913645735332386e-05, | |
| "loss": 1.8683, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.7670504587890531, | |
| "grad_norm": 0.871159553527832, | |
| "learning_rate": 1.9176491782933323e-05, | |
| "loss": 1.8653, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.768651816740596, | |
| "grad_norm": 0.8534667491912842, | |
| "learning_rate": 1.921652621254279e-05, | |
| "loss": 1.8507, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.7702531746921389, | |
| "grad_norm": 0.8931534290313721, | |
| "learning_rate": 1.9256560642152254e-05, | |
| "loss": 1.8625, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.7718545326436819, | |
| "grad_norm": 1.1518031358718872, | |
| "learning_rate": 1.9296595071761716e-05, | |
| "loss": 1.8786, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.7734558905952248, | |
| "grad_norm": 0.9310818910598755, | |
| "learning_rate": 1.933662950137118e-05, | |
| "loss": 1.8517, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.7750572485467676, | |
| "grad_norm": 1.314759612083435, | |
| "learning_rate": 1.9376663930980647e-05, | |
| "loss": 1.853, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.7766586064983105, | |
| "grad_norm": 0.8431141972541809, | |
| "learning_rate": 1.941669836059011e-05, | |
| "loss": 1.863, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.7782599644498535, | |
| "grad_norm": 0.90580815076828, | |
| "learning_rate": 1.945673279019957e-05, | |
| "loss": 1.8496, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.7798613224013964, | |
| "grad_norm": 1.0436537265777588, | |
| "learning_rate": 1.9496767219809037e-05, | |
| "loss": 1.8629, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.7814626803529393, | |
| "grad_norm": 0.8080843091011047, | |
| "learning_rate": 1.9536801649418502e-05, | |
| "loss": 1.857, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.7830640383044822, | |
| "grad_norm": 0.8750945925712585, | |
| "learning_rate": 1.9576836079027964e-05, | |
| "loss": 1.8725, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.7846653962560252, | |
| "grad_norm": 1.2619659900665283, | |
| "learning_rate": 1.961687050863743e-05, | |
| "loss": 1.8422, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.786266754207568, | |
| "grad_norm": 0.84897780418396, | |
| "learning_rate": 1.9656904938246895e-05, | |
| "loss": 1.8725, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.7878681121591109, | |
| "grad_norm": 0.7454677820205688, | |
| "learning_rate": 1.9696939367856358e-05, | |
| "loss": 1.8735, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.7894694701106538, | |
| "grad_norm": 0.8530156016349792, | |
| "learning_rate": 1.9736973797465823e-05, | |
| "loss": 1.8597, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.7910708280621968, | |
| "grad_norm": 0.9725930690765381, | |
| "learning_rate": 1.977700822707529e-05, | |
| "loss": 1.8515, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.7926721860137397, | |
| "grad_norm": 0.8235682249069214, | |
| "learning_rate": 1.981704265668475e-05, | |
| "loss": 1.8791, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.7942735439652826, | |
| "grad_norm": 0.9344043135643005, | |
| "learning_rate": 1.9857077086294213e-05, | |
| "loss": 1.8663, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.7958749019168254, | |
| "grad_norm": 0.9629167318344116, | |
| "learning_rate": 1.9897111515903678e-05, | |
| "loss": 1.8647, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.7974762598683683, | |
| "grad_norm": 0.7384589910507202, | |
| "learning_rate": 1.9937145945513144e-05, | |
| "loss": 1.8586, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.7990776178199113, | |
| "grad_norm": 1.069229245185852, | |
| "learning_rate": 1.9977180375122606e-05, | |
| "loss": 1.8622, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.8006789757714542, | |
| "grad_norm": 0.8724033236503601, | |
| "learning_rate": 1.9999415105482566e-05, | |
| "loss": 1.8602, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.8022803337229971, | |
| "grad_norm": 0.8449952602386475, | |
| "learning_rate": 1.9993531998299776e-05, | |
| "loss": 1.8321, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.80388169167454, | |
| "grad_norm": 0.8022226095199585, | |
| "learning_rate": 1.9981326651105962e-05, | |
| "loss": 1.8735, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.805483049626083, | |
| "grad_norm": 0.8840874433517456, | |
| "learning_rate": 1.9962806785408838e-05, | |
| "loss": 1.829, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.8070844075776258, | |
| "grad_norm": 1.0448670387268066, | |
| "learning_rate": 1.993798411749008e-05, | |
| "loss": 1.8595, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.8086857655291687, | |
| "grad_norm": 0.904961109161377, | |
| "learning_rate": 1.9906874350993245e-05, | |
| "loss": 1.8586, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.8102871234807116, | |
| "grad_norm": 2.3403968811035156, | |
| "learning_rate": 1.98694971669891e-05, | |
| "loss": 1.8492, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.8118884814322546, | |
| "grad_norm": 1.0114359855651855, | |
| "learning_rate": 1.9825876211524724e-05, | |
| "loss": 1.8609, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.8134898393837975, | |
| "grad_norm": 0.8057318329811096, | |
| "learning_rate": 1.977603908066426e-05, | |
| "loss": 1.829, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.8150911973353404, | |
| "grad_norm": 0.8821057677268982, | |
| "learning_rate": 1.9720017303030703e-05, | |
| "loss": 1.862, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.8166925552868832, | |
| "grad_norm": 2.767875909805298, | |
| "learning_rate": 1.9657846319859854e-05, | |
| "loss": 1.8678, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.8182939132384262, | |
| "grad_norm": 0.7666317820549011, | |
| "learning_rate": 1.9589565462579015e-05, | |
| "loss": 1.8621, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.8198952711899691, | |
| "grad_norm": 1.021729588508606, | |
| "learning_rate": 1.9515217927924633e-05, | |
| "loss": 1.8352, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.821496629141512, | |
| "grad_norm": 0.8282054662704468, | |
| "learning_rate": 1.943485075061461e-05, | |
| "loss": 1.8583, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.8230979870930549, | |
| "grad_norm": 3.0896732807159424, | |
| "learning_rate": 1.934851477359256e-05, | |
| "loss": 1.8394, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.8246993450445979, | |
| "grad_norm": 0.9474732279777527, | |
| "learning_rate": 1.9256264615862893e-05, | |
| "loss": 1.8682, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.8263007029961408, | |
| "grad_norm": 0.7810207009315491, | |
| "learning_rate": 1.9158158637937027e-05, | |
| "loss": 1.8337, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.8279020609476836, | |
| "grad_norm": 0.8208989500999451, | |
| "learning_rate": 1.9054258904912575e-05, | |
| "loss": 1.8367, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.8295034188992265, | |
| "grad_norm": 0.8764814138412476, | |
| "learning_rate": 1.89446311472089e-05, | |
| "loss": 1.8403, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.8311047768507694, | |
| "grad_norm": 1.1485708951950073, | |
| "learning_rate": 1.8829344718983903e-05, | |
| "loss": 1.8576, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.8327061348023124, | |
| "grad_norm": 1.06003737449646, | |
| "learning_rate": 1.8708472554258237e-05, | |
| "loss": 1.872, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.8343074927538553, | |
| "grad_norm": 0.8322979807853699, | |
| "learning_rate": 1.8582091120774855e-05, | |
| "loss": 1.859, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.8359088507053982, | |
| "grad_norm": 0.7536402940750122, | |
| "learning_rate": 1.845028037162298e-05, | |
| "loss": 1.8401, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.837510208656941, | |
| "grad_norm": 1.4201630353927612, | |
| "learning_rate": 1.83131236946571e-05, | |
| "loss": 1.8723, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.839111566608484, | |
| "grad_norm": 0.7676379680633545, | |
| "learning_rate": 1.8170707859743067e-05, | |
| "loss": 1.8572, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.8407129245600269, | |
| "grad_norm": 0.8063752055168152, | |
| "learning_rate": 1.8023122963864602e-05, | |
| "loss": 1.8469, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.8423142825115698, | |
| "grad_norm": 0.8385179042816162, | |
| "learning_rate": 1.787046237412493e-05, | |
| "loss": 1.8564, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.8439156404631127, | |
| "grad_norm": 0.8969714641571045, | |
| "learning_rate": 1.7712822668679682e-05, | |
| "loss": 1.8556, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.8455169984146557, | |
| "grad_norm": 1.184692621231079, | |
| "learning_rate": 1.7550303575638318e-05, | |
| "loss": 1.8423, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.8471183563661986, | |
| "grad_norm": 0.8388579487800598, | |
| "learning_rate": 1.7383007909972844e-05, | |
| "loss": 1.8157, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.8487197143177414, | |
| "grad_norm": 0.7864462733268738, | |
| "learning_rate": 1.721104150847362e-05, | |
| "loss": 1.8526, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.8503210722692843, | |
| "grad_norm": 0.86407071352005, | |
| "learning_rate": 1.703451316279353e-05, | |
| "loss": 1.8428, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.8519224302208273, | |
| "grad_norm": 0.8313634395599365, | |
| "learning_rate": 1.6853534550622722e-05, | |
| "loss": 1.8479, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.8535237881723702, | |
| "grad_norm": 1.4253445863723755, | |
| "learning_rate": 1.666822016503765e-05, | |
| "loss": 1.8275, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.8551251461239131, | |
| "grad_norm": 5.398781776428223, | |
| "learning_rate": 1.6478687242068904e-05, | |
| "loss": 1.854, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.856726504075456, | |
| "grad_norm": 1.7977509498596191, | |
| "learning_rate": 1.628505568653385e-05, | |
| "loss": 1.8339, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.8583278620269988, | |
| "grad_norm": 0.8206777572631836, | |
| "learning_rate": 1.6087447996180826e-05, | |
| "loss": 1.8511, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.8599292199785418, | |
| "grad_norm": 0.8535060286521912, | |
| "learning_rate": 1.5885989184193027e-05, | |
| "loss": 1.8586, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.8615305779300847, | |
| "grad_norm": 1.6550579071044922, | |
| "learning_rate": 1.5680806700101e-05, | |
| "loss": 1.8482, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.8631319358816276, | |
| "grad_norm": 0.8122648000717163, | |
| "learning_rate": 1.5472030349153854e-05, | |
| "loss": 1.8335, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.8647332938331705, | |
| "grad_norm": 0.7805556058883667, | |
| "learning_rate": 1.525979221020014e-05, | |
| "loss": 1.8252, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.8663346517847135, | |
| "grad_norm": 0.8546029329299927, | |
| "learning_rate": 1.5044226552130399e-05, | |
| "loss": 1.8353, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.8679360097362564, | |
| "grad_norm": 0.7961782217025757, | |
| "learning_rate": 1.4825469748934192e-05, | |
| "loss": 1.8348, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.8695373676877992, | |
| "grad_norm": 0.9392079710960388, | |
| "learning_rate": 1.4603660193425402e-05, | |
| "loss": 1.8205, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.8711387256393421, | |
| "grad_norm": 0.7852017283439636, | |
| "learning_rate": 1.4378938209690334e-05, | |
| "loss": 1.8327, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.8727400835908851, | |
| "grad_norm": 0.8385934829711914, | |
| "learning_rate": 1.4151445964314057e-05, | |
| "loss": 1.8383, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.874341441542428, | |
| "grad_norm": 0.7498407363891602, | |
| "learning_rate": 1.3921327376441087e-05, | |
| "loss": 1.8121, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.8759427994939709, | |
| "grad_norm": 0.8227770924568176, | |
| "learning_rate": 1.3688728026727369e-05, | |
| "loss": 1.8395, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.8775441574455138, | |
| "grad_norm": 0.911970317363739, | |
| "learning_rate": 1.3453795065241128e-05, | |
| "loss": 1.8262, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.8791455153970567, | |
| "grad_norm": 0.8143411874771118, | |
| "learning_rate": 1.3216677118370834e-05, | |
| "loss": 1.8571, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.8807468733485996, | |
| "grad_norm": 0.8301388621330261, | |
| "learning_rate": 1.2977524194799229e-05, | |
| "loss": 1.8435, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.8823482313001425, | |
| "grad_norm": 1.3477791547775269, | |
| "learning_rate": 1.2736487590602864e-05, | |
| "loss": 1.8372, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.8839495892516854, | |
| "grad_norm": 0.8804235458374023, | |
| "learning_rate": 1.2493719793537157e-05, | |
| "loss": 1.841, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.8855509472032284, | |
| "grad_norm": 0.7941620349884033, | |
| "learning_rate": 1.2249374386567598e-05, | |
| "loss": 1.8271, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.8871523051547713, | |
| "grad_norm": 0.8681734800338745, | |
| "learning_rate": 1.2003605950708059e-05, | |
| "loss": 1.8459, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.8887536631063142, | |
| "grad_norm": 0.7299553155899048, | |
| "learning_rate": 1.1756569967227716e-05, | |
| "loss": 1.8684, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.890355021057857, | |
| "grad_norm": 0.7805650234222412, | |
| "learning_rate": 1.1508422719288434e-05, | |
| "loss": 1.8113, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.8919563790094, | |
| "grad_norm": 0.7692527770996094, | |
| "learning_rate": 1.125932119307486e-05, | |
| "loss": 1.8252, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.8935577369609429, | |
| "grad_norm": 0.8291378021240234, | |
| "learning_rate": 1.1009422978479742e-05, | |
| "loss": 1.7992, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.8951590949124858, | |
| "grad_norm": 0.8779826164245605, | |
| "learning_rate": 1.0758886169407351e-05, | |
| "loss": 1.8336, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.8967604528640287, | |
| "grad_norm": 0.7980159521102905, | |
| "learning_rate": 1.050786926375801e-05, | |
| "loss": 1.8212, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.8983618108155716, | |
| "grad_norm": 3.2298014163970947, | |
| "learning_rate": 1.025653106315707e-05, | |
| "loss": 1.8188, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.8999631687671145, | |
| "grad_norm": 0.8914725184440613, | |
| "learning_rate": 1.0005030572491733e-05, | |
| "loss": 1.8387, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.9015645267186574, | |
| "grad_norm": 0.8599027395248413, | |
| "learning_rate": 9.753526899319275e-06, | |
| "loss": 1.8327, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.9031658846702003, | |
| "grad_norm": 0.9533581733703613, | |
| "learning_rate": 9.50217915321035e-06, | |
| "loss": 1.822, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.9047672426217432, | |
| "grad_norm": 0.8099405169487, | |
| "learning_rate": 9.251146345090958e-06, | |
| "loss": 1.8462, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.9063686005732862, | |
| "grad_norm": 0.8883758783340454, | |
| "learning_rate": 9.000587286646886e-06, | |
| "loss": 1.8184, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.9079699585248291, | |
| "grad_norm": 1.6830765008926392, | |
| "learning_rate": 8.750660489854142e-06, | |
| "loss": 1.82, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.909571316476372, | |
| "grad_norm": 1.2402883768081665, | |
| "learning_rate": 8.501524066699047e-06, | |
| "loss": 1.816, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.9111726744279148, | |
| "grad_norm": 0.8525800108909607, | |
| "learning_rate": 8.253335629151306e-06, | |
| "loss": 1.8248, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.9127740323794578, | |
| "grad_norm": 0.8562950491905212, | |
| "learning_rate": 8.006252189453485e-06, | |
| "loss": 1.8284, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.9143753903310007, | |
| "grad_norm": 0.7687914371490479, | |
| "learning_rate": 7.760430060789828e-06, | |
| "loss": 1.8198, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.9159767482825436, | |
| "grad_norm": 0.9463182091712952, | |
| "learning_rate": 7.51602475839736e-06, | |
| "loss": 1.8266, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.9175781062340865, | |
| "grad_norm": 1.0767518281936646, | |
| "learning_rate": 7.273190901181783e-06, | |
| "loss": 1.8054, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.9191794641856295, | |
| "grad_norm": 0.8242263197898865, | |
| "learning_rate": 7.032082113900434e-06, | |
| "loss": 1.8337, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.9207808221371723, | |
| "grad_norm": 0.7926039695739746, | |
| "learning_rate": 6.792850929974142e-06, | |
| "loss": 1.8144, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.9223821800887152, | |
| "grad_norm": 0.7732511162757874, | |
| "learning_rate": 6.55564869498956e-06, | |
| "loss": 1.804, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.9239835380402581, | |
| "grad_norm": 0.7959622144699097, | |
| "learning_rate": 6.32062547095288e-06, | |
| "loss": 1.8222, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.925584895991801, | |
| "grad_norm": 0.8663679957389832, | |
| "learning_rate": 6.087929941355671e-06, | |
| "loss": 1.8496, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.927186253943344, | |
| "grad_norm": 0.7793252468109131, | |
| "learning_rate": 5.857709317112736e-06, | |
| "loss": 1.8177, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.9287876118948869, | |
| "grad_norm": 0.9085448980331421, | |
| "learning_rate": 5.630109243431608e-06, | |
| "loss": 1.8193, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.9303889698464298, | |
| "grad_norm": 0.7569569945335388, | |
| "learning_rate": 5.4052737076725824e-06, | |
| "loss": 1.8196, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.9319903277979726, | |
| "grad_norm": 0.8424269556999207, | |
| "learning_rate": 5.1833449482574895e-06, | |
| "loss": 1.835, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.9335916857495156, | |
| "grad_norm": 0.8512621521949768, | |
| "learning_rate": 4.964463364685001e-06, | |
| "loss": 1.8145, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.9351930437010585, | |
| "grad_norm": 1.0519986152648926, | |
| "learning_rate": 4.748767428709187e-06, | |
| "loss": 1.8213, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.9367944016526014, | |
| "grad_norm": 0.7896735072135925, | |
| "learning_rate": 4.536393596737752e-06, | |
| "loss": 1.8243, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.9383957596041443, | |
| "grad_norm": 1.0739407539367676, | |
| "learning_rate": 4.327476223505136e-06, | |
| "loss": 1.832, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.9399971175556873, | |
| "grad_norm": 0.8374795913696289, | |
| "learning_rate": 4.12214747707527e-06, | |
| "loss": 1.8338, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.9415984755072301, | |
| "grad_norm": 1.0221420526504517, | |
| "learning_rate": 3.920537255227669e-06, | |
| "loss": 1.8101, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.943199833458773, | |
| "grad_norm": 0.8421764969825745, | |
| "learning_rate": 3.7227731032797853e-06, | |
| "loss": 1.8329, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.9448011914103159, | |
| "grad_norm": 0.7701355814933777, | |
| "learning_rate": 3.5289801333976102e-06, | |
| "loss": 1.8216, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.9464025493618589, | |
| "grad_norm": 0.7741368412971497, | |
| "learning_rate": 3.339280945445559e-06, | |
| "loss": 1.8272, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.9480039073134018, | |
| "grad_norm": 1.7360873222351074, | |
| "learning_rate": 3.1537955494257345e-06, | |
| "loss": 1.8372, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.9496052652649447, | |
| "grad_norm": 0.7760699987411499, | |
| "learning_rate": 2.972641289555616e-06, | |
| "loss": 1.8182, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.9512066232164876, | |
| "grad_norm": 0.7646809220314026, | |
| "learning_rate": 2.7959327700322036e-06, | |
| "loss": 1.8084, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.9528079811680304, | |
| "grad_norm": 0.9442381858825684, | |
| "learning_rate": 2.623781782529625e-06, | |
| "loss": 1.8239, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.9544093391195734, | |
| "grad_norm": 0.8009527325630188, | |
| "learning_rate": 2.4562972354759698e-06, | |
| "loss": 1.8272, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.9560106970711163, | |
| "grad_norm": 0.7591850757598877, | |
| "learning_rate": 2.293585085154252e-06, | |
| "loss": 1.8314, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.9576120550226592, | |
| "grad_norm": 0.7954255938529968, | |
| "learning_rate": 2.135748268670902e-06, | |
| "loss": 1.8341, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.9592134129742022, | |
| "grad_norm": 1.0002678632736206, | |
| "learning_rate": 1.9828866388343814e-06, | |
| "loss": 1.8075, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.9608147709257451, | |
| "grad_norm": 0.7856830954551697, | |
| "learning_rate": 1.8350969009849483e-06, | |
| "loss": 1.8005, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.9624161288772879, | |
| "grad_norm": 0.9126999378204346, | |
| "learning_rate": 1.6924725518156637e-06, | |
| "loss": 1.8277, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.9640174868288308, | |
| "grad_norm": 0.8106286525726318, | |
| "learning_rate": 1.5551038202232805e-06, | |
| "loss": 1.8108, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.9656188447803737, | |
| "grad_norm": 1.359531044960022, | |
| "learning_rate": 1.4230776102264454e-06, | |
| "loss": 1.8475, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.9672202027319167, | |
| "grad_norm": 0.7704586386680603, | |
| "learning_rate": 1.2964774459873364e-06, | |
| "loss": 1.8482, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.9688215606834596, | |
| "grad_norm": 0.7488996982574463, | |
| "learning_rate": 1.1753834189715019e-06, | |
| "loss": 1.8115, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.9704229186350025, | |
| "grad_norm": 1.662976861000061, | |
| "learning_rate": 1.059872137279342e-06, | |
| "loss": 1.8391, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.9720242765865454, | |
| "grad_norm": 1.0111815929412842, | |
| "learning_rate": 9.500166771812902e-07, | |
| "loss": 1.8161, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.9736256345380883, | |
| "grad_norm": 0.7973281145095825, | |
| "learning_rate": 8.458865368873204e-07, | |
| "loss": 1.8219, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.9752269924896312, | |
| "grad_norm": 0.8591629266738892, | |
| "learning_rate": 7.475475925800968e-07, | |
| "loss": 1.8399, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.9768283504411741, | |
| "grad_norm": 0.9209094047546387, | |
| "learning_rate": 6.550620567394883e-07, | |
| "loss": 1.8319, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.978429708392717, | |
| "grad_norm": 0.916976273059845, | |
| "learning_rate": 5.684884387849176e-07, | |
| "loss": 1.8189, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.98003106634426, | |
| "grad_norm": 0.950470507144928, | |
| "learning_rate": 4.878815080603372e-07, | |
| "loss": 1.8052, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.9816324242958029, | |
| "grad_norm": 0.7501734495162964, | |
| "learning_rate": 4.1329225918533277e-07, | |
| "loss": 1.8419, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.9832337822473457, | |
| "grad_norm": 0.8855769038200378, | |
| "learning_rate": 3.447678797942389e-07, | |
| "loss": 1.8168, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.9848351401988886, | |
| "grad_norm": 0.9513728618621826, | |
| "learning_rate": 2.823517206836701e-07, | |
| "loss": 1.8219, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.9864364981504316, | |
| "grad_norm": 0.9888412952423096, | |
| "learning_rate": 2.2608326838736817e-07, | |
| "loss": 1.8183, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.9880378561019745, | |
| "grad_norm": 0.8009938597679138, | |
| "learning_rate": 1.7599812019571395e-07, | |
| "loss": 1.8027, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.9896392140535174, | |
| "grad_norm": 0.9275427460670471, | |
| "learning_rate": 1.321279616356963e-07, | |
| "loss": 1.8145, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.9912405720050603, | |
| "grad_norm": 0.7663293480873108, | |
| "learning_rate": 9.450054642560102e-08, | |
| "loss": 1.8332, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.9928419299566033, | |
| "grad_norm": 0.7306997776031494, | |
| "learning_rate": 6.313967891707906e-08, | |
| "loss": 1.8059, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.9944432879081461, | |
| "grad_norm": 0.8004014492034912, | |
| "learning_rate": 3.806519903573502e-08, | |
| "loss": 1.8347, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.996044645859689, | |
| "grad_norm": 0.7328791618347168, | |
| "learning_rate": 1.9292969729719502e-08, | |
| "loss": 1.8156, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.9976460038112319, | |
| "grad_norm": 0.8255366086959839, | |
| "learning_rate": 6.834866934314344e-09, | |
| "loss": 1.8029, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.9992473617627748, | |
| "grad_norm": 0.8802406787872314, | |
| "learning_rate": 6.987720588080837e-10, | |
| "loss": 1.8173, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 62447, | |
| "total_flos": 7.631778497299481e+18, | |
| "train_loss": 2.1485319636500972, | |
| "train_runtime": 14119.5859, | |
| "train_samples_per_second": 35.382, | |
| "train_steps_per_second": 4.423 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 62447, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.631778497299481e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |