| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2148, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.013976240391334731, | |
| "grad_norm": 7.36542272567749, | |
| "learning_rate": 4.5e-05, | |
| "loss": 3.7207, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.027952480782669462, | |
| "grad_norm": 2.479189872741699, | |
| "learning_rate": 4.9997813884789515e-05, | |
| "loss": 1.5402, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.041928721174004195, | |
| "grad_norm": 1.2499618530273438, | |
| "learning_rate": 4.999025743417125e-05, | |
| "loss": 1.6544, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.055904961565338925, | |
| "grad_norm": 1.4645347595214844, | |
| "learning_rate": 4.997730529020747e-05, | |
| "loss": 1.1744, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06988120195667366, | |
| "grad_norm": 1.411839246749878, | |
| "learning_rate": 4.9958960249419854e-05, | |
| "loss": 1.4873, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.08385744234800839, | |
| "grad_norm": 1.320661187171936, | |
| "learning_rate": 4.993522627272057e-05, | |
| "loss": 1.5195, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09783368273934312, | |
| "grad_norm": 1.4731823205947876, | |
| "learning_rate": 4.9906108484557024e-05, | |
| "loss": 1.7024, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.11180992313067785, | |
| "grad_norm": 1.086996078491211, | |
| "learning_rate": 4.987161317180547e-05, | |
| "loss": 1.4094, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.12578616352201258, | |
| "grad_norm": 0.963653564453125, | |
| "learning_rate": 4.983174778241357e-05, | |
| "loss": 1.4754, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.13976240391334732, | |
| "grad_norm": 1.3541700839996338, | |
| "learning_rate": 4.978652092379231e-05, | |
| "loss": 1.5034, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.15373864430468204, | |
| "grad_norm": 0.8499814867973328, | |
| "learning_rate": 4.9735942360957535e-05, | |
| "loss": 1.1504, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.16771488469601678, | |
| "grad_norm": 1.2124848365783691, | |
| "learning_rate": 4.9680023014421605e-05, | |
| "loss": 1.3261, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1816911250873515, | |
| "grad_norm": 1.1937811374664307, | |
| "learning_rate": 4.9618774957835484e-05, | |
| "loss": 1.3726, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.19566736547868624, | |
| "grad_norm": 1.1995941400527954, | |
| "learning_rate": 4.9552211415381935e-05, | |
| "loss": 1.2336, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.20964360587002095, | |
| "grad_norm": 1.1256550550460815, | |
| "learning_rate": 4.9480346758920217e-05, | |
| "loss": 1.401, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2236198462613557, | |
| "grad_norm": 1.2055258750915527, | |
| "learning_rate": 4.9403196504883086e-05, | |
| "loss": 1.268, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2375960866526904, | |
| "grad_norm": 1.1542116403579712, | |
| "learning_rate": 4.932077731092656e-05, | |
| "loss": 1.2265, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.25157232704402516, | |
| "grad_norm": 1.2236621379852295, | |
| "learning_rate": 4.923310697233336e-05, | |
| "loss": 1.3758, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2655485674353599, | |
| "grad_norm": 1.2633250951766968, | |
| "learning_rate": 4.9140204418170705e-05, | |
| "loss": 1.4731, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.27952480782669464, | |
| "grad_norm": 1.06014883518219, | |
| "learning_rate": 4.904208970720327e-05, | |
| "loss": 1.3239, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.29350104821802936, | |
| "grad_norm": 1.3706936836242676, | |
| "learning_rate": 4.893878402356229e-05, | |
| "loss": 1.1694, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3074772886093641, | |
| "grad_norm": 1.0831369161605835, | |
| "learning_rate": 4.8830309672171646e-05, | |
| "loss": 1.2387, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3214535290006988, | |
| "grad_norm": 1.2866735458374023, | |
| "learning_rate": 4.871669007393197e-05, | |
| "loss": 1.1891, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.33542976939203356, | |
| "grad_norm": 1.4383916854858398, | |
| "learning_rate": 4.859794976066377e-05, | |
| "loss": 1.2125, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3494060097833683, | |
| "grad_norm": 1.179437518119812, | |
| "learning_rate": 4.847411436981075e-05, | |
| "loss": 1.3652, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.363382250174703, | |
| "grad_norm": 1.160443902015686, | |
| "learning_rate": 4.8345210638904396e-05, | |
| "loss": 1.3129, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.37735849056603776, | |
| "grad_norm": 2.0713257789611816, | |
| "learning_rate": 4.821126639979094e-05, | |
| "loss": 1.3108, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3913347309573725, | |
| "grad_norm": 1.2409151792526245, | |
| "learning_rate": 4.807231057262225e-05, | |
| "loss": 1.1604, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4053109713487072, | |
| "grad_norm": 1.328251600265503, | |
| "learning_rate": 4.792837315961154e-05, | |
| "loss": 1.3142, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.4192872117400419, | |
| "grad_norm": 1.1564041376113892, | |
| "learning_rate": 4.777948523855557e-05, | |
| "loss": 1.2178, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4332634521313767, | |
| "grad_norm": 1.174281358718872, | |
| "learning_rate": 4.7625678956124584e-05, | |
| "loss": 1.2944, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4472396925227114, | |
| "grad_norm": 1.2078189849853516, | |
| "learning_rate": 4.7466987520921437e-05, | |
| "loss": 1.2028, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4612159329140461, | |
| "grad_norm": 1.1163133382797241, | |
| "learning_rate": 4.730344519631149e-05, | |
| "loss": 1.2439, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4751921733053808, | |
| "grad_norm": 1.1989187002182007, | |
| "learning_rate": 4.713508729302474e-05, | |
| "loss": 1.2537, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4891684136967156, | |
| "grad_norm": 1.1516531705856323, | |
| "learning_rate": 4.6961950161531784e-05, | |
| "loss": 1.2472, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5031446540880503, | |
| "grad_norm": 1.3662034273147583, | |
| "learning_rate": 4.678407118419538e-05, | |
| "loss": 1.3128, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.517120894479385, | |
| "grad_norm": 1.1363831758499146, | |
| "learning_rate": 4.6601488767199074e-05, | |
| "loss": 1.2577, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5310971348707197, | |
| "grad_norm": 1.411293864250183, | |
| "learning_rate": 4.641424233225491e-05, | |
| "loss": 1.3468, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5450733752620545, | |
| "grad_norm": 1.1499321460723877, | |
| "learning_rate": 4.6222372308091785e-05, | |
| "loss": 1.2135, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5590496156533893, | |
| "grad_norm": 1.371730089187622, | |
| "learning_rate": 4.602592012172644e-05, | |
| "loss": 1.1668, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.573025856044724, | |
| "grad_norm": 1.168100118637085, | |
| "learning_rate": 4.582492818951881e-05, | |
| "loss": 1.2127, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5870020964360587, | |
| "grad_norm": 1.2999486923217773, | |
| "learning_rate": 4.56194399080139e-05, | |
| "loss": 1.2012, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6009783368273934, | |
| "grad_norm": 1.2116057872772217, | |
| "learning_rate": 4.54094996445719e-05, | |
| "loss": 1.2952, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6149545772187281, | |
| "grad_norm": 1.2093985080718994, | |
| "learning_rate": 4.519515272778878e-05, | |
| "loss": 1.3045, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6289308176100629, | |
| "grad_norm": 1.236147403717041, | |
| "learning_rate": 4.4976445437709305e-05, | |
| "loss": 1.1097, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6429070580013976, | |
| "grad_norm": 1.2068607807159424, | |
| "learning_rate": 4.4753424995834596e-05, | |
| "loss": 1.1982, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6568832983927324, | |
| "grad_norm": 1.2430453300476074, | |
| "learning_rate": 4.452613955492649e-05, | |
| "loss": 1.2413, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6708595387840671, | |
| "grad_norm": 1.3011972904205322, | |
| "learning_rate": 4.4294638188610736e-05, | |
| "loss": 1.2005, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6848357791754018, | |
| "grad_norm": 1.0899052619934082, | |
| "learning_rate": 4.405897088078147e-05, | |
| "loss": 1.2271, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6988120195667366, | |
| "grad_norm": 1.1827476024627686, | |
| "learning_rate": 4.3819188514809044e-05, | |
| "loss": 1.242, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7127882599580713, | |
| "grad_norm": 1.1827216148376465, | |
| "learning_rate": 4.3575342862553766e-05, | |
| "loss": 1.0454, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.726764500349406, | |
| "grad_norm": 1.2986303567886353, | |
| "learning_rate": 4.332748657318767e-05, | |
| "loss": 1.0946, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 1.2949241399765015, | |
| "learning_rate": 4.307567316182699e-05, | |
| "loss": 1.2538, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 1.256659984588623, | |
| "learning_rate": 4.2819956997977586e-05, | |
| "loss": 1.3298, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7686932215234102, | |
| "grad_norm": 1.4315185546875, | |
| "learning_rate": 4.2560393293795926e-05, | |
| "loss": 1.2817, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.782669461914745, | |
| "grad_norm": 1.1938406229019165, | |
| "learning_rate": 4.229703809216812e-05, | |
| "loss": 0.9553, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7966457023060797, | |
| "grad_norm": 1.1752980947494507, | |
| "learning_rate": 4.2029948254609613e-05, | |
| "loss": 1.0532, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8106219426974144, | |
| "grad_norm": 1.4171777963638306, | |
| "learning_rate": 4.17591814489881e-05, | |
| "loss": 1.1731, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8245981830887491, | |
| "grad_norm": 1.2295438051223755, | |
| "learning_rate": 4.1484796137072315e-05, | |
| "loss": 1.2059, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8385744234800838, | |
| "grad_norm": 1.2784441709518433, | |
| "learning_rate": 4.120685156190952e-05, | |
| "loss": 1.3045, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8525506638714185, | |
| "grad_norm": 1.3020128011703491, | |
| "learning_rate": 4.0925407735034136e-05, | |
| "loss": 1.2221, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.8665269042627534, | |
| "grad_norm": 1.2527514696121216, | |
| "learning_rate": 4.0640525423510605e-05, | |
| "loss": 1.2331, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8805031446540881, | |
| "grad_norm": 1.2841899394989014, | |
| "learning_rate": 4.035226613681303e-05, | |
| "loss": 1.1167, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.8944793850454228, | |
| "grad_norm": 1.254504680633545, | |
| "learning_rate": 4.006069211354457e-05, | |
| "loss": 1.2525, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9084556254367575, | |
| "grad_norm": 1.295681357383728, | |
| "learning_rate": 3.976586630799935e-05, | |
| "loss": 1.1356, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9224318658280922, | |
| "grad_norm": 1.0384072065353394, | |
| "learning_rate": 3.946785237656992e-05, | |
| "loss": 1.1792, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9364081062194269, | |
| "grad_norm": 1.1121629476547241, | |
| "learning_rate": 3.916671466400307e-05, | |
| "loss": 1.209, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.9503843466107617, | |
| "grad_norm": 1.432450532913208, | |
| "learning_rate": 3.886251818950702e-05, | |
| "loss": 1.221, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9643605870020965, | |
| "grad_norm": 1.472678542137146, | |
| "learning_rate": 3.855532863271302e-05, | |
| "loss": 1.2537, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.9783368273934312, | |
| "grad_norm": 1.285548448562622, | |
| "learning_rate": 3.8245212319494354e-05, | |
| "loss": 1.2792, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9923130677847659, | |
| "grad_norm": 1.2394496202468872, | |
| "learning_rate": 3.793223620764573e-05, | |
| "loss": 1.1743, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0055904961565338, | |
| "grad_norm": 1.683555006980896, | |
| "learning_rate": 3.7616467872426376e-05, | |
| "loss": 1.0054, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.0195667365478687, | |
| "grad_norm": 1.3049145936965942, | |
| "learning_rate": 3.7297975491969684e-05, | |
| "loss": 0.9015, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.0335429769392033, | |
| "grad_norm": 1.439926028251648, | |
| "learning_rate": 3.697682783256278e-05, | |
| "loss": 0.9361, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.047519217330538, | |
| "grad_norm": 1.7054588794708252, | |
| "learning_rate": 3.665309423379904e-05, | |
| "loss": 0.9668, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.0614954577218727, | |
| "grad_norm": 1.674256682395935, | |
| "learning_rate": 3.632684459360685e-05, | |
| "loss": 0.9566, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.0754716981132075, | |
| "grad_norm": 1.1890208721160889, | |
| "learning_rate": 3.5998149353157815e-05, | |
| "loss": 0.8597, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.0894479385045424, | |
| "grad_norm": 1.6892008781433105, | |
| "learning_rate": 3.56670794816577e-05, | |
| "loss": 0.8231, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.103424178895877, | |
| "grad_norm": 1.4274325370788574, | |
| "learning_rate": 3.5333706461023275e-05, | |
| "loss": 0.8254, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.1174004192872118, | |
| "grad_norm": 1.9769346714019775, | |
| "learning_rate": 3.4998102270448606e-05, | |
| "loss": 0.848, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.1313766596785464, | |
| "grad_norm": 1.1409764289855957, | |
| "learning_rate": 3.466033937086381e-05, | |
| "loss": 0.7997, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.1453529000698812, | |
| "grad_norm": 1.563937783241272, | |
| "learning_rate": 3.432049068928994e-05, | |
| "loss": 0.824, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.159329140461216, | |
| "grad_norm": 1.51837158203125, | |
| "learning_rate": 3.39786296030931e-05, | |
| "loss": 0.9618, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.1733053808525507, | |
| "grad_norm": 1.7045314311981201, | |
| "learning_rate": 3.363482992414152e-05, | |
| "loss": 0.7897, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.1872816212438855, | |
| "grad_norm": 1.5531268119812012, | |
| "learning_rate": 3.328916588286858e-05, | |
| "loss": 0.8622, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.20125786163522, | |
| "grad_norm": 1.0864923000335693, | |
| "learning_rate": 3.2941712112245624e-05, | |
| "loss": 0.8361, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.215234102026555, | |
| "grad_norm": 1.7823400497436523, | |
| "learning_rate": 3.259254363166785e-05, | |
| "loss": 0.7441, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.2292103424178895, | |
| "grad_norm": 1.6787755489349365, | |
| "learning_rate": 3.2241735830756656e-05, | |
| "loss": 0.8565, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.2431865828092243, | |
| "grad_norm": 1.9149354696273804, | |
| "learning_rate": 3.188936445308221e-05, | |
| "loss": 0.9051, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.257162823200559, | |
| "grad_norm": 1.828198790550232, | |
| "learning_rate": 3.153550557980943e-05, | |
| "loss": 0.8709, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.2711390635918938, | |
| "grad_norm": 1.6430469751358032, | |
| "learning_rate": 3.118023561327123e-05, | |
| "loss": 0.8763, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.2851153039832286, | |
| "grad_norm": 1.5574500560760498, | |
| "learning_rate": 3.08236312604723e-05, | |
| "loss": 0.9294, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.2990915443745632, | |
| "grad_norm": 1.695155382156372, | |
| "learning_rate": 3.0465769516527163e-05, | |
| "loss": 0.8626, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.313067784765898, | |
| "grad_norm": 1.6467136144638062, | |
| "learning_rate": 3.010672764803606e-05, | |
| "loss": 0.8066, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.3270440251572326, | |
| "grad_norm": 1.6641095876693726, | |
| "learning_rate": 2.9746583176402083e-05, | |
| "loss": 0.8388, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.3410202655485675, | |
| "grad_norm": 1.8467156887054443, | |
| "learning_rate": 2.9385413861093474e-05, | |
| "loss": 0.85, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.3549965059399023, | |
| "grad_norm": 1.6968249082565308, | |
| "learning_rate": 2.9023297682854383e-05, | |
| "loss": 0.8725, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.368972746331237, | |
| "grad_norm": 1.8281203508377075, | |
| "learning_rate": 2.866031282686791e-05, | |
| "loss": 0.9046, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.3829489867225715, | |
| "grad_norm": 1.5624685287475586, | |
| "learning_rate": 2.829653766587499e-05, | |
| "loss": 0.8172, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.3969252271139063, | |
| "grad_norm": 1.5298397541046143, | |
| "learning_rate": 2.793205074325282e-05, | |
| "loss": 0.9824, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4109014675052411, | |
| "grad_norm": 1.857814908027649, | |
| "learning_rate": 2.756693075605634e-05, | |
| "loss": 0.8879, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.4248777078965758, | |
| "grad_norm": 1.6862074136734009, | |
| "learning_rate": 2.7201256538026698e-05, | |
| "loss": 0.8606, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.4388539482879106, | |
| "grad_norm": 1.5606305599212646, | |
| "learning_rate": 2.683510704257003e-05, | |
| "loss": 0.9077, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.4528301886792452, | |
| "grad_norm": 1.7978451251983643, | |
| "learning_rate": 2.6468561325710527e-05, | |
| "loss": 0.9191, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.46680642907058, | |
| "grad_norm": 1.6466025114059448, | |
| "learning_rate": 2.6101698529021267e-05, | |
| "loss": 0.8355, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.4807826694619148, | |
| "grad_norm": 1.6290067434310913, | |
| "learning_rate": 2.5734597862536653e-05, | |
| "loss": 1.0066, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.4947589098532494, | |
| "grad_norm": 1.7666162252426147, | |
| "learning_rate": 2.536733858764998e-05, | |
| "loss": 0.8495, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.508735150244584, | |
| "grad_norm": 1.616740107536316, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.8858, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.5227113906359189, | |
| "grad_norm": 2.093693733215332, | |
| "learning_rate": 2.4632661412350027e-05, | |
| "loss": 0.8448, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.5366876310272537, | |
| "grad_norm": 2.0865767002105713, | |
| "learning_rate": 2.4265402137463356e-05, | |
| "loss": 0.7736, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.5506638714185885, | |
| "grad_norm": 1.4208672046661377, | |
| "learning_rate": 2.389830147097874e-05, | |
| "loss": 0.8127, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.5646401118099231, | |
| "grad_norm": 1.1758469343185425, | |
| "learning_rate": 2.3531438674289485e-05, | |
| "loss": 0.8384, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.5786163522012577, | |
| "grad_norm": 1.971389651298523, | |
| "learning_rate": 2.316489295742997e-05, | |
| "loss": 0.8234, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.5925925925925926, | |
| "grad_norm": 1.6852281093597412, | |
| "learning_rate": 2.2798743461973308e-05, | |
| "loss": 0.8124, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.6065688329839274, | |
| "grad_norm": 1.8564770221710205, | |
| "learning_rate": 2.2433069243943665e-05, | |
| "loss": 0.9065, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.6205450733752622, | |
| "grad_norm": 1.841373324394226, | |
| "learning_rate": 2.2067949256747186e-05, | |
| "loss": 0.8598, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.6345213137665968, | |
| "grad_norm": 1.6030656099319458, | |
| "learning_rate": 2.1703462334125013e-05, | |
| "loss": 0.7746, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.6484975541579314, | |
| "grad_norm": 1.712646484375, | |
| "learning_rate": 2.1339687173132104e-05, | |
| "loss": 0.8264, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.6624737945492662, | |
| "grad_norm": 2.1567349433898926, | |
| "learning_rate": 2.0976702317145623e-05, | |
| "loss": 0.839, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.676450034940601, | |
| "grad_norm": 2.055509567260742, | |
| "learning_rate": 2.061458613890653e-05, | |
| "loss": 0.7234, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.6904262753319357, | |
| "grad_norm": 1.855790615081787, | |
| "learning_rate": 2.0253416823597926e-05, | |
| "loss": 0.7455, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.7044025157232703, | |
| "grad_norm": 1.8636531829833984, | |
| "learning_rate": 1.9893272351963946e-05, | |
| "loss": 0.7849, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.718378756114605, | |
| "grad_norm": 1.9092494249343872, | |
| "learning_rate": 1.953423048347284e-05, | |
| "loss": 0.7652, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.73235499650594, | |
| "grad_norm": 1.8068718910217285, | |
| "learning_rate": 1.9176368739527706e-05, | |
| "loss": 0.932, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.7463312368972748, | |
| "grad_norm": 2.015453338623047, | |
| "learning_rate": 1.8819764386728773e-05, | |
| "loss": 0.7782, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.7603074772886094, | |
| "grad_norm": 1.833056092262268, | |
| "learning_rate": 1.8464494420190574e-05, | |
| "loss": 0.8057, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.774283717679944, | |
| "grad_norm": 2.147026538848877, | |
| "learning_rate": 1.811063554691779e-05, | |
| "loss": 0.7086, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.7882599580712788, | |
| "grad_norm": 1.8407797813415527, | |
| "learning_rate": 1.775826416924335e-05, | |
| "loss": 0.731, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.8022361984626136, | |
| "grad_norm": 2.3300631046295166, | |
| "learning_rate": 1.740745636833216e-05, | |
| "loss": 0.6734, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.8162124388539485, | |
| "grad_norm": 2.4620823860168457, | |
| "learning_rate": 1.7058287887754375e-05, | |
| "loss": 0.7447, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.830188679245283, | |
| "grad_norm": 1.8684221506118774, | |
| "learning_rate": 1.671083411713143e-05, | |
| "loss": 0.7376, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.8441649196366177, | |
| "grad_norm": 2.022292375564575, | |
| "learning_rate": 1.6365170075858487e-05, | |
| "loss": 0.6937, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.8581411600279525, | |
| "grad_norm": 1.459423542022705, | |
| "learning_rate": 1.60213703969069e-05, | |
| "loss": 0.7536, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.8721174004192873, | |
| "grad_norm": 2.1186535358428955, | |
| "learning_rate": 1.567950931071007e-05, | |
| "loss": 0.7683, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.886093640810622, | |
| "grad_norm": 2.1873745918273926, | |
| "learning_rate": 1.5339660629136194e-05, | |
| "loss": 0.7859, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.9000698812019565, | |
| "grad_norm": 1.9905816316604614, | |
| "learning_rate": 1.5001897729551393e-05, | |
| "loss": 0.7714, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.9140461215932913, | |
| "grad_norm": 1.9993782043457031, | |
| "learning_rate": 1.4666293538976727e-05, | |
| "loss": 0.7884, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.9280223619846262, | |
| "grad_norm": 2.1728382110595703, | |
| "learning_rate": 1.4332920518342316e-05, | |
| "loss": 0.7586, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.941998602375961, | |
| "grad_norm": 1.4806022644042969, | |
| "learning_rate": 1.4001850646842191e-05, | |
| "loss": 0.6538, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.9559748427672956, | |
| "grad_norm": 1.89388906955719, | |
| "learning_rate": 1.367315540639315e-05, | |
| "loss": 0.7698, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.9699510831586302, | |
| "grad_norm": 2.3090474605560303, | |
| "learning_rate": 1.3346905766200962e-05, | |
| "loss": 0.7763, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.983927323549965, | |
| "grad_norm": 1.79945707321167, | |
| "learning_rate": 1.3023172167437213e-05, | |
| "loss": 0.7061, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.9979035639412999, | |
| "grad_norm": 2.0168325901031494, | |
| "learning_rate": 1.270202450803032e-05, | |
| "loss": 0.7628, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.0111809923130677, | |
| "grad_norm": 1.8525409698486328, | |
| "learning_rate": 1.2383532127573638e-05, | |
| "loss": 0.5339, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.0251572327044025, | |
| "grad_norm": 3.1745312213897705, | |
| "learning_rate": 1.2067763792354277e-05, | |
| "loss": 0.5761, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.0391334730957373, | |
| "grad_norm": 2.129549741744995, | |
| "learning_rate": 1.1754787680505657e-05, | |
| "loss": 0.5323, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.053109713487072, | |
| "grad_norm": 1.1728535890579224, | |
| "learning_rate": 1.1444671367286987e-05, | |
| "loss": 0.5582, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.0670859538784065, | |
| "grad_norm": 1.981683611869812, | |
| "learning_rate": 1.1137481810492989e-05, | |
| "loss": 0.5555, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.0810621942697414, | |
| "grad_norm": 2.0317366123199463, | |
| "learning_rate": 1.0833285335996934e-05, | |
| "loss": 0.5624, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.095038434661076, | |
| "grad_norm": 2.0169522762298584, | |
| "learning_rate": 1.0532147623430085e-05, | |
| "loss": 0.4676, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.109014675052411, | |
| "grad_norm": 2.0225350856781006, | |
| "learning_rate": 1.0234133692000652e-05, | |
| "loss": 0.5178, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.1229909154437454, | |
| "grad_norm": 1.7321361303329468, | |
| "learning_rate": 9.939307886455435e-06, | |
| "loss": 0.5373, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.1369671558350802, | |
| "grad_norm": 1.4348721504211426, | |
| "learning_rate": 9.647733863186966e-06, | |
| "loss": 0.4503, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.150943396226415, | |
| "grad_norm": 2.057035446166992, | |
| "learning_rate": 9.359474576489399e-06, | |
| "loss": 0.5713, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.16491963661775, | |
| "grad_norm": 2.8267476558685303, | |
| "learning_rate": 9.074592264965873e-06, | |
| "loss": 0.5322, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.1788958770090847, | |
| "grad_norm": 3.4240221977233887, | |
| "learning_rate": 8.793148438090484e-06, | |
| "loss": 0.535, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.192872117400419, | |
| "grad_norm": 2.6496121883392334, | |
| "learning_rate": 8.515203862927687e-06, | |
| "loss": 0.4921, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.206848357791754, | |
| "grad_norm": 2.2704765796661377, | |
| "learning_rate": 8.240818551011905e-06, | |
| "loss": 0.5066, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.2208245981830887, | |
| "grad_norm": 2.1157729625701904, | |
| "learning_rate": 7.970051745390389e-06, | |
| "loss": 0.4063, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.2348008385744236, | |
| "grad_norm": 0.6612274050712585, | |
| "learning_rate": 7.702961907831882e-06, | |
| "loss": 0.4551, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.2487770789657584, | |
| "grad_norm": 2.5119524002075195, | |
| "learning_rate": 7.439606706204083e-06, | |
| "loss": 0.6081, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.262753319357093, | |
| "grad_norm": 2.2395474910736084, | |
| "learning_rate": 7.180043002022416e-06, | |
| "loss": 0.4833, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.2767295597484276, | |
| "grad_norm": 2.71502947807312, | |
| "learning_rate": 6.9243268381730176e-06, | |
| "loss": 0.5712, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.2907058001397624, | |
| "grad_norm": 2.18860125541687, | |
| "learning_rate": 6.6725134268123404e-06, | |
| "loss": 0.5765, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.3046820405310973, | |
| "grad_norm": 1.808046817779541, | |
| "learning_rate": 6.424657137446241e-06, | |
| "loss": 0.4617, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.318658280922432, | |
| "grad_norm": 2.0803866386413574, | |
| "learning_rate": 6.18081148519096e-06, | |
| "loss": 0.5395, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.3326345213137665, | |
| "grad_norm": 2.06593656539917, | |
| "learning_rate": 5.941029119218536e-06, | |
| "loss": 0.5479, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.3466107617051013, | |
| "grad_norm": 0.7248632311820984, | |
| "learning_rate": 5.705361811389262e-06, | |
| "loss": 0.5115, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.360587002096436, | |
| "grad_norm": 1.9568665027618408, | |
| "learning_rate": 5.473860445073515e-06, | |
| "loss": 0.4513, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.374563242487771, | |
| "grad_norm": 1.4630240201950073, | |
| "learning_rate": 5.246575004165408e-06, | |
| "loss": 0.4364, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.3885394828791053, | |
| "grad_norm": 1.8359235525131226, | |
| "learning_rate": 5.0235545622907e-06, | |
| "loss": 0.4796, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.40251572327044, | |
| "grad_norm": 2.409799575805664, | |
| "learning_rate": 4.804847272211227e-06, | |
| "loss": 0.4301, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.416491963661775, | |
| "grad_norm": 2.9206278324127197, | |
| "learning_rate": 4.5905003554281125e-06, | |
| "loss": 0.4912, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.43046820405311, | |
| "grad_norm": 2.4633424282073975, | |
| "learning_rate": 4.3805600919861075e-06, | |
| "loss": 0.6244, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 1.9702569246292114, | |
| "learning_rate": 4.175071810481193e-06, | |
| "loss": 0.5056, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.458420684835779, | |
| "grad_norm": 2.079681396484375, | |
| "learning_rate": 3.974079878273562e-06, | |
| "loss": 0.4396, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.472396925227114, | |
| "grad_norm": 2.3693020343780518, | |
| "learning_rate": 3.777627691908209e-06, | |
| "loss": 0.4821, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.4863731656184487, | |
| "grad_norm": 2.4170966148376465, | |
| "learning_rate": 3.5857576677450933e-06, | |
| "loss": 0.5048, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.5003494060097835, | |
| "grad_norm": 2.2658681869506836, | |
| "learning_rate": 3.398511232800927e-06, | |
| "loss": 0.5569, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.514325646401118, | |
| "grad_norm": 1.8655173778533936, | |
| "learning_rate": 3.2159288158046224e-06, | |
| "loss": 0.516, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.5283018867924527, | |
| "grad_norm": 2.5998153686523438, | |
| "learning_rate": 3.0380498384682154e-06, | |
| "loss": 0.4745, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.5422781271837875, | |
| "grad_norm": 2.643659830093384, | |
| "learning_rate": 2.8649127069752644e-06, | |
| "loss": 0.5829, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.5562543675751224, | |
| "grad_norm": 2.506516933441162, | |
| "learning_rate": 2.696554803688517e-06, | |
| "loss": 0.5319, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.570230607966457, | |
| "grad_norm": 2.2960050106048584, | |
| "learning_rate": 2.533012479078575e-06, | |
| "loss": 0.4738, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.584206848357792, | |
| "grad_norm": 3.9678826332092285, | |
| "learning_rate": 2.3743210438754243e-06, | |
| "loss": 0.4935, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.5981830887491264, | |
| "grad_norm": 2.771167278289795, | |
| "learning_rate": 2.2205147614444312e-06, | |
| "loss": 0.5127, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.6121593291404612, | |
| "grad_norm": 3.70699143409729, | |
| "learning_rate": 2.071626840388463e-06, | |
| "loss": 0.4047, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.626135569531796, | |
| "grad_norm": 2.3208189010620117, | |
| "learning_rate": 1.9276894273777518e-06, | |
| "loss": 0.5239, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.6401118099231304, | |
| "grad_norm": 2.6511785984039307, | |
| "learning_rate": 1.7887336002090639e-06, | |
| "loss": 0.4359, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.6540880503144653, | |
| "grad_norm": 1.7029021978378296, | |
| "learning_rate": 1.6547893610956127e-06, | |
| "loss": 0.4334, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.6680642907058, | |
| "grad_norm": 2.3018014430999756, | |
| "learning_rate": 1.5258856301892471e-06, | |
| "loss": 0.5468, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.682040531097135, | |
| "grad_norm": 2.5188214778900146, | |
| "learning_rate": 1.4020502393362362e-06, | |
| "loss": 0.5802, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.6960167714884697, | |
| "grad_norm": 2.3051352500915527, | |
| "learning_rate": 1.2833099260680382e-06, | |
| "loss": 0.5138, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.7099930118798046, | |
| "grad_norm": 2.7857372760772705, | |
| "learning_rate": 1.1696903278283543e-06, | |
| "loss": 0.5259, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.723969252271139, | |
| "grad_norm": 2.314476251602173, | |
| "learning_rate": 1.06121597643771e-06, | |
| "loss": 0.5131, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.737945492662474, | |
| "grad_norm": 2.091313123703003, | |
| "learning_rate": 9.579102927967349e-07, | |
| "loss": 0.4142, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.7519217330538086, | |
| "grad_norm": 2.41235089302063, | |
| "learning_rate": 8.59795581829298e-07, | |
| "loss": 0.4547, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.765897973445143, | |
| "grad_norm": 2.577587842941284, | |
| "learning_rate": 7.668930276666403e-07, | |
| "loss": 0.5207, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.779874213836478, | |
| "grad_norm": 2.426898241043091, | |
| "learning_rate": 6.792226890734444e-07, | |
| "loss": 0.4958, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.7938504542278126, | |
| "grad_norm": 1.4604806900024414, | |
| "learning_rate": 5.968034951169155e-07, | |
| "loss": 0.4254, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.8078266946191475, | |
| "grad_norm": 3.135892152786255, | |
| "learning_rate": 5.196532410797844e-07, | |
| "loss": 0.4954, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.8218029350104823, | |
| "grad_norm": 1.6824243068695068, | |
| "learning_rate": 4.477885846180724e-07, | |
| "loss": 0.585, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.835779175401817, | |
| "grad_norm": 2.4747464656829834, | |
| "learning_rate": 3.8122504216451804e-07, | |
| "loss": 0.5324, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.8497554157931515, | |
| "grad_norm": 2.6158955097198486, | |
| "learning_rate": 3.1997698557839905e-07, | |
| "loss": 0.5783, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.8637316561844863, | |
| "grad_norm": 1.8758794069290161, | |
| "learning_rate": 2.6405763904246706e-07, | |
| "loss": 0.5657, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.877707896575821, | |
| "grad_norm": 2.1772301197052, | |
| "learning_rate": 2.134790762076927e-07, | |
| "loss": 0.4668, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.891684136967156, | |
| "grad_norm": 2.552476644515991, | |
| "learning_rate": 1.682522175864315e-07, | |
| "loss": 0.5831, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.9056603773584904, | |
| "grad_norm": 2.3809163570404053, | |
| "learning_rate": 1.283868281945322e-07, | |
| "loss": 0.4918, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.919636617749825, | |
| "grad_norm": 2.3678464889526367, | |
| "learning_rate": 9.389151544298147e-08, | |
| "loss": 0.4573, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.93361285814116, | |
| "grad_norm": 2.0146450996398926, | |
| "learning_rate": 6.477372727943798e-08, | |
| "loss": 0.5716, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.947589098532495, | |
| "grad_norm": 1.9456125497817993, | |
| "learning_rate": 4.103975058015186e-08, | |
| "loss": 0.5161, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.9615653389238297, | |
| "grad_norm": 1.9012235403060913, | |
| "learning_rate": 2.269470979253674e-08, | |
| "loss": 0.3703, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.9755415793151645, | |
| "grad_norm": 2.5397086143493652, | |
| "learning_rate": 9.742565828751993e-09, | |
| "loss": 0.4771, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.989517819706499, | |
| "grad_norm": 2.45979380607605, | |
| "learning_rate": 2.1861152104868387e-09, | |
| "loss": 0.3966, | |
| "step": 2140 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2148, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.1937068893945856e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |