| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0359116022099448, |
| "eval_steps": 500, |
| "global_step": 1500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006906077348066298, |
| "grad_norm": 10.042680740356445, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.9482, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013812154696132596, |
| "grad_norm": 2.674130916595459, |
| "learning_rate": 8.444444444444446e-06, |
| "loss": 0.4538, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.020718232044198894, |
| "grad_norm": 2.9186959266662598, |
| "learning_rate": 1.2888888888888889e-05, |
| "loss": 0.2859, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.027624309392265192, |
| "grad_norm": 2.0180764198303223, |
| "learning_rate": 1.7333333333333336e-05, |
| "loss": 0.2322, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.034530386740331494, |
| "grad_norm": 1.7288544178009033, |
| "learning_rate": 2.177777777777778e-05, |
| "loss": 0.2054, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04143646408839779, |
| "grad_norm": 2.643184185028076, |
| "learning_rate": 2.6222222222222226e-05, |
| "loss": 0.2063, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04834254143646409, |
| "grad_norm": 1.8499925136566162, |
| "learning_rate": 3.066666666666667e-05, |
| "loss": 0.16, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.055248618784530384, |
| "grad_norm": 1.6038734912872314, |
| "learning_rate": 3.511111111111111e-05, |
| "loss": 0.1561, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.062154696132596686, |
| "grad_norm": 0.8061902523040771, |
| "learning_rate": 3.9555555555555556e-05, |
| "loss": 0.1359, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06906077348066299, |
| "grad_norm": 2.517512083053589, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.1365, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07596685082872928, |
| "grad_norm": 1.773244857788086, |
| "learning_rate": 4.844444444444445e-05, |
| "loss": 0.1359, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.08287292817679558, |
| "grad_norm": 1.7556045055389404, |
| "learning_rate": 5.2888888888888885e-05, |
| "loss": 0.1234, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08977900552486189, |
| "grad_norm": 1.2455209493637085, |
| "learning_rate": 5.7333333333333336e-05, |
| "loss": 0.116, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09668508287292818, |
| "grad_norm": 1.0859894752502441, |
| "learning_rate": 6.177777777777779e-05, |
| "loss": 0.1172, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10359116022099447, |
| "grad_norm": 1.5400012731552124, |
| "learning_rate": 6.622222222222224e-05, |
| "loss": 0.1125, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.11049723756906077, |
| "grad_norm": 0.9862754940986633, |
| "learning_rate": 7.066666666666667e-05, |
| "loss": 0.107, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11740331491712708, |
| "grad_norm": 0.7784848809242249, |
| "learning_rate": 7.511111111111111e-05, |
| "loss": 0.1013, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12430939226519337, |
| "grad_norm": 1.40177583694458, |
| "learning_rate": 7.955555555555556e-05, |
| "loss": 0.103, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.13121546961325967, |
| "grad_norm": 0.9513179659843445, |
| "learning_rate": 8.4e-05, |
| "loss": 0.0903, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.13812154696132597, |
| "grad_norm": 1.2130392789840698, |
| "learning_rate": 8.844444444444445e-05, |
| "loss": 0.1034, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.14502762430939226, |
| "grad_norm": 0.6497124433517456, |
| "learning_rate": 9.28888888888889e-05, |
| "loss": 0.1053, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15193370165745856, |
| "grad_norm": 1.1570148468017578, |
| "learning_rate": 9.733333333333335e-05, |
| "loss": 0.1007, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.15883977900552487, |
| "grad_norm": 1.1539334058761597, |
| "learning_rate": 9.999978398337033e-05, |
| "loss": 0.0842, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.16574585635359115, |
| "grad_norm": 1.02969491481781, |
| "learning_rate": 9.999735381772228e-05, |
| "loss": 0.0855, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.17265193370165746, |
| "grad_norm": 0.652882993221283, |
| "learning_rate": 9.999222359731514e-05, |
| "loss": 0.0817, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.17955801104972377, |
| "grad_norm": 0.7674477100372314, |
| "learning_rate": 9.998439359920107e-05, |
| "loss": 0.0707, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.18646408839779005, |
| "grad_norm": 0.755526065826416, |
| "learning_rate": 9.997386424623091e-05, |
| "loss": 0.0815, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.19337016574585636, |
| "grad_norm": 0.7330914735794067, |
| "learning_rate": 9.996063610703137e-05, |
| "loss": 0.0773, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.20027624309392264, |
| "grad_norm": 0.5776280164718628, |
| "learning_rate": 9.994470989597423e-05, |
| "loss": 0.0861, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.20718232044198895, |
| "grad_norm": 0.6976489424705505, |
| "learning_rate": 9.992608647313789e-05, |
| "loss": 0.071, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21408839779005526, |
| "grad_norm": 0.550521194934845, |
| "learning_rate": 9.990476684426075e-05, |
| "loss": 0.0743, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.22099447513812154, |
| "grad_norm": 0.5623897314071655, |
| "learning_rate": 9.988075216068711e-05, |
| "loss": 0.068, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.22790055248618785, |
| "grad_norm": 0.5536251068115234, |
| "learning_rate": 9.98540437193048e-05, |
| "loss": 0.0728, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.23480662983425415, |
| "grad_norm": 0.745213508605957, |
| "learning_rate": 9.982464296247522e-05, |
| "loss": 0.0662, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.24171270718232044, |
| "grad_norm": 0.9005521535873413, |
| "learning_rate": 9.979255147795549e-05, |
| "loss": 0.0645, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.24861878453038674, |
| "grad_norm": 0.4740263819694519, |
| "learning_rate": 9.975777099881263e-05, |
| "loss": 0.0669, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.255524861878453, |
| "grad_norm": 0.7341731786727905, |
| "learning_rate": 9.972030340333001e-05, |
| "loss": 0.063, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.26243093922651933, |
| "grad_norm": 0.5400853753089905, |
| "learning_rate": 9.968015071490591e-05, |
| "loss": 0.0574, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.26933701657458564, |
| "grad_norm": 0.7812430262565613, |
| "learning_rate": 9.963731510194425e-05, |
| "loss": 0.0602, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.27624309392265195, |
| "grad_norm": 0.6542518734931946, |
| "learning_rate": 9.959179887773744e-05, |
| "loss": 0.0522, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.28314917127071826, |
| "grad_norm": 0.619616687297821, |
| "learning_rate": 9.954360450034155e-05, |
| "loss": 0.0657, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2900552486187845, |
| "grad_norm": 0.7797542214393616, |
| "learning_rate": 9.949273457244348e-05, |
| "loss": 0.0568, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2969613259668508, |
| "grad_norm": 0.6183603405952454, |
| "learning_rate": 9.943919184122043e-05, |
| "loss": 0.0589, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.30386740331491713, |
| "grad_norm": 0.7843531966209412, |
| "learning_rate": 9.938297919819157e-05, |
| "loss": 0.0622, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.31077348066298344, |
| "grad_norm": 0.629660964012146, |
| "learning_rate": 9.932409967906184e-05, |
| "loss": 0.0626, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.31767955801104975, |
| "grad_norm": 0.693767786026001, |
| "learning_rate": 9.926255646355804e-05, |
| "loss": 0.0648, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.324585635359116, |
| "grad_norm": 0.5813653469085693, |
| "learning_rate": 9.91983528752571e-05, |
| "loss": 0.0606, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3314917127071823, |
| "grad_norm": 0.88108229637146, |
| "learning_rate": 9.91314923814066e-05, |
| "loss": 0.0579, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3383977900552486, |
| "grad_norm": 1.2719063758850098, |
| "learning_rate": 9.906197859273753e-05, |
| "loss": 0.0601, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3453038674033149, |
| "grad_norm": 0.566552460193634, |
| "learning_rate": 9.89898152632693e-05, |
| "loss": 0.0578, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.35220994475138123, |
| "grad_norm": 0.5750138163566589, |
| "learning_rate": 9.891500629010694e-05, |
| "loss": 0.0633, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.35911602209944754, |
| "grad_norm": 0.6586229801177979, |
| "learning_rate": 9.88375557132308e-05, |
| "loss": 0.0563, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3660220994475138, |
| "grad_norm": 0.6225771903991699, |
| "learning_rate": 9.875746771527816e-05, |
| "loss": 0.0568, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3729281767955801, |
| "grad_norm": 0.657524049282074, |
| "learning_rate": 9.867474662131754e-05, |
| "loss": 0.0489, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3798342541436464, |
| "grad_norm": 0.7671463489532471, |
| "learning_rate": 9.858939689861506e-05, |
| "loss": 0.0538, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3867403314917127, |
| "grad_norm": 0.42189261317253113, |
| "learning_rate": 9.850142315639312e-05, |
| "loss": 0.046, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.393646408839779, |
| "grad_norm": 0.48630833625793457, |
| "learning_rate": 9.841083014558158e-05, |
| "loss": 0.0505, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4005524861878453, |
| "grad_norm": 0.8498868346214294, |
| "learning_rate": 9.831762275856118e-05, |
| "loss": 0.0529, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4074585635359116, |
| "grad_norm": 0.5209752321243286, |
| "learning_rate": 9.82218060288993e-05, |
| "loss": 0.0504, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4143646408839779, |
| "grad_norm": 0.46043241024017334, |
| "learning_rate": 9.81233851310781e-05, |
| "loss": 0.0536, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4212707182320442, |
| "grad_norm": 0.8280401229858398, |
| "learning_rate": 9.802236538021518e-05, |
| "loss": 0.0558, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4281767955801105, |
| "grad_norm": 0.9656640887260437, |
| "learning_rate": 9.791875223177643e-05, |
| "loss": 0.0512, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4350828729281768, |
| "grad_norm": 0.6366522312164307, |
| "learning_rate": 9.781255128128148e-05, |
| "loss": 0.0444, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4419889502762431, |
| "grad_norm": 0.4692375957965851, |
| "learning_rate": 9.77037682640015e-05, |
| "loss": 0.0465, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4488950276243094, |
| "grad_norm": 0.450482577085495, |
| "learning_rate": 9.759240905464946e-05, |
| "loss": 0.0481, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4558011049723757, |
| "grad_norm": 0.47211357951164246, |
| "learning_rate": 9.74784796670629e-05, |
| "loss": 0.05, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.462707182320442, |
| "grad_norm": 0.6791536211967468, |
| "learning_rate": 9.736198625387916e-05, |
| "loss": 0.0578, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4696132596685083, |
| "grad_norm": 0.7372534275054932, |
| "learning_rate": 9.724293510620306e-05, |
| "loss": 0.0585, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.47651933701657456, |
| "grad_norm": 0.49842604994773865, |
| "learning_rate": 9.712133265326722e-05, |
| "loss": 0.0489, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.48342541436464087, |
| "grad_norm": 0.35578662157058716, |
| "learning_rate": 9.699718546208484e-05, |
| "loss": 0.0519, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4903314917127072, |
| "grad_norm": 0.5673187375068665, |
| "learning_rate": 9.6870500237095e-05, |
| "loss": 0.0623, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4972375690607735, |
| "grad_norm": 0.5961718559265137, |
| "learning_rate": 9.674128381980072e-05, |
| "loss": 0.0497, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5041436464088398, |
| "grad_norm": 0.47361811995506287, |
| "learning_rate": 9.660954318839933e-05, |
| "loss": 0.0502, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.511049723756906, |
| "grad_norm": 0.7358847260475159, |
| "learning_rate": 9.647528545740573e-05, |
| "loss": 0.0434, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5179558011049724, |
| "grad_norm": 0.7085921168327332, |
| "learning_rate": 9.633851787726815e-05, |
| "loss": 0.0534, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5248618784530387, |
| "grad_norm": 0.5258869528770447, |
| "learning_rate": 9.619924783397661e-05, |
| "loss": 0.0425, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5317679558011049, |
| "grad_norm": 0.4688310921192169, |
| "learning_rate": 9.6057482848664e-05, |
| "loss": 0.0439, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5386740331491713, |
| "grad_norm": 0.4370345175266266, |
| "learning_rate": 9.591323057719998e-05, |
| "loss": 0.0464, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5455801104972375, |
| "grad_norm": 0.45098453760147095, |
| "learning_rate": 9.576649880977748e-05, |
| "loss": 0.0428, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5524861878453039, |
| "grad_norm": 0.36217615008354187, |
| "learning_rate": 9.561729547049199e-05, |
| "loss": 0.0514, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5593922651933702, |
| "grad_norm": 0.7802874445915222, |
| "learning_rate": 9.546562861691369e-05, |
| "loss": 0.0487, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5662983425414365, |
| "grad_norm": 0.4431716799736023, |
| "learning_rate": 9.531150643965223e-05, |
| "loss": 0.0511, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5732044198895028, |
| "grad_norm": 0.9877989292144775, |
| "learning_rate": 9.51549372619145e-05, |
| "loss": 0.0498, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.580110497237569, |
| "grad_norm": 0.5740323066711426, |
| "learning_rate": 9.499592953905504e-05, |
| "loss": 0.0464, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5870165745856354, |
| "grad_norm": 0.3987601101398468, |
| "learning_rate": 9.483449185811948e-05, |
| "loss": 0.0569, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5939226519337016, |
| "grad_norm": 0.6680428385734558, |
| "learning_rate": 9.467063293738081e-05, |
| "loss": 0.049, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.600828729281768, |
| "grad_norm": 0.5118589997291565, |
| "learning_rate": 9.450436162586853e-05, |
| "loss": 0.0462, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6077348066298343, |
| "grad_norm": 0.4932630658149719, |
| "learning_rate": 9.433568690289075e-05, |
| "loss": 0.0439, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6146408839779005, |
| "grad_norm": 0.3190293312072754, |
| "learning_rate": 9.416461787754935e-05, |
| "loss": 0.0411, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6215469613259669, |
| "grad_norm": 0.3397425711154938, |
| "learning_rate": 9.3991163788248e-05, |
| "loss": 0.0458, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6284530386740331, |
| "grad_norm": 0.7478238344192505, |
| "learning_rate": 9.381533400219318e-05, |
| "loss": 0.0419, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6353591160220995, |
| "grad_norm": 0.5642843842506409, |
| "learning_rate": 9.36371380148885e-05, |
| "loss": 0.0461, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6422651933701657, |
| "grad_norm": 0.3694552183151245, |
| "learning_rate": 9.345658544962166e-05, |
| "loss": 0.0415, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.649171270718232, |
| "grad_norm": 0.45040807127952576, |
| "learning_rate": 9.327368605694502e-05, |
| "loss": 0.043, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6560773480662984, |
| "grad_norm": 0.31986427307128906, |
| "learning_rate": 9.30884497141488e-05, |
| "loss": 0.0478, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6629834254143646, |
| "grad_norm": 0.6989539265632629, |
| "learning_rate": 9.290088642472783e-05, |
| "loss": 0.0396, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.669889502762431, |
| "grad_norm": 0.5615038871765137, |
| "learning_rate": 9.27110063178412e-05, |
| "loss": 0.0419, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6767955801104972, |
| "grad_norm": 0.4937025010585785, |
| "learning_rate": 9.251881964776535e-05, |
| "loss": 0.044, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6837016574585635, |
| "grad_norm": 0.4085635840892792, |
| "learning_rate": 9.232433679334018e-05, |
| "loss": 0.0431, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6906077348066298, |
| "grad_norm": 0.548740029335022, |
| "learning_rate": 9.212756825740873e-05, |
| "loss": 0.0408, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6975138121546961, |
| "grad_norm": 0.5693575739860535, |
| "learning_rate": 9.192852466624981e-05, |
| "loss": 0.0411, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7044198895027625, |
| "grad_norm": 0.532451868057251, |
| "learning_rate": 9.172721676900419e-05, |
| "loss": 0.0426, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7113259668508287, |
| "grad_norm": 0.6173306703567505, |
| "learning_rate": 9.152365543709416e-05, |
| "loss": 0.0452, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7182320441988951, |
| "grad_norm": 0.5103174448013306, |
| "learning_rate": 9.131785166363638e-05, |
| "loss": 0.0415, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7251381215469613, |
| "grad_norm": 0.3975609242916107, |
| "learning_rate": 9.11098165628482e-05, |
| "loss": 0.0346, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7320441988950276, |
| "grad_norm": 0.48710817098617554, |
| "learning_rate": 9.089956136944751e-05, |
| "loss": 0.0477, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.738950276243094, |
| "grad_norm": 0.44057202339172363, |
| "learning_rate": 9.06870974380459e-05, |
| "loss": 0.0449, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7458563535911602, |
| "grad_norm": 0.3550662100315094, |
| "learning_rate": 9.047243624253564e-05, |
| "loss": 0.0458, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7527624309392266, |
| "grad_norm": 0.5015395879745483, |
| "learning_rate": 9.025558937546988e-05, |
| "loss": 0.0347, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7596685082872928, |
| "grad_norm": 0.6827583312988281, |
| "learning_rate": 9.003656854743667e-05, |
| "loss": 0.0397, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7665745856353591, |
| "grad_norm": 0.4369044601917267, |
| "learning_rate": 8.981538558642663e-05, |
| "loss": 0.0363, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7734806629834254, |
| "grad_norm": 0.41478782892227173, |
| "learning_rate": 8.959205243719402e-05, |
| "loss": 0.037, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7803867403314917, |
| "grad_norm": 0.3793696463108063, |
| "learning_rate": 8.936658116061178e-05, |
| "loss": 0.0462, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.787292817679558, |
| "grad_norm": 0.39440885186195374, |
| "learning_rate": 8.913898393302021e-05, |
| "loss": 0.0435, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7941988950276243, |
| "grad_norm": 0.3810882270336151, |
| "learning_rate": 8.890927304556935e-05, |
| "loss": 0.0394, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8011049723756906, |
| "grad_norm": 0.2765768766403198, |
| "learning_rate": 8.867746090355525e-05, |
| "loss": 0.0415, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8080110497237569, |
| "grad_norm": 0.626876711845398, |
| "learning_rate": 8.844356002574996e-05, |
| "loss": 0.0383, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8149171270718232, |
| "grad_norm": 0.4267805516719818, |
| "learning_rate": 8.820758304372557e-05, |
| "loss": 0.0345, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8218232044198895, |
| "grad_norm": 0.6483110785484314, |
| "learning_rate": 8.796954270117199e-05, |
| "loss": 0.0383, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8287292817679558, |
| "grad_norm": 0.40159228444099426, |
| "learning_rate": 8.772945185320875e-05, |
| "loss": 0.0399, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.835635359116022, |
| "grad_norm": 0.29305708408355713, |
| "learning_rate": 8.74873234656908e-05, |
| "loss": 0.0387, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8425414364640884, |
| "grad_norm": 0.49229493737220764, |
| "learning_rate": 8.724317061450824e-05, |
| "loss": 0.036, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.8494475138121547, |
| "grad_norm": 0.328227698802948, |
| "learning_rate": 8.699700648488027e-05, |
| "loss": 0.0379, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.856353591160221, |
| "grad_norm": 0.3456778824329376, |
| "learning_rate": 8.674884437064302e-05, |
| "loss": 0.0369, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8632596685082873, |
| "grad_norm": 0.5480336546897888, |
| "learning_rate": 8.64986976735317e-05, |
| "loss": 0.0411, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8701657458563536, |
| "grad_norm": 0.37527692317962646, |
| "learning_rate": 8.624657990245687e-05, |
| "loss": 0.0348, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.8770718232044199, |
| "grad_norm": 0.5111171007156372, |
| "learning_rate": 8.599250467277483e-05, |
| "loss": 0.0427, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.8839779005524862, |
| "grad_norm": 0.416939377784729, |
| "learning_rate": 8.573648570555245e-05, |
| "loss": 0.0416, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8908839779005525, |
| "grad_norm": 0.36200088262557983, |
| "learning_rate": 8.547853682682604e-05, |
| "loss": 0.0408, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.8977900552486188, |
| "grad_norm": 0.3712616264820099, |
| "learning_rate": 8.521867196685482e-05, |
| "loss": 0.0372, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9046961325966851, |
| "grad_norm": 0.42901334166526794, |
| "learning_rate": 8.495690515936852e-05, |
| "loss": 0.0375, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.9116022099447514, |
| "grad_norm": 0.44897764921188354, |
| "learning_rate": 8.46932505408096e-05, |
| "loss": 0.0386, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9185082872928176, |
| "grad_norm": 0.50035160779953, |
| "learning_rate": 8.442772234956972e-05, |
| "loss": 0.0344, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.925414364640884, |
| "grad_norm": 0.3799741864204407, |
| "learning_rate": 8.416033492522097e-05, |
| "loss": 0.0395, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9323204419889503, |
| "grad_norm": 0.4632473289966583, |
| "learning_rate": 8.389110270774128e-05, |
| "loss": 0.0336, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9392265193370166, |
| "grad_norm": 0.311646044254303, |
| "learning_rate": 8.362004023673474e-05, |
| "loss": 0.036, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.9461325966850829, |
| "grad_norm": 0.4639626145362854, |
| "learning_rate": 8.334716215064637e-05, |
| "loss": 0.0286, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9530386740331491, |
| "grad_norm": 0.34951701760292053, |
| "learning_rate": 8.30724831859716e-05, |
| "loss": 0.0352, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.9599447513812155, |
| "grad_norm": 0.42107245326042175, |
| "learning_rate": 8.279601817646036e-05, |
| "loss": 0.0321, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.9668508287292817, |
| "grad_norm": 0.3809627294540405, |
| "learning_rate": 8.251778205231617e-05, |
| "loss": 0.0398, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9737569060773481, |
| "grad_norm": 0.2785834074020386, |
| "learning_rate": 8.223778983938962e-05, |
| "loss": 0.031, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.9806629834254144, |
| "grad_norm": 0.4540090262889862, |
| "learning_rate": 8.19560566583671e-05, |
| "loss": 0.039, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9875690607734806, |
| "grad_norm": 0.3515707552433014, |
| "learning_rate": 8.167259772395415e-05, |
| "loss": 0.0317, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.994475138121547, |
| "grad_norm": 0.4071841537952423, |
| "learning_rate": 8.138742834405386e-05, |
| "loss": 0.0382, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.0013812154696133, |
| "grad_norm": 0.5569164752960205, |
| "learning_rate": 8.110056391894005e-05, |
| "loss": 0.0395, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.0082872928176796, |
| "grad_norm": 0.41124194860458374, |
| "learning_rate": 8.081201994042573e-05, |
| "loss": 0.0362, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.0151933701657458, |
| "grad_norm": 0.414541631937027, |
| "learning_rate": 8.052181199102646e-05, |
| "loss": 0.0394, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.022099447513812, |
| "grad_norm": 0.47132864594459534, |
| "learning_rate": 8.022995574311876e-05, |
| "loss": 0.0389, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.0290055248618784, |
| "grad_norm": 0.39693185687065125, |
| "learning_rate": 7.993646695809378e-05, |
| "loss": 0.0377, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.0359116022099448, |
| "grad_norm": 0.39153772592544556, |
| "learning_rate": 7.96413614855062e-05, |
| "loss": 0.0404, |
| "step": 1500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 4500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 1500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |