{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 24564,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012212994626282364,
      "grad_norm": 2.7971887588500977,
      "learning_rate": 4.9796450089561964e-05,
      "loss": 0.3257,
      "step": 100
    },
    {
      "epoch": 0.024425989252564728,
      "grad_norm": 2.295069456100464,
      "learning_rate": 4.9592900179123925e-05,
      "loss": 0.1159,
      "step": 200
    },
    {
      "epoch": 0.03663898387884709,
      "grad_norm": 1.1914141178131104,
      "learning_rate": 4.9389350268685886e-05,
      "loss": 0.1073,
      "step": 300
    },
    {
      "epoch": 0.048851978505129456,
      "grad_norm": 0.8838738203048706,
      "learning_rate": 4.918580035824785e-05,
      "loss": 0.1076,
      "step": 400
    },
    {
      "epoch": 0.06106497313141182,
      "grad_norm": 1.8016488552093506,
      "learning_rate": 4.898225044780981e-05,
      "loss": 0.0851,
      "step": 500
    },
    {
      "epoch": 0.07327796775769418,
      "grad_norm": 2.5403218269348145,
      "learning_rate": 4.8778700537371764e-05,
      "loss": 0.1119,
      "step": 600
    },
    {
      "epoch": 0.08549096238397655,
      "grad_norm": 1.6208165884017944,
      "learning_rate": 4.8575150626933725e-05,
      "loss": 0.0968,
      "step": 700
    },
    {
      "epoch": 0.09770395701025891,
      "grad_norm": 1.7685565948486328,
      "learning_rate": 4.8371600716495686e-05,
      "loss": 0.0848,
      "step": 800
    },
    {
      "epoch": 0.10991695163654128,
      "grad_norm": 5.9024882316589355,
      "learning_rate": 4.816805080605765e-05,
      "loss": 0.0816,
      "step": 900
    },
    {
      "epoch": 0.12212994626282364,
      "grad_norm": 1.4809324741363525,
      "learning_rate": 4.796450089561961e-05,
      "loss": 0.0815,
      "step": 1000
    },
    {
      "epoch": 0.134342940889106,
      "grad_norm": 1.8953092098236084,
      "learning_rate": 4.776095098518157e-05,
      "loss": 0.0835,
      "step": 1100
    },
    {
      "epoch": 0.14655593551538837,
      "grad_norm": 0.2310028374195099,
      "learning_rate": 4.755740107474353e-05,
      "loss": 0.0768,
      "step": 1200
    },
    {
      "epoch": 0.15876893014167073,
      "grad_norm": 4.047482013702393,
      "learning_rate": 4.735385116430549e-05,
      "loss": 0.0785,
      "step": 1300
    },
    {
      "epoch": 0.1709819247679531,
      "grad_norm": 5.049030303955078,
      "learning_rate": 4.7150301253867454e-05,
      "loss": 0.0609,
      "step": 1400
    },
    {
      "epoch": 0.18319491939423546,
      "grad_norm": 4.447434902191162,
      "learning_rate": 4.6946751343429415e-05,
      "loss": 0.0625,
      "step": 1500
    },
    {
      "epoch": 0.19540791402051783,
      "grad_norm": 0.3569432199001312,
      "learning_rate": 4.674320143299138e-05,
      "loss": 0.0612,
      "step": 1600
    },
    {
      "epoch": 0.2076209086468002,
      "grad_norm": 0.6527674794197083,
      "learning_rate": 4.653965152255334e-05,
      "loss": 0.07,
      "step": 1700
    },
    {
      "epoch": 0.21983390327308255,
      "grad_norm": 5.448887825012207,
      "learning_rate": 4.633610161211529e-05,
      "loss": 0.0669,
      "step": 1800
    },
    {
      "epoch": 0.23204689789936492,
      "grad_norm": 1.804388165473938,
      "learning_rate": 4.6132551701677254e-05,
      "loss": 0.0636,
      "step": 1900
    },
    {
      "epoch": 0.24425989252564728,
      "grad_norm": 1.574012279510498,
      "learning_rate": 4.5929001791239215e-05,
      "loss": 0.0798,
      "step": 2000
    },
    {
      "epoch": 0.2564728871519297,
      "grad_norm": 0.8235198259353638,
      "learning_rate": 4.5725451880801177e-05,
      "loss": 0.054,
      "step": 2100
    },
    {
      "epoch": 0.268685881778212,
      "grad_norm": 0.6802400350570679,
      "learning_rate": 4.552190197036313e-05,
      "loss": 0.0613,
      "step": 2200
    },
    {
      "epoch": 0.2808988764044944,
      "grad_norm": 0.5732834935188293,
      "learning_rate": 4.531835205992509e-05,
      "loss": 0.0544,
      "step": 2300
    },
    {
      "epoch": 0.29311187103077674,
      "grad_norm": 0.9544196128845215,
      "learning_rate": 4.5114802149487054e-05,
      "loss": 0.0735,
      "step": 2400
    },
    {
      "epoch": 0.30532486565705913,
      "grad_norm": 0.8141427040100098,
      "learning_rate": 4.4911252239049015e-05,
      "loss": 0.0586,
      "step": 2500
    },
    {
      "epoch": 0.31753786028334147,
      "grad_norm": 3.740103006362915,
      "learning_rate": 4.4707702328610976e-05,
      "loss": 0.0498,
      "step": 2600
    },
    {
      "epoch": 0.32975085490962386,
      "grad_norm": 0.24550916254520416,
      "learning_rate": 4.450415241817294e-05,
      "loss": 0.056,
      "step": 2700
    },
    {
      "epoch": 0.3419638495359062,
      "grad_norm": 0.7254294157028198,
      "learning_rate": 4.43006025077349e-05,
      "loss": 0.053,
      "step": 2800
    },
    {
      "epoch": 0.3541768441621886,
      "grad_norm": 0.5358878970146179,
      "learning_rate": 4.409705259729686e-05,
      "loss": 0.0561,
      "step": 2900
    },
    {
      "epoch": 0.3663898387884709,
      "grad_norm": 0.3604901432991028,
      "learning_rate": 4.3893502686858815e-05,
      "loss": 0.059,
      "step": 3000
    },
    {
      "epoch": 0.3786028334147533,
      "grad_norm": 0.19227269291877747,
      "learning_rate": 4.3689952776420776e-05,
      "loss": 0.0548,
      "step": 3100
    },
    {
      "epoch": 0.39081582804103565,
      "grad_norm": 0.3620028495788574,
      "learning_rate": 4.348640286598274e-05,
      "loss": 0.0535,
      "step": 3200
    },
    {
      "epoch": 0.40302882266731804,
      "grad_norm": 4.794760704040527,
      "learning_rate": 4.32828529555447e-05,
      "loss": 0.0549,
      "step": 3300
    },
    {
      "epoch": 0.4152418172936004,
      "grad_norm": 0.5320255160331726,
      "learning_rate": 4.307930304510666e-05,
      "loss": 0.051,
      "step": 3400
    },
    {
      "epoch": 0.42745481191988277,
      "grad_norm": 2.048164129257202,
      "learning_rate": 4.287575313466862e-05,
      "loss": 0.067,
      "step": 3500
    },
    {
      "epoch": 0.4396678065461651,
      "grad_norm": 3.6915972232818604,
      "learning_rate": 4.267220322423058e-05,
      "loss": 0.0494,
      "step": 3600
    },
    {
      "epoch": 0.4518808011724475,
      "grad_norm": 0.7861614227294922,
      "learning_rate": 4.2468653313792544e-05,
      "loss": 0.0652,
      "step": 3700
    },
    {
      "epoch": 0.46409379579872984,
      "grad_norm": 1.9440407752990723,
      "learning_rate": 4.2265103403354505e-05,
      "loss": 0.0699,
      "step": 3800
    },
    {
      "epoch": 0.4763067904250122,
      "grad_norm": 0.15847598016262054,
      "learning_rate": 4.206155349291647e-05,
      "loss": 0.0575,
      "step": 3900
    },
    {
      "epoch": 0.48851978505129456,
      "grad_norm": 0.2988128960132599,
      "learning_rate": 4.185800358247843e-05,
      "loss": 0.0468,
      "step": 4000
    },
    {
      "epoch": 0.5007327796775769,
      "grad_norm": 1.6092756986618042,
      "learning_rate": 4.165445367204039e-05,
      "loss": 0.0555,
      "step": 4100
    },
    {
      "epoch": 0.5129457743038593,
      "grad_norm": 13.008635520935059,
      "learning_rate": 4.1450903761602344e-05,
      "loss": 0.0604,
      "step": 4200
    },
    {
      "epoch": 0.5251587689301417,
      "grad_norm": 2.7357659339904785,
      "learning_rate": 4.1247353851164305e-05,
      "loss": 0.0478,
      "step": 4300
    },
    {
      "epoch": 0.537371763556424,
      "grad_norm": 0.4085894823074341,
      "learning_rate": 4.104380394072627e-05,
      "loss": 0.0478,
      "step": 4400
    },
    {
      "epoch": 0.5495847581827064,
      "grad_norm": 1.5472468137741089,
      "learning_rate": 4.084025403028823e-05,
      "loss": 0.0559,
      "step": 4500
    },
    {
      "epoch": 0.5617977528089888,
      "grad_norm": 2.616894006729126,
      "learning_rate": 4.063670411985019e-05,
      "loss": 0.0444,
      "step": 4600
    },
    {
      "epoch": 0.5740107474352711,
      "grad_norm": 0.7861430644989014,
      "learning_rate": 4.043315420941215e-05,
      "loss": 0.0431,
      "step": 4700
    },
    {
      "epoch": 0.5862237420615535,
      "grad_norm": 1.3745653629302979,
      "learning_rate": 4.022960429897411e-05,
      "loss": 0.0422,
      "step": 4800
    },
    {
      "epoch": 0.5984367366878358,
      "grad_norm": 1.471048355102539,
      "learning_rate": 4.002605438853607e-05,
      "loss": 0.052,
      "step": 4900
    },
    {
      "epoch": 0.6106497313141183,
      "grad_norm": 0.3034818470478058,
      "learning_rate": 3.9822504478098035e-05,
      "loss": 0.0481,
      "step": 5000
    },
    {
      "epoch": 0.6228627259404006,
      "grad_norm": 0.5265262722969055,
      "learning_rate": 3.9618954567659996e-05,
      "loss": 0.0592,
      "step": 5100
    },
    {
      "epoch": 0.6350757205666829,
      "grad_norm": 1.226517915725708,
      "learning_rate": 3.941540465722196e-05,
      "loss": 0.0554,
      "step": 5200
    },
    {
      "epoch": 0.6472887151929653,
      "grad_norm": 0.365315705537796,
      "learning_rate": 3.921185474678392e-05,
      "loss": 0.0466,
      "step": 5300
    },
    {
      "epoch": 0.6595017098192477,
      "grad_norm": 0.4508240818977356,
      "learning_rate": 3.900830483634587e-05,
      "loss": 0.0468,
      "step": 5400
    },
    {
      "epoch": 0.67171470444553,
      "grad_norm": 0.51467365026474,
      "learning_rate": 3.8804754925907834e-05,
      "loss": 0.054,
      "step": 5500
    },
    {
      "epoch": 0.6839276990718124,
      "grad_norm": 0.6186398267745972,
      "learning_rate": 3.8601205015469796e-05,
      "loss": 0.0554,
      "step": 5600
    },
    {
      "epoch": 0.6961406936980947,
      "grad_norm": 0.6723864674568176,
      "learning_rate": 3.839765510503176e-05,
      "loss": 0.0556,
      "step": 5700
    },
    {
      "epoch": 0.7083536883243772,
      "grad_norm": 3.6368353366851807,
      "learning_rate": 3.819410519459372e-05,
      "loss": 0.0512,
      "step": 5800
    },
    {
      "epoch": 0.7205666829506595,
      "grad_norm": 3.5719096660614014,
      "learning_rate": 3.799055528415568e-05,
      "loss": 0.0545,
      "step": 5900
    },
    {
      "epoch": 0.7327796775769418,
      "grad_norm": 1.1756514310836792,
      "learning_rate": 3.778700537371764e-05,
      "loss": 0.0465,
      "step": 6000
    },
    {
      "epoch": 0.7449926722032242,
      "grad_norm": 1.2159337997436523,
      "learning_rate": 3.7583455463279596e-05,
      "loss": 0.0463,
      "step": 6100
    },
    {
      "epoch": 0.7572056668295066,
      "grad_norm": 1.0632232427597046,
      "learning_rate": 3.737990555284156e-05,
      "loss": 0.0444,
      "step": 6200
    },
    {
      "epoch": 0.769418661455789,
      "grad_norm": 0.669765293598175,
      "learning_rate": 3.717635564240352e-05,
      "loss": 0.0433,
      "step": 6300
    },
    {
      "epoch": 0.7816316560820713,
      "grad_norm": 0.13478492200374603,
      "learning_rate": 3.697280573196548e-05,
      "loss": 0.0469,
      "step": 6400
    },
    {
      "epoch": 0.7938446507083536,
      "grad_norm": 0.5963812470436096,
      "learning_rate": 3.676925582152744e-05,
      "loss": 0.0504,
      "step": 6500
    },
    {
      "epoch": 0.8060576453346361,
      "grad_norm": 0.5829123258590698,
      "learning_rate": 3.6565705911089395e-05,
      "loss": 0.0483,
      "step": 6600
    },
    {
      "epoch": 0.8182706399609184,
      "grad_norm": 2.3114776611328125,
      "learning_rate": 3.636215600065136e-05,
      "loss": 0.0601,
      "step": 6700
    },
    {
      "epoch": 0.8304836345872008,
      "grad_norm": 0.27553310990333557,
      "learning_rate": 3.615860609021332e-05,
      "loss": 0.0527,
      "step": 6800
    },
    {
      "epoch": 0.8426966292134831,
      "grad_norm": 0.3668135106563568,
      "learning_rate": 3.595505617977528e-05,
      "loss": 0.0541,
      "step": 6900
    },
    {
      "epoch": 0.8549096238397655,
      "grad_norm": 3.1305336952209473,
      "learning_rate": 3.575150626933724e-05,
      "loss": 0.0534,
      "step": 7000
    },
    {
      "epoch": 0.8671226184660479,
      "grad_norm": 0.08432205021381378,
      "learning_rate": 3.55479563588992e-05,
      "loss": 0.0523,
      "step": 7100
    },
    {
      "epoch": 0.8793356130923302,
      "grad_norm": 1.3692104816436768,
      "learning_rate": 3.534440644846116e-05,
      "loss": 0.0428,
      "step": 7200
    },
    {
      "epoch": 0.8915486077186126,
      "grad_norm": 1.1145917177200317,
      "learning_rate": 3.5140856538023125e-05,
      "loss": 0.0528,
      "step": 7300
    },
    {
      "epoch": 0.903761602344895,
      "grad_norm": 0.07234195619821548,
      "learning_rate": 3.4937306627585086e-05,
      "loss": 0.0449,
      "step": 7400
    },
    {
      "epoch": 0.9159745969711773,
      "grad_norm": 0.12755821645259857,
      "learning_rate": 3.473375671714705e-05,
      "loss": 0.0381,
      "step": 7500
    },
    {
      "epoch": 0.9281875915974597,
      "grad_norm": 1.066666603088379,
      "learning_rate": 3.453020680670901e-05,
      "loss": 0.0411,
      "step": 7600
    },
    {
      "epoch": 0.940400586223742,
      "grad_norm": 2.3837034702301025,
      "learning_rate": 3.432665689627097e-05,
      "loss": 0.0471,
      "step": 7700
    },
    {
      "epoch": 0.9526135808500245,
      "grad_norm": 0.2601478397846222,
      "learning_rate": 3.4123106985832924e-05,
      "loss": 0.0408,
      "step": 7800
    },
    {
      "epoch": 0.9648265754763068,
      "grad_norm": 1.0532914400100708,
      "learning_rate": 3.3919557075394886e-05,
      "loss": 0.0422,
      "step": 7900
    },
    {
      "epoch": 0.9770395701025891,
      "grad_norm": 0.16507047414779663,
      "learning_rate": 3.371600716495685e-05,
      "loss": 0.0464,
      "step": 8000
    },
    {
      "epoch": 0.9892525647288715,
      "grad_norm": 0.310465544462204,
      "learning_rate": 3.351245725451881e-05,
      "loss": 0.056,
      "step": 8100
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.8987804878048782,
      "eval_loss": 0.04277478903532028,
      "eval_precision": 0.8910749615300066,
      "eval_recall": 0.9066204428539477,
      "eval_runtime": 133.6334,
      "eval_samples_per_second": 61.272,
      "eval_steps_per_second": 7.663,
      "step": 8188
    },
    {
      "epoch": 1.0014655593551538,
      "grad_norm": 0.8648662567138672,
      "learning_rate": 3.330890734408077e-05,
      "loss": 0.0441,
      "step": 8200
    },
    {
      "epoch": 1.0136785539814364,
      "grad_norm": 0.16131815314292908,
      "learning_rate": 3.310535743364273e-05,
      "loss": 0.0306,
      "step": 8300
    },
    {
      "epoch": 1.0258915486077187,
      "grad_norm": 4.484282970428467,
      "learning_rate": 3.290180752320469e-05,
      "loss": 0.0383,
      "step": 8400
    },
    {
      "epoch": 1.038104543234001,
      "grad_norm": 0.5343158841133118,
      "learning_rate": 3.2698257612766654e-05,
      "loss": 0.0331,
      "step": 8500
    },
    {
      "epoch": 1.0503175378602834,
      "grad_norm": 0.028084266930818558,
      "learning_rate": 3.2494707702328615e-05,
      "loss": 0.0381,
      "step": 8600
    },
    {
      "epoch": 1.0625305324865657,
      "grad_norm": 0.483477920293808,
      "learning_rate": 3.2291157791890576e-05,
      "loss": 0.0462,
      "step": 8700
    },
    {
      "epoch": 1.074743527112848,
      "grad_norm": 0.8194773197174072,
      "learning_rate": 3.208760788145254e-05,
      "loss": 0.0346,
      "step": 8800
    },
    {
      "epoch": 1.0869565217391304,
      "grad_norm": 0.11062140762805939,
      "learning_rate": 3.188405797101449e-05,
      "loss": 0.0369,
      "step": 8900
    },
    {
      "epoch": 1.0991695163654127,
      "grad_norm": 0.300889790058136,
      "learning_rate": 3.1680508060576454e-05,
      "loss": 0.0297,
      "step": 9000
    },
    {
      "epoch": 1.111382510991695,
      "grad_norm": 0.6278924345970154,
      "learning_rate": 3.1476958150138415e-05,
      "loss": 0.0349,
      "step": 9100
    },
    {
      "epoch": 1.1235955056179776,
      "grad_norm": 0.542029082775116,
      "learning_rate": 3.1273408239700376e-05,
      "loss": 0.0473,
      "step": 9200
    },
    {
      "epoch": 1.13580850024426,
      "grad_norm": 0.6147358417510986,
      "learning_rate": 3.106985832926234e-05,
      "loss": 0.0356,
      "step": 9300
    },
    {
      "epoch": 1.1480214948705423,
      "grad_norm": 1.301965355873108,
      "learning_rate": 3.08663084188243e-05,
      "loss": 0.0371,
      "step": 9400
    },
    {
      "epoch": 1.1602344894968246,
      "grad_norm": 0.026711974292993546,
      "learning_rate": 3.066275850838626e-05,
      "loss": 0.0346,
      "step": 9500
    },
    {
      "epoch": 1.172447484123107,
      "grad_norm": 1.258608102798462,
      "learning_rate": 3.0459208597948218e-05,
      "loss": 0.0334,
      "step": 9600
    },
    {
      "epoch": 1.1846604787493893,
      "grad_norm": 0.36877045035362244,
      "learning_rate": 3.025565868751018e-05,
      "loss": 0.032,
      "step": 9700
    },
    {
      "epoch": 1.1968734733756716,
      "grad_norm": 0.2519334852695465,
      "learning_rate": 3.005210877707214e-05,
      "loss": 0.0416,
      "step": 9800
    },
    {
      "epoch": 1.2090864680019542,
      "grad_norm": 0.5204672813415527,
      "learning_rate": 2.9848558866634102e-05,
      "loss": 0.0366,
      "step": 9900
    },
    {
      "epoch": 1.2212994626282365,
      "grad_norm": 0.43101000785827637,
      "learning_rate": 2.9645008956196063e-05,
      "loss": 0.0379,
      "step": 10000
    },
    {
      "epoch": 1.2335124572545189,
      "grad_norm": 0.681117057800293,
      "learning_rate": 2.9441459045758018e-05,
      "loss": 0.0353,
      "step": 10100
    },
    {
      "epoch": 1.2457254518808012,
      "grad_norm": 0.49889543652534485,
      "learning_rate": 2.923790913531998e-05,
      "loss": 0.0394,
      "step": 10200
    },
    {
      "epoch": 1.2579384465070835,
      "grad_norm": 0.8064567446708679,
      "learning_rate": 2.903435922488194e-05,
      "loss": 0.034,
      "step": 10300
    },
    {
      "epoch": 1.2701514411333659,
      "grad_norm": 0.21315552294254303,
      "learning_rate": 2.8830809314443902e-05,
      "loss": 0.0292,
      "step": 10400
    },
    {
      "epoch": 1.2823644357596482,
      "grad_norm": 1.6466035842895508,
      "learning_rate": 2.8627259404005863e-05,
      "loss": 0.0333,
      "step": 10500
    },
    {
      "epoch": 1.2945774303859308,
      "grad_norm": 1.1944749355316162,
      "learning_rate": 2.8423709493567825e-05,
      "loss": 0.0357,
      "step": 10600
    },
    {
      "epoch": 1.3067904250122129,
      "grad_norm": 0.6488074064254761,
      "learning_rate": 2.8220159583129786e-05,
      "loss": 0.0315,
      "step": 10700
    },
    {
      "epoch": 1.3190034196384954,
      "grad_norm": 0.030384689569473267,
      "learning_rate": 2.8016609672691747e-05,
      "loss": 0.0422,
      "step": 10800
    },
    {
      "epoch": 1.3312164142647778,
      "grad_norm": 0.2363937795162201,
      "learning_rate": 2.781305976225371e-05,
      "loss": 0.0335,
      "step": 10900
    },
    {
      "epoch": 1.34342940889106,
      "grad_norm": 0.04548358544707298,
      "learning_rate": 2.7609509851815666e-05,
      "loss": 0.0371,
      "step": 11000
    },
    {
      "epoch": 1.3556424035173424,
      "grad_norm": 0.6259112358093262,
      "learning_rate": 2.7405959941377628e-05,
      "loss": 0.0412,
      "step": 11100
    },
    {
      "epoch": 1.3678553981436248,
      "grad_norm": 0.42487379908561707,
      "learning_rate": 2.720241003093959e-05,
      "loss": 0.0252,
      "step": 11200
    },
    {
      "epoch": 1.3800683927699071,
      "grad_norm": 0.9125863313674927,
      "learning_rate": 2.6998860120501547e-05,
      "loss": 0.0367,
      "step": 11300
    },
    {
      "epoch": 1.3922813873961895,
      "grad_norm": 0.7670263051986694,
      "learning_rate": 2.679531021006351e-05,
      "loss": 0.0288,
      "step": 11400
    },
    {
      "epoch": 1.404494382022472,
      "grad_norm": 0.1614452451467514,
      "learning_rate": 2.6591760299625466e-05,
      "loss": 0.0399,
      "step": 11500
    },
    {
      "epoch": 1.4167073766487543,
      "grad_norm": 3.3551249504089355,
      "learning_rate": 2.6388210389187428e-05,
      "loss": 0.0391,
      "step": 11600
    },
    {
      "epoch": 1.4289203712750367,
      "grad_norm": 0.7188284397125244,
      "learning_rate": 2.618466047874939e-05,
      "loss": 0.0315,
      "step": 11700
    },
    {
      "epoch": 1.441133365901319,
      "grad_norm": 0.48031413555145264,
      "learning_rate": 2.598111056831135e-05,
      "loss": 0.0244,
      "step": 11800
    },
    {
      "epoch": 1.4533463605276014,
      "grad_norm": 0.7492583394050598,
      "learning_rate": 2.577756065787331e-05,
      "loss": 0.0359,
      "step": 11900
    },
    {
      "epoch": 1.4655593551538837,
      "grad_norm": 0.6593573689460754,
      "learning_rate": 2.5574010747435273e-05,
      "loss": 0.0327,
      "step": 12000
    },
    {
      "epoch": 1.477772349780166,
      "grad_norm": 0.2940855026245117,
      "learning_rate": 2.5370460836997234e-05,
      "loss": 0.0336,
      "step": 12100
    },
    {
      "epoch": 1.4899853444064486,
      "grad_norm": 0.45900267362594604,
      "learning_rate": 2.5166910926559195e-05,
      "loss": 0.0242,
      "step": 12200
    },
    {
      "epoch": 1.5021983390327307,
      "grad_norm": 2.2023909091949463,
      "learning_rate": 2.4963361016121153e-05,
      "loss": 0.0348,
      "step": 12300
    },
    {
      "epoch": 1.5144113336590133,
      "grad_norm": 0.12489739805459976,
      "learning_rate": 2.4759811105683115e-05,
      "loss": 0.0335,
      "step": 12400
    },
    {
      "epoch": 1.5266243282852956,
      "grad_norm": 1.0575867891311646,
      "learning_rate": 2.4556261195245076e-05,
      "loss": 0.0199,
      "step": 12500
    },
    {
      "epoch": 1.538837322911578,
      "grad_norm": 1.7309564352035522,
      "learning_rate": 2.4352711284807037e-05,
      "loss": 0.0316,
      "step": 12600
    },
    {
      "epoch": 1.5510503175378603,
      "grad_norm": 0.925658643245697,
      "learning_rate": 2.4149161374369e-05,
      "loss": 0.0445,
      "step": 12700
    },
    {
      "epoch": 1.5632633121641426,
      "grad_norm": 0.48667579889297485,
      "learning_rate": 2.3945611463930957e-05,
      "loss": 0.0437,
      "step": 12800
    },
    {
      "epoch": 1.5754763067904252,
      "grad_norm": 0.11213243752717972,
      "learning_rate": 2.3742061553492918e-05,
      "loss": 0.0387,
      "step": 12900
    },
    {
      "epoch": 1.5876893014167073,
      "grad_norm": 0.14116732776165009,
      "learning_rate": 2.353851164305488e-05,
      "loss": 0.033,
      "step": 13000
    },
    {
      "epoch": 1.5999022960429898,
      "grad_norm": 0.686268150806427,
      "learning_rate": 2.333496173261684e-05,
      "loss": 0.0281,
      "step": 13100
    },
    {
      "epoch": 1.612115290669272,
      "grad_norm": 0.4795430898666382,
      "learning_rate": 2.31314118221788e-05,
      "loss": 0.0436,
      "step": 13200
    },
    {
      "epoch": 1.6243282852955545,
      "grad_norm": 0.026416413486003876,
      "learning_rate": 2.292786191174076e-05,
      "loss": 0.0343,
      "step": 13300
    },
    {
      "epoch": 1.6365412799218368,
      "grad_norm": 0.582073986530304,
      "learning_rate": 2.2724312001302718e-05,
      "loss": 0.0312,
      "step": 13400
    },
    {
      "epoch": 1.6487542745481192,
      "grad_norm": 1.669487476348877,
      "learning_rate": 2.252076209086468e-05,
      "loss": 0.0384,
      "step": 13500
    },
    {
      "epoch": 1.6609672691744015,
      "grad_norm": 0.19379857182502747,
      "learning_rate": 2.231721218042664e-05,
      "loss": 0.0322,
      "step": 13600
    },
    {
      "epoch": 1.6731802638006839,
      "grad_norm": 4.540911674499512,
      "learning_rate": 2.2113662269988602e-05,
      "loss": 0.0363,
      "step": 13700
    },
    {
      "epoch": 1.6853932584269664,
      "grad_norm": 0.24804505705833435,
      "learning_rate": 2.1910112359550563e-05,
      "loss": 0.0326,
      "step": 13800
    },
    {
      "epoch": 1.6976062530532485,
      "grad_norm": 1.8535521030426025,
      "learning_rate": 2.1706562449112524e-05,
      "loss": 0.0316,
      "step": 13900
    },
    {
      "epoch": 1.709819247679531,
      "grad_norm": 0.04862889647483826,
      "learning_rate": 2.1503012538674482e-05,
      "loss": 0.0248,
      "step": 14000
    },
    {
      "epoch": 1.7220322423058134,
      "grad_norm": 0.3953320384025574,
      "learning_rate": 2.1299462628236444e-05,
      "loss": 0.0393,
      "step": 14100
    },
    {
      "epoch": 1.7342452369320958,
      "grad_norm": 0.5966042876243591,
      "learning_rate": 2.1095912717798405e-05,
      "loss": 0.0358,
      "step": 14200
    },
    {
      "epoch": 1.746458231558378,
      "grad_norm": 0.1555975377559662,
      "learning_rate": 2.0892362807360366e-05,
      "loss": 0.0425,
      "step": 14300
    },
    {
      "epoch": 1.7586712261846604,
      "grad_norm": 0.8556230068206787,
      "learning_rate": 2.0688812896922328e-05,
      "loss": 0.0267,
      "step": 14400
    },
    {
      "epoch": 1.770884220810943,
      "grad_norm": 0.03833279386162758,
      "learning_rate": 2.048526298648429e-05,
      "loss": 0.034,
      "step": 14500
    },
    {
      "epoch": 1.783097215437225,
      "grad_norm": 0.043861281126737595,
      "learning_rate": 2.0281713076046247e-05,
      "loss": 0.0288,
      "step": 14600
    },
    {
      "epoch": 1.7953102100635077,
      "grad_norm": 0.28712257742881775,
      "learning_rate": 2.0078163165608208e-05,
      "loss": 0.0285,
      "step": 14700
    },
    {
      "epoch": 1.8075232046897898,
      "grad_norm": 1.3535864353179932,
      "learning_rate": 1.987461325517017e-05,
      "loss": 0.0377,
      "step": 14800
    },
    {
      "epoch": 1.8197361993160723,
      "grad_norm": 3.164818048477173,
      "learning_rate": 1.967106334473213e-05,
      "loss": 0.0334,
      "step": 14900
    },
    {
      "epoch": 1.8319491939423547,
      "grad_norm": 0.08736918866634369,
      "learning_rate": 1.9467513434294092e-05,
      "loss": 0.0294,
      "step": 15000
    },
    {
      "epoch": 1.844162188568637,
      "grad_norm": 1.25545072555542,
      "learning_rate": 1.926396352385605e-05,
      "loss": 0.0285,
      "step": 15100
    },
    {
      "epoch": 1.8563751831949193,
      "grad_norm": 0.030480826273560524,
      "learning_rate": 1.906041361341801e-05,
      "loss": 0.0328,
      "step": 15200
    },
    {
      "epoch": 1.8685881778212017,
      "grad_norm": 1.6334197521209717,
      "learning_rate": 1.8856863702979973e-05,
      "loss": 0.037,
      "step": 15300
    },
    {
      "epoch": 1.8808011724474842,
      "grad_norm": 1.2553733587265015,
      "learning_rate": 1.865331379254193e-05,
      "loss": 0.0256,
      "step": 15400
    },
    {
      "epoch": 1.8930141670737664,
      "grad_norm": 0.061297524720430374,
      "learning_rate": 1.8449763882103892e-05,
      "loss": 0.0276,
      "step": 15500
    },
    {
      "epoch": 1.905227161700049,
      "grad_norm": 1.0915943384170532,
      "learning_rate": 1.8246213971665853e-05,
      "loss": 0.0362,
      "step": 15600
    },
    {
      "epoch": 1.9174401563263312,
      "grad_norm": 0.020990842953324318,
      "learning_rate": 1.8042664061227815e-05,
      "loss": 0.025,
      "step": 15700
    },
    {
      "epoch": 1.9296531509526136,
      "grad_norm": 0.09211856126785278,
      "learning_rate": 1.7839114150789773e-05,
      "loss": 0.0265,
      "step": 15800
    },
    {
      "epoch": 1.941866145578896,
      "grad_norm": 1.5800979137420654,
      "learning_rate": 1.7635564240351734e-05,
      "loss": 0.0256,
      "step": 15900
    },
    {
      "epoch": 1.9540791402051783,
      "grad_norm": 0.39250850677490234,
      "learning_rate": 1.7432014329913695e-05,
      "loss": 0.0249,
      "step": 16000
    },
    {
      "epoch": 1.9662921348314608,
      "grad_norm": 0.8597753047943115,
      "learning_rate": 1.7228464419475657e-05,
      "loss": 0.0355,
      "step": 16100
    },
    {
      "epoch": 1.978505129457743,
      "grad_norm": 0.16734100878238678,
      "learning_rate": 1.7024914509037618e-05,
      "loss": 0.032,
      "step": 16200
    },
    {
      "epoch": 1.9907181240840255,
      "grad_norm": 0.11750225722789764,
      "learning_rate": 1.682136459859958e-05,
      "loss": 0.0227,
      "step": 16300
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.9197428223035141,
      "eval_loss": 0.036899276077747345,
      "eval_precision": 0.9117582417582417,
      "eval_recall": 0.9278684857973608,
      "eval_runtime": 75.3931,
      "eval_samples_per_second": 108.604,
      "eval_steps_per_second": 13.582,
      "step": 16376
    },
    {
      "epoch": 2.0029311187103076,
      "grad_norm": 0.6276179552078247,
      "learning_rate": 1.6617814688161537e-05,
      "loss": 0.0326,
      "step": 16400
    },
    {
      "epoch": 2.01514411333659,
      "grad_norm": 0.27882876992225647,
      "learning_rate": 1.64142647777235e-05,
      "loss": 0.0206,
      "step": 16500
    },
    {
      "epoch": 2.0273571079628727,
      "grad_norm": 0.9930168986320496,
      "learning_rate": 1.621071486728546e-05,
      "loss": 0.0135,
      "step": 16600
    },
    {
      "epoch": 2.039570102589155,
      "grad_norm": 0.21392406523227692,
      "learning_rate": 1.600716495684742e-05,
      "loss": 0.028,
      "step": 16700
    },
    {
      "epoch": 2.0517830972154374,
      "grad_norm": 2.1995363235473633,
      "learning_rate": 1.5803615046409382e-05,
      "loss": 0.0273,
      "step": 16800
    },
    {
      "epoch": 2.0639960918417195,
      "grad_norm": 1.91357421875,
      "learning_rate": 1.560006513597134e-05,
      "loss": 0.0152,
      "step": 16900
    },
    {
      "epoch": 2.076209086468002,
      "grad_norm": 0.057265687733888626,
      "learning_rate": 1.53965152255333e-05,
      "loss": 0.0206,
      "step": 17000
    },
    {
      "epoch": 2.088422081094284,
      "grad_norm": 0.05291162431240082,
      "learning_rate": 1.5192965315095261e-05,
      "loss": 0.022,
      "step": 17100
    },
    {
      "epoch": 2.1006350757205667,
      "grad_norm": 2.424394369125366,
      "learning_rate": 1.4989415404657223e-05,
      "loss": 0.0178,
      "step": 17200
    },
    {
      "epoch": 2.112848070346849,
      "grad_norm": 8.053882598876953,
      "learning_rate": 1.4785865494219184e-05,
      "loss": 0.0256,
      "step": 17300
    },
    {
      "epoch": 2.1250610649731314,
      "grad_norm": 1.606079339981079,
      "learning_rate": 1.4582315583781145e-05,
      "loss": 0.017,
      "step": 17400
    },
    {
      "epoch": 2.137274059599414,
      "grad_norm": 0.26984503865242004,
      "learning_rate": 1.4378765673343103e-05,
      "loss": 0.0202,
      "step": 17500
    },
    {
      "epoch": 2.149487054225696,
      "grad_norm": 0.044966306537389755,
      "learning_rate": 1.4175215762905064e-05,
      "loss": 0.0234,
      "step": 17600
    },
    {
      "epoch": 2.1617000488519786,
      "grad_norm": 0.05067300796508789,
      "learning_rate": 1.3971665852467026e-05,
      "loss": 0.0263,
      "step": 17700
    },
    {
      "epoch": 2.1739130434782608,
      "grad_norm": 0.5125128030776978,
      "learning_rate": 1.3768115942028985e-05,
      "loss": 0.0216,
      "step": 17800
    },
    {
      "epoch": 2.1861260381045433,
      "grad_norm": 0.04719540849328041,
      "learning_rate": 1.3564566031590947e-05,
      "loss": 0.0256,
      "step": 17900
    },
    {
      "epoch": 2.1983390327308254,
      "grad_norm": 0.11627175658941269,
      "learning_rate": 1.3361016121152908e-05,
      "loss": 0.0185,
      "step": 18000
    },
    {
      "epoch": 2.210552027357108,
      "grad_norm": 0.2016720473766327,
      "learning_rate": 1.3157466210714866e-05,
      "loss": 0.0111,
      "step": 18100
    },
    {
      "epoch": 2.22276502198339,
      "grad_norm": 1.6914150714874268,
      "learning_rate": 1.2953916300276827e-05,
      "loss": 0.0237,
      "step": 18200
    },
    {
      "epoch": 2.2349780166096727,
      "grad_norm": 0.3582985997200012,
      "learning_rate": 1.2750366389838789e-05,
      "loss": 0.0188,
      "step": 18300
    },
    {
      "epoch": 2.247191011235955,
      "grad_norm": 0.9769508838653564,
      "learning_rate": 1.254681647940075e-05,
      "loss": 0.024,
      "step": 18400
    },
    {
      "epoch": 2.2594040058622373,
      "grad_norm": 0.03454025089740753,
      "learning_rate": 1.2343266568962711e-05,
      "loss": 0.0307,
      "step": 18500
    },
    {
      "epoch": 2.27161700048852,
      "grad_norm": 0.0919230654835701,
      "learning_rate": 1.2139716658524671e-05,
      "loss": 0.0183,
      "step": 18600
    },
    {
      "epoch": 2.283829995114802,
      "grad_norm": 0.05342525988817215,
      "learning_rate": 1.1936166748086632e-05,
      "loss": 0.0295,
      "step": 18700
    },
    {
      "epoch": 2.2960429897410846,
      "grad_norm": 0.11520762741565704,
      "learning_rate": 1.1732616837648592e-05,
      "loss": 0.0187,
      "step": 18800
    },
    {
      "epoch": 2.308255984367367,
      "grad_norm": 1.8612200021743774,
      "learning_rate": 1.1529066927210551e-05,
      "loss": 0.0228,
      "step": 18900
    },
    {
      "epoch": 2.3204689789936492,
      "grad_norm": 0.9779945611953735,
      "learning_rate": 1.1325517016772513e-05,
      "loss": 0.0182,
      "step": 19000
    },
    {
      "epoch": 2.332681973619932,
      "grad_norm": 1.9669654369354248,
      "learning_rate": 1.1121967106334474e-05,
      "loss": 0.0247,
      "step": 19100
    },
    {
      "epoch": 2.344894968246214,
      "grad_norm": 0.1722841113805771,
      "learning_rate": 1.0918417195896434e-05,
      "loss": 0.0206,
      "step": 19200
    },
    {
      "epoch": 2.3571079628724965,
      "grad_norm": 0.1652793437242508,
      "learning_rate": 1.0714867285458395e-05,
      "loss": 0.0146,
      "step": 19300
    },
    {
      "epoch": 2.3693209574987786,
      "grad_norm": 0.07285087555646896,
      "learning_rate": 1.0511317375020356e-05,
      "loss": 0.0151,
      "step": 19400
    },
    {
      "epoch": 2.381533952125061,
      "grad_norm": 2.59061861038208,
      "learning_rate": 1.0307767464582316e-05,
      "loss": 0.0192,
      "step": 19500
    },
    {
      "epoch": 2.3937469467513433,
      "grad_norm": 0.02776254341006279,
      "learning_rate": 1.0104217554144277e-05,
      "loss": 0.0245,
      "step": 19600
    },
    {
      "epoch": 2.405959941377626,
      "grad_norm": 0.48207101225852966,
      "learning_rate": 9.900667643706239e-06,
      "loss": 0.0147,
      "step": 19700
    },
    {
      "epoch": 2.4181729360039084,
      "grad_norm": 0.7725105285644531,
      "learning_rate": 9.697117733268198e-06,
      "loss": 0.0206,
      "step": 19800
    },
    {
      "epoch": 2.4303859306301905,
      "grad_norm": 1.8201816082000732,
      "learning_rate": 9.493567822830158e-06,
      "loss": 0.0205,
      "step": 19900
    },
    {
      "epoch": 2.442598925256473,
      "grad_norm": 0.2930428385734558,
      "learning_rate": 9.29001791239212e-06,
      "loss": 0.0163,
      "step": 20000
    },
    {
      "epoch": 2.454811919882755,
      "grad_norm": 0.7441920638084412,
      "learning_rate": 9.086468001954079e-06,
      "loss": 0.0181,
      "step": 20100
    },
    {
      "epoch": 2.4670249145090377,
      "grad_norm": 0.5970872640609741,
      "learning_rate": 8.88291809151604e-06,
      "loss": 0.0172,
      "step": 20200
    },
    {
      "epoch": 2.47923790913532,
      "grad_norm": 0.17312058806419373,
      "learning_rate": 8.679368181078002e-06,
      "loss": 0.0163,
      "step": 20300
    },
    {
      "epoch": 2.4914509037616024,
      "grad_norm": 0.26520836353302,
      "learning_rate": 8.475818270639961e-06,
      "loss": 0.016,
      "step": 20400
    },
    {
      "epoch": 2.5036638983878845,
      "grad_norm": 0.08623456209897995,
      "learning_rate": 8.272268360201922e-06,
      "loss": 0.018,
      "step": 20500
    },
    {
      "epoch": 2.515876893014167,
      "grad_norm": 0.16404370963573456,
      "learning_rate": 8.068718449763882e-06,
      "loss": 0.0164,
      "step": 20600
    },
    {
      "epoch": 2.5280898876404496,
      "grad_norm": 0.051970474421978,
      "learning_rate": 7.865168539325843e-06,
      "loss": 0.0203,
      "step": 20700
    },
    {
      "epoch": 2.5403028822667317,
      "grad_norm": 0.08457406610250473,
      "learning_rate": 7.661618628887805e-06,
      "loss": 0.0211,
      "step": 20800
    },
    {
      "epoch": 2.5525158768930143,
      "grad_norm": 0.35134220123291016,
      "learning_rate": 7.4580687184497635e-06,
      "loss": 0.018,
      "step": 20900
    },
    {
      "epoch": 2.5647288715192964,
      "grad_norm": 0.487570196390152,
      "learning_rate": 7.254518808011725e-06,
      "loss": 0.0281,
      "step": 21000
    },
    {
      "epoch": 2.576941866145579,
      "grad_norm": 2.1460368633270264,
      "learning_rate": 7.050968897573685e-06,
      "loss": 0.0196,
      "step": 21100
    },
    {
      "epoch": 2.5891548607718615,
      "grad_norm": 0.3036395311355591,
      "learning_rate": 6.847418987135645e-06,
      "loss": 0.0191,
      "step": 21200
    },
    {
      "epoch": 2.6013678553981436,
      "grad_norm": 0.3689348101615906,
      "learning_rate": 6.643869076697606e-06,
      "loss": 0.0173,
      "step": 21300
    },
    {
      "epoch": 2.6135808500244258,
      "grad_norm": 1.0098440647125244,
      "learning_rate": 6.440319166259568e-06,
      "loss": 0.0162,
      "step": 21400
    },
    {
      "epoch": 2.6257938446507083,
      "grad_norm": 0.30733248591423035,
      "learning_rate": 6.236769255821528e-06,
      "loss": 0.0194,
      "step": 21500
    },
    {
      "epoch": 2.638006839276991,
      "grad_norm": 0.4835430085659027,
      "learning_rate": 6.0332193453834885e-06,
      "loss": 0.0295,
      "step": 21600
    },
    {
      "epoch": 2.650219833903273,
      "grad_norm": 0.041551847010850906,
      "learning_rate": 5.829669434945449e-06,
      "loss": 0.0209,
      "step": 21700
    },
    {
      "epoch": 2.6624328285295555,
      "grad_norm": 1.990522027015686,
      "learning_rate": 5.6261195245074095e-06,
      "loss": 0.0269,
      "step": 21800
    },
    {
      "epoch": 2.6746458231558377,
      "grad_norm": 0.04139232635498047,
      "learning_rate": 5.42256961406937e-06,
      "loss": 0.0226,
      "step": 21900
    },
    {
      "epoch": 2.68685881778212,
      "grad_norm": 0.9341286420822144,
      "learning_rate": 5.219019703631331e-06,
      "loss": 0.0201,
      "step": 22000
    },
    {
      "epoch": 2.6990718124084028,
      "grad_norm": 0.11153418570756912,
      "learning_rate": 5.015469793193292e-06,
      "loss": 0.0222,
      "step": 22100
    },
    {
      "epoch": 2.711284807034685,
      "grad_norm": 1.0574121475219727,
      "learning_rate": 4.811919882755251e-06,
      "loss": 0.0212,
      "step": 22200
    },
    {
      "epoch": 2.723497801660967,
      "grad_norm": 0.9357222318649292,
      "learning_rate": 4.608369972317213e-06,
      "loss": 0.0219,
      "step": 22300
    },
    {
      "epoch": 2.7357107962872496,
      "grad_norm": 0.18769215047359467,
      "learning_rate": 4.404820061879173e-06,
      "loss": 0.0192,
      "step": 22400
    },
    {
      "epoch": 2.747923790913532,
      "grad_norm": 1.0952208042144775,
      "learning_rate": 4.201270151441134e-06,
      "loss": 0.0165,
      "step": 22500
    },
    {
      "epoch": 2.7601367855398142,
      "grad_norm": 0.046009525656700134,
      "learning_rate": 3.997720241003094e-06,
      "loss": 0.0161,
      "step": 22600
    },
    {
      "epoch": 2.772349780166097,
      "grad_norm": 0.3359615206718445,
      "learning_rate": 3.794170330565055e-06,
      "loss": 0.0198,
      "step": 22700
    },
    {
      "epoch": 2.784562774792379,
      "grad_norm": 0.03583957999944687,
      "learning_rate": 3.590620420127015e-06,
      "loss": 0.0157,
      "step": 22800
    },
    {
      "epoch": 2.7967757694186615,
      "grad_norm": 2.4570398330688477,
      "learning_rate": 3.3870705096889755e-06,
      "loss": 0.0183,
      "step": 22900
    },
    {
      "epoch": 2.808988764044944,
      "grad_norm": 0.0799492597579956,
      "learning_rate": 3.1835205992509364e-06,
      "loss": 0.0154,
      "step": 23000
    },
    {
      "epoch": 2.821201758671226,
      "grad_norm": 0.17097431421279907,
      "learning_rate": 2.979970688812897e-06,
      "loss": 0.0208,
      "step": 23100
    },
    {
      "epoch": 2.8334147532975087,
      "grad_norm": 0.042323142290115356,
      "learning_rate": 2.776420778374858e-06,
      "loss": 0.0107,
      "step": 23200
    },
    {
      "epoch": 2.845627747923791,
      "grad_norm": 0.6305797100067139,
      "learning_rate": 2.5728708679368183e-06,
      "loss": 0.0225,
      "step": 23300
    },
    {
      "epoch": 2.8578407425500734,
      "grad_norm": 0.05080363526940346,
      "learning_rate": 2.3693209574987788e-06,
      "loss": 0.0238,
      "step": 23400
    },
    {
      "epoch": 2.8700537371763555,
      "grad_norm": 0.04388800263404846,
      "learning_rate": 2.1657710470607397e-06,
      "loss": 0.0184,
      "step": 23500
    },
    {
      "epoch": 2.882266731802638,
      "grad_norm": 2.4991371631622314,
      "learning_rate": 1.9622211366226997e-06,
      "loss": 0.0196,
      "step": 23600
    },
    {
      "epoch": 2.89447972642892,
      "grad_norm": 0.059519946575164795,
      "learning_rate": 1.7586712261846606e-06,
      "loss": 0.0156,
      "step": 23700
    },
    {
      "epoch": 2.9066927210552027,
      "grad_norm": 0.044085703790187836,
      "learning_rate": 1.5551213157466213e-06,
      "loss": 0.0161,
      "step": 23800
    },
    {
      "epoch": 2.9189057156814853,
      "grad_norm": 0.024006502702832222,
      "learning_rate": 1.3515714053085818e-06,
      "loss": 0.0172,
      "step": 23900
    },
    {
      "epoch": 2.9311187103077674,
      "grad_norm": 0.7654680609703064,
      "learning_rate": 1.1480214948705422e-06,
      "loss": 0.0165,
      "step": 24000
    },
    {
      "epoch": 2.94333170493405,
      "grad_norm": 0.8878483772277832,
      "learning_rate": 9.444715844325028e-07,
      "loss": 0.0174,
      "step": 24100
    },
    {
      "epoch": 2.955544699560332,
      "grad_norm": 3.2117550373077393,
      "learning_rate": 7.409216739944635e-07,
      "loss": 0.0171,
      "step": 24200
    },
    {
      "epoch": 2.9677576941866146,
      "grad_norm": 0.6114596128463745,
      "learning_rate": 5.373717635564241e-07,
      "loss": 0.0136,
      "step": 24300
    },
    {
      "epoch": 2.979970688812897,
      "grad_norm": 0.0984087809920311,
      "learning_rate": 3.3382185311838467e-07,
      "loss": 0.0166,
      "step": 24400
    },
    {
      "epoch": 2.9921836834391793,
      "grad_norm": 0.01637178845703602,
      "learning_rate": 1.3027194268034525e-07,
      "loss": 0.0133,
      "step": 24500
    },
    {
      "epoch": 3.0,
      "eval_f1": 0.9273861231763003,
      "eval_loss": 0.03446226194500923,
      "eval_precision": 0.9235803016858918,
      "eval_recall": 0.9312234399463207,
      "eval_runtime": 62.6474,
      "eval_samples_per_second": 130.7,
      "eval_steps_per_second": 16.345,
      "step": 24564
    }
  ],
  "logging_steps": 100,
  "max_steps": 24564,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.283942398980096e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}