{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.18321729571271528,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00018321729571271528,
      "grad_norm": 7.481241625973833,
      "learning_rate": 0.0,
      "loss": 0.764,
      "step": 1
    },
    {
      "epoch": 0.00036643459142543056,
      "grad_norm": 7.936839352385908,
      "learning_rate": 1.8315018315018317e-08,
      "loss": 0.7505,
      "step": 2
    },
    {
      "epoch": 0.0005496518871381459,
      "grad_norm": 8.381229660678587,
      "learning_rate": 3.6630036630036635e-08,
      "loss": 0.7767,
      "step": 3
    },
    {
      "epoch": 0.0007328691828508611,
      "grad_norm": 7.533434041856819,
      "learning_rate": 5.494505494505495e-08,
      "loss": 0.7545,
      "step": 4
    },
    {
      "epoch": 0.0009160864785635764,
      "grad_norm": 6.4876435204860545,
      "learning_rate": 7.326007326007327e-08,
      "loss": 0.7254,
      "step": 5
    },
    {
      "epoch": 0.0010993037742762918,
      "grad_norm": 6.984439562746247,
      "learning_rate": 9.157509157509159e-08,
      "loss": 0.7238,
      "step": 6
    },
    {
      "epoch": 0.001282521069989007,
      "grad_norm": 8.432795226889532,
      "learning_rate": 1.098901098901099e-07,
      "loss": 0.7667,
      "step": 7
    },
    {
      "epoch": 0.0014657383657017222,
      "grad_norm": 6.615389400475257,
      "learning_rate": 1.282051282051282e-07,
      "loss": 0.739,
      "step": 8
    },
    {
      "epoch": 0.0016489556614144375,
      "grad_norm": 7.043438048491804,
      "learning_rate": 1.4652014652014654e-07,
      "loss": 0.7412,
      "step": 9
    },
    {
      "epoch": 0.001832172957127153,
      "grad_norm": 7.763888342467084,
      "learning_rate": 1.6483516483516484e-07,
      "loss": 0.739,
      "step": 10
    },
    {
      "epoch": 0.002015390252839868,
      "grad_norm": 7.115680896600135,
      "learning_rate": 1.8315018315018317e-07,
      "loss": 0.6814,
      "step": 11
    },
    {
      "epoch": 0.0021986075485525836,
      "grad_norm": 9.472751553447468,
      "learning_rate": 2.014652014652015e-07,
      "loss": 0.7694,
      "step": 12
    },
    {
      "epoch": 0.0023818248442652986,
      "grad_norm": 9.164365397122241,
      "learning_rate": 2.197802197802198e-07,
      "loss": 0.8101,
      "step": 13
    },
    {
      "epoch": 0.002565042139978014,
      "grad_norm": 7.831934204465113,
      "learning_rate": 2.3809523809523811e-07,
      "loss": 0.7504,
      "step": 14
    },
    {
      "epoch": 0.002748259435690729,
      "grad_norm": 6.7423320112288865,
      "learning_rate": 2.564102564102564e-07,
      "loss": 0.7442,
      "step": 15
    },
    {
      "epoch": 0.0029314767314034445,
      "grad_norm": 7.8103949840893145,
      "learning_rate": 2.7472527472527475e-07,
      "loss": 0.7782,
      "step": 16
    },
    {
      "epoch": 0.00311469402711616,
      "grad_norm": 6.1519268993583465,
      "learning_rate": 2.930402930402931e-07,
      "loss": 0.663,
      "step": 17
    },
    {
      "epoch": 0.003297911322828875,
      "grad_norm": 8.049047835270914,
      "learning_rate": 3.113553113553114e-07,
      "loss": 0.7934,
      "step": 18
    },
    {
      "epoch": 0.0034811286185415903,
      "grad_norm": 6.388709624292669,
      "learning_rate": 3.296703296703297e-07,
      "loss": 0.7241,
      "step": 19
    },
    {
      "epoch": 0.003664345914254306,
      "grad_norm": 7.039170133500172,
      "learning_rate": 3.47985347985348e-07,
      "loss": 0.7133,
      "step": 20
    },
    {
      "epoch": 0.003847563209967021,
      "grad_norm": 8.416142106876258,
      "learning_rate": 3.6630036630036635e-07,
      "loss": 0.7787,
      "step": 21
    },
    {
      "epoch": 0.004030780505679736,
      "grad_norm": 5.942542945633484,
      "learning_rate": 3.846153846153847e-07,
      "loss": 0.692,
      "step": 22
    },
    {
      "epoch": 0.004213997801392451,
      "grad_norm": 6.397620608740201,
      "learning_rate": 4.02930402930403e-07,
      "loss": 0.6885,
      "step": 23
    },
    {
      "epoch": 0.004397215097105167,
      "grad_norm": 4.789506080162222,
      "learning_rate": 4.212454212454213e-07,
      "loss": 0.6818,
      "step": 24
    },
    {
      "epoch": 0.004580432392817882,
      "grad_norm": 5.4323837663846275,
      "learning_rate": 4.395604395604396e-07,
      "loss": 0.7202,
      "step": 25
    },
    {
      "epoch": 0.004763649688530597,
      "grad_norm": 6.001418955741802,
      "learning_rate": 4.578754578754579e-07,
      "loss": 0.6915,
      "step": 26
    },
    {
      "epoch": 0.004946866984243312,
      "grad_norm": 6.931290264891086,
      "learning_rate": 4.7619047619047623e-07,
      "loss": 0.7214,
      "step": 27
    },
    {
      "epoch": 0.005130084279956028,
      "grad_norm": 5.423177642103934,
      "learning_rate": 4.945054945054946e-07,
      "loss": 0.6627,
      "step": 28
    },
    {
      "epoch": 0.005313301575668743,
      "grad_norm": 5.824082478920574,
      "learning_rate": 5.128205128205128e-07,
      "loss": 0.7128,
      "step": 29
    },
    {
      "epoch": 0.005496518871381458,
      "grad_norm": 4.837515053542817,
      "learning_rate": 5.311355311355311e-07,
      "loss": 0.7063,
      "step": 30
    },
    {
      "epoch": 0.005679736167094174,
      "grad_norm": 4.2178492878732,
      "learning_rate": 5.494505494505495e-07,
      "loss": 0.6667,
      "step": 31
    },
    {
      "epoch": 0.005862953462806889,
      "grad_norm": 3.7310273500960283,
      "learning_rate": 5.677655677655678e-07,
      "loss": 0.6724,
      "step": 32
    },
    {
      "epoch": 0.006046170758519604,
      "grad_norm": 3.4436166557785013,
      "learning_rate": 5.860805860805862e-07,
      "loss": 0.6158,
      "step": 33
    },
    {
      "epoch": 0.00622938805423232,
      "grad_norm": 2.6914219359650264,
      "learning_rate": 6.043956043956044e-07,
      "loss": 0.6384,
      "step": 34
    },
    {
      "epoch": 0.006412605349945035,
      "grad_norm": 2.8611154351289803,
      "learning_rate": 6.227106227106228e-07,
      "loss": 0.6564,
      "step": 35
    },
    {
      "epoch": 0.00659582264565775,
      "grad_norm": 2.915608387835296,
      "learning_rate": 6.41025641025641e-07,
      "loss": 0.6556,
      "step": 36
    },
    {
      "epoch": 0.006779039941370466,
      "grad_norm": 3.104210670394775,
      "learning_rate": 6.593406593406594e-07,
      "loss": 0.6571,
      "step": 37
    },
    {
      "epoch": 0.006962257237083181,
      "grad_norm": 2.416024185643152,
      "learning_rate": 6.776556776556777e-07,
      "loss": 0.6118,
      "step": 38
    },
    {
      "epoch": 0.007145474532795896,
      "grad_norm": 2.2840327527366187,
      "learning_rate": 6.95970695970696e-07,
      "loss": 0.5773,
      "step": 39
    },
    {
      "epoch": 0.007328691828508612,
      "grad_norm": 2.994955066139323,
      "learning_rate": 7.142857142857143e-07,
      "loss": 0.6621,
      "step": 40
    },
    {
      "epoch": 0.007511909124221327,
      "grad_norm": 2.4977146662969956,
      "learning_rate": 7.326007326007327e-07,
      "loss": 0.5878,
      "step": 41
    },
    {
      "epoch": 0.007695126419934042,
      "grad_norm": 2.2697538551105443,
      "learning_rate": 7.50915750915751e-07,
      "loss": 0.6161,
      "step": 42
    },
    {
      "epoch": 0.007878343715646757,
      "grad_norm": 2.040084870467292,
      "learning_rate": 7.692307692307694e-07,
      "loss": 0.625,
      "step": 43
    },
    {
      "epoch": 0.008061561011359472,
      "grad_norm": 1.4264410515550054,
      "learning_rate": 7.875457875457876e-07,
      "loss": 0.5991,
      "step": 44
    },
    {
      "epoch": 0.008244778307072188,
      "grad_norm": 1.3307329745484617,
      "learning_rate": 8.05860805860806e-07,
      "loss": 0.5802,
      "step": 45
    },
    {
      "epoch": 0.008427995602784902,
      "grad_norm": 1.3186002922814006,
      "learning_rate": 8.241758241758242e-07,
      "loss": 0.5971,
      "step": 46
    },
    {
      "epoch": 0.008611212898497618,
      "grad_norm": 1.4454419288227918,
      "learning_rate": 8.424908424908426e-07,
      "loss": 0.6152,
      "step": 47
    },
    {
      "epoch": 0.008794430194210334,
      "grad_norm": 1.2728865809977838,
      "learning_rate": 8.608058608058609e-07,
      "loss": 0.6176,
      "step": 48
    },
    {
      "epoch": 0.008977647489923048,
      "grad_norm": 1.2387163683388445,
      "learning_rate": 8.791208791208792e-07,
      "loss": 0.6233,
      "step": 49
    },
    {
      "epoch": 0.009160864785635764,
      "grad_norm": 1.1465107255795282,
      "learning_rate": 8.974358974358975e-07,
      "loss": 0.5795,
      "step": 50
    },
    {
      "epoch": 0.00934408208134848,
      "grad_norm": 1.264265250343769,
      "learning_rate": 9.157509157509158e-07,
      "loss": 0.6262,
      "step": 51
    },
    {
      "epoch": 0.009527299377061194,
      "grad_norm": 0.919312835052228,
      "learning_rate": 9.340659340659342e-07,
      "loss": 0.5535,
      "step": 52
    },
    {
      "epoch": 0.00971051667277391,
      "grad_norm": 0.8672735287012353,
      "learning_rate": 9.523809523809525e-07,
      "loss": 0.5327,
      "step": 53
    },
    {
      "epoch": 0.009893733968486624,
      "grad_norm": 0.8288169490471791,
      "learning_rate": 9.706959706959708e-07,
      "loss": 0.5673,
      "step": 54
    },
    {
      "epoch": 0.01007695126419934,
      "grad_norm": 0.8603030677007734,
      "learning_rate": 9.890109890109891e-07,
      "loss": 0.5547,
      "step": 55
    },
    {
      "epoch": 0.010260168559912056,
      "grad_norm": 0.7482006424960487,
      "learning_rate": 1.0073260073260074e-06,
      "loss": 0.6012,
      "step": 56
    },
    {
      "epoch": 0.01044338585562477,
      "grad_norm": 0.6144742944660406,
      "learning_rate": 1.0256410256410257e-06,
      "loss": 0.5231,
      "step": 57
    },
    {
      "epoch": 0.010626603151337486,
      "grad_norm": 0.634897676727318,
      "learning_rate": 1.0439560439560442e-06,
      "loss": 0.5726,
      "step": 58
    },
    {
      "epoch": 0.010809820447050202,
      "grad_norm": 0.6410545094813134,
      "learning_rate": 1.0622710622710622e-06,
      "loss": 0.5695,
      "step": 59
    },
    {
      "epoch": 0.010993037742762916,
      "grad_norm": 0.6609456014192859,
      "learning_rate": 1.0805860805860807e-06,
      "loss": 0.5805,
      "step": 60
    },
    {
      "epoch": 0.011176255038475632,
      "grad_norm": 0.7416419125176478,
      "learning_rate": 1.098901098901099e-06,
      "loss": 0.634,
      "step": 61
    },
    {
      "epoch": 0.011359472334188348,
      "grad_norm": 0.6851293138177442,
      "learning_rate": 1.1172161172161173e-06,
      "loss": 0.5544,
      "step": 62
    },
    {
      "epoch": 0.011542689629901062,
      "grad_norm": 0.6383351643137376,
      "learning_rate": 1.1355311355311355e-06,
      "loss": 0.557,
      "step": 63
    },
    {
      "epoch": 0.011725906925613778,
      "grad_norm": 0.6348085314283121,
      "learning_rate": 1.153846153846154e-06,
      "loss": 0.5632,
      "step": 64
    },
    {
      "epoch": 0.011909124221326494,
      "grad_norm": 0.7024363928163538,
      "learning_rate": 1.1721611721611723e-06,
      "loss": 0.5839,
      "step": 65
    },
    {
      "epoch": 0.012092341517039208,
      "grad_norm": 0.5900911656210056,
      "learning_rate": 1.1904761904761906e-06,
      "loss": 0.5344,
      "step": 66
    },
    {
      "epoch": 0.012275558812751924,
      "grad_norm": 0.6275963867777459,
      "learning_rate": 1.2087912087912089e-06,
      "loss": 0.5584,
      "step": 67
    },
    {
      "epoch": 0.01245877610846464,
      "grad_norm": 0.5865171487667508,
      "learning_rate": 1.2271062271062271e-06,
      "loss": 0.575,
      "step": 68
    },
    {
      "epoch": 0.012641993404177354,
      "grad_norm": 0.674046870317235,
      "learning_rate": 1.2454212454212456e-06,
      "loss": 0.5744,
      "step": 69
    },
    {
      "epoch": 0.01282521069989007,
      "grad_norm": 0.60918911566989,
      "learning_rate": 1.2637362637362637e-06,
      "loss": 0.5423,
      "step": 70
    },
    {
      "epoch": 0.013008427995602785,
      "grad_norm": 0.6221860528344966,
      "learning_rate": 1.282051282051282e-06,
      "loss": 0.5652,
      "step": 71
    },
    {
      "epoch": 0.0131916452913155,
      "grad_norm": 0.606665778835579,
      "learning_rate": 1.3003663003663005e-06,
      "loss": 0.6123,
      "step": 72
    },
    {
      "epoch": 0.013374862587028215,
      "grad_norm": 0.5884723188789052,
      "learning_rate": 1.3186813186813187e-06,
      "loss": 0.5863,
      "step": 73
    },
    {
      "epoch": 0.013558079882740931,
      "grad_norm": 0.5824062487652404,
      "learning_rate": 1.336996336996337e-06,
      "loss": 0.5918,
      "step": 74
    },
    {
      "epoch": 0.013741297178453645,
      "grad_norm": 0.5460196146561194,
      "learning_rate": 1.3553113553113553e-06,
      "loss": 0.5912,
      "step": 75
    },
    {
      "epoch": 0.013924514474166361,
      "grad_norm": 0.49213553293102813,
      "learning_rate": 1.3736263736263738e-06,
      "loss": 0.5421,
      "step": 76
    },
    {
      "epoch": 0.014107731769879077,
      "grad_norm": 0.5594168254817149,
      "learning_rate": 1.391941391941392e-06,
      "loss": 0.5122,
      "step": 77
    },
    {
      "epoch": 0.014290949065591791,
      "grad_norm": 0.5277088034821339,
      "learning_rate": 1.4102564102564104e-06,
      "loss": 0.5678,
      "step": 78
    },
    {
      "epoch": 0.014474166361304507,
      "grad_norm": 0.49574136870511754,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 0.5498,
      "step": 79
    },
    {
      "epoch": 0.014657383657017223,
      "grad_norm": 0.5944104688680958,
      "learning_rate": 1.4468864468864471e-06,
      "loss": 0.5469,
      "step": 80
    },
    {
      "epoch": 0.014840600952729937,
      "grad_norm": 0.4732970613815555,
      "learning_rate": 1.4652014652014654e-06,
      "loss": 0.5233,
      "step": 81
    },
    {
      "epoch": 0.015023818248442653,
      "grad_norm": 0.49591541086638596,
      "learning_rate": 1.4835164835164837e-06,
      "loss": 0.5367,
      "step": 82
    },
    {
      "epoch": 0.015207035544155369,
      "grad_norm": 0.4883648219378977,
      "learning_rate": 1.501831501831502e-06,
      "loss": 0.5782,
      "step": 83
    },
    {
      "epoch": 0.015390252839868083,
      "grad_norm": 0.48559293967760114,
      "learning_rate": 1.5201465201465202e-06,
      "loss": 0.5578,
      "step": 84
    },
    {
      "epoch": 0.015573470135580799,
      "grad_norm": 0.5802435990379926,
      "learning_rate": 1.5384615384615387e-06,
      "loss": 0.5562,
      "step": 85
    },
    {
      "epoch": 0.015756687431293513,
      "grad_norm": 0.6655689083082568,
      "learning_rate": 1.556776556776557e-06,
      "loss": 0.5916,
      "step": 86
    },
    {
      "epoch": 0.01593990472700623,
      "grad_norm": 0.5272919501805459,
      "learning_rate": 1.5750915750915753e-06,
      "loss": 0.5462,
      "step": 87
    },
    {
      "epoch": 0.016123122022718945,
      "grad_norm": 0.5025768009972991,
      "learning_rate": 1.5934065934065933e-06,
      "loss": 0.564,
      "step": 88
    },
    {
      "epoch": 0.01630633931843166,
      "grad_norm": 0.4868371012830415,
      "learning_rate": 1.611721611721612e-06,
      "loss": 0.557,
      "step": 89
    },
    {
      "epoch": 0.016489556614144377,
      "grad_norm": 0.4724914851279723,
      "learning_rate": 1.6300366300366301e-06,
      "loss": 0.534,
      "step": 90
    },
    {
      "epoch": 0.01667277390985709,
      "grad_norm": 0.4933114328584066,
      "learning_rate": 1.6483516483516484e-06,
      "loss": 0.5628,
      "step": 91
    },
    {
      "epoch": 0.016855991205569805,
      "grad_norm": 0.5429724547645147,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.5693,
      "step": 92
    },
    {
      "epoch": 0.017039208501282523,
      "grad_norm": 0.46970029586030615,
      "learning_rate": 1.6849816849816852e-06,
      "loss": 0.5761,
      "step": 93
    },
    {
      "epoch": 0.017222425796995237,
      "grad_norm": 0.5466204682716642,
      "learning_rate": 1.7032967032967034e-06,
      "loss": 0.5407,
      "step": 94
    },
    {
      "epoch": 0.01740564309270795,
      "grad_norm": 0.46448768993410167,
      "learning_rate": 1.7216117216117217e-06,
      "loss": 0.5784,
      "step": 95
    },
    {
      "epoch": 0.01758886038842067,
      "grad_norm": 0.46769210989227256,
      "learning_rate": 1.73992673992674e-06,
      "loss": 0.5262,
      "step": 96
    },
    {
      "epoch": 0.017772077684133383,
      "grad_norm": 0.4412027593954725,
      "learning_rate": 1.7582417582417585e-06,
      "loss": 0.5608,
      "step": 97
    },
    {
      "epoch": 0.017955294979846097,
      "grad_norm": 0.5975733130220022,
      "learning_rate": 1.7765567765567768e-06,
      "loss": 0.5633,
      "step": 98
    },
    {
      "epoch": 0.018138512275558814,
      "grad_norm": 0.46156861116006753,
      "learning_rate": 1.794871794871795e-06,
      "loss": 0.5576,
      "step": 99
    },
    {
      "epoch": 0.01832172957127153,
      "grad_norm": 0.5193644526534718,
      "learning_rate": 1.8131868131868133e-06,
      "loss": 0.5533,
      "step": 100
    },
    {
      "epoch": 0.018504946866984243,
      "grad_norm": 0.479596247036775,
      "learning_rate": 1.8315018315018316e-06,
      "loss": 0.5337,
      "step": 101
    },
    {
      "epoch": 0.01868816416269696,
      "grad_norm": 0.4610500867184236,
      "learning_rate": 1.84981684981685e-06,
      "loss": 0.5314,
      "step": 102
    },
    {
      "epoch": 0.018871381458409674,
      "grad_norm": 0.481950984787821,
      "learning_rate": 1.8681318681318684e-06,
      "loss": 0.5865,
      "step": 103
    },
    {
      "epoch": 0.01905459875412239,
      "grad_norm": 0.5225151739123198,
      "learning_rate": 1.8864468864468866e-06,
      "loss": 0.5694,
      "step": 104
    },
    {
      "epoch": 0.019237816049835106,
      "grad_norm": 0.46381468108353424,
      "learning_rate": 1.904761904761905e-06,
      "loss": 0.557,
      "step": 105
    },
    {
      "epoch": 0.01942103334554782,
      "grad_norm": 0.45411743556679485,
      "learning_rate": 1.9230769230769234e-06,
      "loss": 0.5648,
      "step": 106
    },
    {
      "epoch": 0.019604250641260534,
      "grad_norm": 0.4859210831259208,
      "learning_rate": 1.9413919413919417e-06,
      "loss": 0.5685,
      "step": 107
    },
    {
      "epoch": 0.01978746793697325,
      "grad_norm": 0.4205701080898951,
      "learning_rate": 1.95970695970696e-06,
      "loss": 0.5426,
      "step": 108
    },
    {
      "epoch": 0.019970685232685966,
      "grad_norm": 0.5044757429436201,
      "learning_rate": 1.9780219780219782e-06,
      "loss": 0.5509,
      "step": 109
    },
    {
      "epoch": 0.02015390252839868,
      "grad_norm": 0.47571345015861244,
      "learning_rate": 1.9963369963369965e-06,
      "loss": 0.5281,
      "step": 110
    },
    {
      "epoch": 0.020337119824111394,
      "grad_norm": 0.5681283820477757,
      "learning_rate": 2.0146520146520148e-06,
      "loss": 0.5398,
      "step": 111
    },
    {
      "epoch": 0.020520337119824112,
      "grad_norm": 0.48199138528425167,
      "learning_rate": 2.032967032967033e-06,
      "loss": 0.56,
      "step": 112
    },
    {
      "epoch": 0.020703554415536826,
      "grad_norm": 0.4841461198775233,
      "learning_rate": 2.0512820512820513e-06,
      "loss": 0.5454,
      "step": 113
    },
    {
      "epoch": 0.02088677171124954,
      "grad_norm": 0.4680411378560794,
      "learning_rate": 2.0695970695970696e-06,
      "loss": 0.516,
      "step": 114
    },
    {
      "epoch": 0.021069989006962258,
      "grad_norm": 0.4426302566203345,
      "learning_rate": 2.0879120879120883e-06,
      "loss": 0.5493,
      "step": 115
    },
    {
      "epoch": 0.021253206302674972,
      "grad_norm": 0.5379521772056074,
      "learning_rate": 2.1062271062271066e-06,
      "loss": 0.5642,
      "step": 116
    },
    {
      "epoch": 0.021436423598387686,
      "grad_norm": 0.42644965401865687,
      "learning_rate": 2.1245421245421245e-06,
      "loss": 0.564,
      "step": 117
    },
    {
      "epoch": 0.021619640894100404,
      "grad_norm": 1.3341683924694292,
      "learning_rate": 2.1428571428571427e-06,
      "loss": 0.5831,
      "step": 118
    },
    {
      "epoch": 0.021802858189813118,
      "grad_norm": 0.5613781478585862,
      "learning_rate": 2.1611721611721614e-06,
      "loss": 0.5598,
      "step": 119
    },
    {
      "epoch": 0.021986075485525832,
      "grad_norm": 0.4389899980262906,
      "learning_rate": 2.1794871794871797e-06,
      "loss": 0.5498,
      "step": 120
    },
    {
      "epoch": 0.02216929278123855,
      "grad_norm": 0.5433793107490897,
      "learning_rate": 2.197802197802198e-06,
      "loss": 0.5838,
      "step": 121
    },
    {
      "epoch": 0.022352510076951264,
      "grad_norm": 0.49992126724035435,
      "learning_rate": 2.2161172161172163e-06,
      "loss": 0.5581,
      "step": 122
    },
    {
      "epoch": 0.022535727372663978,
      "grad_norm": 0.5192253683114394,
      "learning_rate": 2.2344322344322345e-06,
      "loss": 0.5963,
      "step": 123
    },
    {
      "epoch": 0.022718944668376696,
      "grad_norm": 0.5218720529273816,
      "learning_rate": 2.252747252747253e-06,
      "loss": 0.5568,
      "step": 124
    },
    {
      "epoch": 0.02290216196408941,
      "grad_norm": 0.41135006043138017,
      "learning_rate": 2.271062271062271e-06,
      "loss": 0.5256,
      "step": 125
    },
    {
      "epoch": 0.023085379259802124,
      "grad_norm": 0.4960132808732071,
      "learning_rate": 2.2893772893772894e-06,
      "loss": 0.5636,
      "step": 126
    },
    {
      "epoch": 0.02326859655551484,
      "grad_norm": 0.40294874528313,
      "learning_rate": 2.307692307692308e-06,
      "loss": 0.5373,
      "step": 127
    },
    {
      "epoch": 0.023451813851227556,
      "grad_norm": 0.49129951716432685,
      "learning_rate": 2.3260073260073264e-06,
      "loss": 0.5571,
      "step": 128
    },
    {
      "epoch": 0.02363503114694027,
      "grad_norm": 0.4541626724950761,
      "learning_rate": 2.3443223443223446e-06,
      "loss": 0.5407,
      "step": 129
    },
    {
      "epoch": 0.023818248442652987,
      "grad_norm": 0.44854257290105054,
      "learning_rate": 2.362637362637363e-06,
      "loss": 0.5516,
      "step": 130
    },
    {
      "epoch": 0.0240014657383657,
      "grad_norm": 0.4439039076319426,
      "learning_rate": 2.380952380952381e-06,
      "loss": 0.5192,
      "step": 131
    },
    {
      "epoch": 0.024184683034078416,
      "grad_norm": 0.4410959524536652,
      "learning_rate": 2.3992673992673995e-06,
      "loss": 0.5678,
      "step": 132
    },
    {
      "epoch": 0.024367900329791133,
      "grad_norm": 0.49361868947998544,
      "learning_rate": 2.4175824175824177e-06,
      "loss": 0.5591,
      "step": 133
    },
    {
      "epoch": 0.024551117625503847,
      "grad_norm": 0.5023022664764156,
      "learning_rate": 2.435897435897436e-06,
      "loss": 0.5818,
      "step": 134
    },
    {
      "epoch": 0.02473433492121656,
      "grad_norm": 0.4287249843742935,
      "learning_rate": 2.4542124542124543e-06,
      "loss": 0.5532,
      "step": 135
    },
    {
      "epoch": 0.02491755221692928,
      "grad_norm": 0.43595844702277614,
      "learning_rate": 2.472527472527473e-06,
      "loss": 0.534,
      "step": 136
    },
    {
      "epoch": 0.025100769512641993,
      "grad_norm": 0.5546618594341888,
      "learning_rate": 2.4908424908424913e-06,
      "loss": 0.5058,
      "step": 137
    },
    {
      "epoch": 0.025283986808354707,
      "grad_norm": 0.5017549598118495,
      "learning_rate": 2.509157509157509e-06,
      "loss": 0.5416,
      "step": 138
    },
    {
      "epoch": 0.025467204104067425,
      "grad_norm": 0.47047331776441903,
      "learning_rate": 2.5274725274725274e-06,
      "loss": 0.5557,
      "step": 139
    },
    {
      "epoch": 0.02565042139978014,
      "grad_norm": 0.4881037742451912,
      "learning_rate": 2.5457875457875457e-06,
      "loss": 0.5494,
      "step": 140
    },
    {
      "epoch": 0.025833638695492853,
      "grad_norm": 0.41933017046045795,
      "learning_rate": 2.564102564102564e-06,
      "loss": 0.5729,
      "step": 141
    },
    {
      "epoch": 0.02601685599120557,
      "grad_norm": 0.39974209370594543,
      "learning_rate": 2.582417582417583e-06,
      "loss": 0.5381,
      "step": 142
    },
    {
      "epoch": 0.026200073286918285,
      "grad_norm": 0.4473481461077892,
      "learning_rate": 2.600732600732601e-06,
      "loss": 0.5336,
      "step": 143
    },
    {
      "epoch": 0.026383290582631,
      "grad_norm": 0.45197895778035446,
      "learning_rate": 2.6190476190476192e-06,
      "loss": 0.5271,
      "step": 144
    },
    {
      "epoch": 0.026566507878343717,
      "grad_norm": 0.49529182062519755,
      "learning_rate": 2.6373626373626375e-06,
      "loss": 0.5468,
      "step": 145
    },
    {
      "epoch": 0.02674972517405643,
      "grad_norm": 0.5413416851523152,
      "learning_rate": 2.6556776556776558e-06,
      "loss": 0.5807,
      "step": 146
    },
    {
      "epoch": 0.026932942469769145,
      "grad_norm": 0.4703582853460863,
      "learning_rate": 2.673992673992674e-06,
      "loss": 0.538,
      "step": 147
    },
    {
      "epoch": 0.027116159765481863,
      "grad_norm": 0.4332295907365602,
      "learning_rate": 2.6923076923076923e-06,
      "loss": 0.5167,
      "step": 148
    },
    {
      "epoch": 0.027299377061194577,
      "grad_norm": 0.458616583095158,
      "learning_rate": 2.7106227106227106e-06,
      "loss": 0.5226,
      "step": 149
    },
    {
      "epoch": 0.02748259435690729,
      "grad_norm": 0.43740744109233864,
      "learning_rate": 2.728937728937729e-06,
      "loss": 0.541,
      "step": 150
    },
    {
      "epoch": 0.02766581165262001,
      "grad_norm": 0.5735574364330706,
      "learning_rate": 2.7472527472527476e-06,
      "loss": 0.5835,
      "step": 151
    },
    {
      "epoch": 0.027849028948332723,
      "grad_norm": 0.5055518766772779,
      "learning_rate": 2.765567765567766e-06,
      "loss": 0.5631,
      "step": 152
    },
    {
      "epoch": 0.028032246244045437,
      "grad_norm": 0.43271616931082146,
      "learning_rate": 2.783882783882784e-06,
      "loss": 0.5426,
      "step": 153
    },
    {
      "epoch": 0.028215463539758154,
      "grad_norm": 0.4971463373765023,
      "learning_rate": 2.8021978021978024e-06,
      "loss": 0.5389,
      "step": 154
    },
    {
      "epoch": 0.02839868083547087,
      "grad_norm": 0.4591734918280654,
      "learning_rate": 2.8205128205128207e-06,
      "loss": 0.5309,
      "step": 155
    },
    {
      "epoch": 0.028581898131183583,
      "grad_norm": 0.45247087926212365,
      "learning_rate": 2.838827838827839e-06,
      "loss": 0.5536,
      "step": 156
    },
    {
      "epoch": 0.0287651154268963,
      "grad_norm": 0.46626015341742694,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 0.559,
      "step": 157
    },
    {
      "epoch": 0.028948332722609015,
      "grad_norm": 0.4834854871636071,
      "learning_rate": 2.8754578754578755e-06,
      "loss": 0.5476,
      "step": 158
    },
    {
      "epoch": 0.02913155001832173,
      "grad_norm": 0.570546858013933,
      "learning_rate": 2.8937728937728942e-06,
      "loss": 0.5394,
      "step": 159
    },
    {
      "epoch": 0.029314767314034446,
      "grad_norm": 0.45441480104130544,
      "learning_rate": 2.9120879120879125e-06,
      "loss": 0.5029,
      "step": 160
    },
    {
      "epoch": 0.02949798460974716,
      "grad_norm": 0.5206871098196969,
      "learning_rate": 2.930402930402931e-06,
      "loss": 0.528,
      "step": 161
    },
    {
      "epoch": 0.029681201905459875,
      "grad_norm": 0.43359542229542136,
      "learning_rate": 2.948717948717949e-06,
      "loss": 0.5502,
      "step": 162
    },
    {
      "epoch": 0.029864419201172592,
      "grad_norm": 0.46302617065984364,
      "learning_rate": 2.9670329670329673e-06,
      "loss": 0.5447,
      "step": 163
    },
    {
      "epoch": 0.030047636496885306,
      "grad_norm": 0.3915642271916536,
      "learning_rate": 2.9853479853479856e-06,
      "loss": 0.503,
      "step": 164
    },
    {
      "epoch": 0.03023085379259802,
      "grad_norm": 0.42991246482419715,
      "learning_rate": 3.003663003663004e-06,
      "loss": 0.521,
      "step": 165
    },
    {
      "epoch": 0.030414071088310738,
      "grad_norm": 0.468175424095518,
      "learning_rate": 3.021978021978022e-06,
      "loss": 0.5101,
      "step": 166
    },
    {
      "epoch": 0.030597288384023452,
      "grad_norm": 0.6468735604295471,
      "learning_rate": 3.0402930402930405e-06,
      "loss": 0.5617,
      "step": 167
    },
    {
      "epoch": 0.030780505679736166,
      "grad_norm": 0.5058923699848836,
      "learning_rate": 3.058608058608059e-06,
      "loss": 0.5154,
      "step": 168
    },
    {
      "epoch": 0.030963722975448884,
      "grad_norm": 0.45437537978064513,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 0.4993,
      "step": 169
    },
    {
      "epoch": 0.031146940271161598,
      "grad_norm": 0.467183819204265,
      "learning_rate": 3.0952380952380957e-06,
      "loss": 0.5701,
      "step": 170
    },
    {
      "epoch": 0.031330157566874316,
      "grad_norm": 0.3835674459614267,
      "learning_rate": 3.113553113553114e-06,
      "loss": 0.4902,
      "step": 171
    },
    {
      "epoch": 0.031513374862587026,
      "grad_norm": 0.4292795112150875,
      "learning_rate": 3.1318681318681323e-06,
      "loss": 0.5237,
      "step": 172
    },
    {
      "epoch": 0.031696592158299744,
      "grad_norm": 0.6593051731116806,
      "learning_rate": 3.1501831501831505e-06,
      "loss": 0.5361,
      "step": 173
    },
    {
      "epoch": 0.03187980945401246,
      "grad_norm": 0.4718436483558741,
      "learning_rate": 3.1684981684981684e-06,
      "loss": 0.5324,
      "step": 174
    },
    {
      "epoch": 0.03206302674972517,
      "grad_norm": 0.48018416222395494,
      "learning_rate": 3.1868131868131867e-06,
      "loss": 0.5273,
      "step": 175
    },
    {
      "epoch": 0.03224624404543789,
      "grad_norm": 0.6316028239985647,
      "learning_rate": 3.205128205128206e-06,
      "loss": 0.5346,
      "step": 176
    },
    {
      "epoch": 0.03242946134115061,
      "grad_norm": 0.4596646339699305,
      "learning_rate": 3.223443223443224e-06,
      "loss": 0.5263,
      "step": 177
    },
    {
      "epoch": 0.03261267863686332,
      "grad_norm": 0.6529252419894329,
      "learning_rate": 3.2417582417582424e-06,
      "loss": 0.5442,
      "step": 178
    },
    {
      "epoch": 0.032795895932576036,
      "grad_norm": 0.5538108341969676,
      "learning_rate": 3.2600732600732602e-06,
      "loss": 0.543,
      "step": 179
    },
    {
      "epoch": 0.03297911322828875,
      "grad_norm": 0.46058359751530825,
      "learning_rate": 3.2783882783882785e-06,
      "loss": 0.5101,
      "step": 180
    },
    {
      "epoch": 0.033162330524001464,
      "grad_norm": 0.42238032667898895,
      "learning_rate": 3.2967032967032968e-06,
      "loss": 0.5487,
      "step": 181
    },
    {
      "epoch": 0.03334554781971418,
      "grad_norm": 0.49973876221605035,
      "learning_rate": 3.315018315018315e-06,
      "loss": 0.5333,
      "step": 182
    },
    {
      "epoch": 0.0335287651154269,
      "grad_norm": 0.49185079032879564,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.5733,
      "step": 183
    },
    {
      "epoch": 0.03371198241113961,
      "grad_norm": 0.41357500203470565,
      "learning_rate": 3.3516483516483516e-06,
      "loss": 0.5108,
      "step": 184
    },
    {
      "epoch": 0.03389519970685233,
      "grad_norm": 0.45597659979596383,
      "learning_rate": 3.3699633699633703e-06,
      "loss": 0.5316,
      "step": 185
    },
    {
      "epoch": 0.034078417002565045,
      "grad_norm": 0.523873134306111,
      "learning_rate": 3.3882783882783886e-06,
      "loss": 0.5516,
      "step": 186
    },
    {
      "epoch": 0.034261634298277756,
      "grad_norm": 0.4604705346503309,
      "learning_rate": 3.406593406593407e-06,
      "loss": 0.5463,
      "step": 187
    },
    {
      "epoch": 0.03444485159399047,
      "grad_norm": 0.4494660179181277,
      "learning_rate": 3.424908424908425e-06,
      "loss": 0.5195,
      "step": 188
    },
    {
      "epoch": 0.03462806888970319,
      "grad_norm": 0.47161535336220833,
      "learning_rate": 3.4432234432234434e-06,
      "loss": 0.5495,
      "step": 189
    },
    {
      "epoch": 0.0348112861854159,
      "grad_norm": 0.41422931205807795,
      "learning_rate": 3.4615384615384617e-06,
      "loss": 0.5165,
      "step": 190
    },
    {
      "epoch": 0.03499450348112862,
      "grad_norm": 0.48552414748511474,
      "learning_rate": 3.47985347985348e-06,
      "loss": 0.5207,
      "step": 191
    },
    {
      "epoch": 0.03517772077684134,
      "grad_norm": 0.49918231460984896,
      "learning_rate": 3.4981684981684982e-06,
      "loss": 0.5055,
      "step": 192
    },
    {
      "epoch": 0.03536093807255405,
      "grad_norm": 0.5047784031000427,
      "learning_rate": 3.516483516483517e-06,
      "loss": 0.5541,
      "step": 193
    },
    {
      "epoch": 0.035544155368266765,
      "grad_norm": 0.45515393167769386,
      "learning_rate": 3.5347985347985352e-06,
      "loss": 0.5324,
      "step": 194
    },
    {
      "epoch": 0.03572737266397948,
      "grad_norm": 0.4697755766255993,
      "learning_rate": 3.5531135531135535e-06,
      "loss": 0.5317,
      "step": 195
    },
    {
      "epoch": 0.035910589959692193,
      "grad_norm": 0.47670842684148323,
      "learning_rate": 3.5714285714285718e-06,
      "loss": 0.5331,
      "step": 196
    },
    {
      "epoch": 0.03609380725540491,
      "grad_norm": 0.46160363428859325,
      "learning_rate": 3.58974358974359e-06,
      "loss": 0.5645,
      "step": 197
    },
    {
      "epoch": 0.03627702455111763,
      "grad_norm": 0.5456279444043086,
      "learning_rate": 3.6080586080586083e-06,
      "loss": 0.4923,
      "step": 198
    },
    {
      "epoch": 0.03646024184683034,
      "grad_norm": 0.45679324633890633,
      "learning_rate": 3.6263736263736266e-06,
      "loss": 0.5408,
      "step": 199
    },
    {
      "epoch": 0.03664345914254306,
      "grad_norm": 0.4038620329916062,
      "learning_rate": 3.644688644688645e-06,
      "loss": 0.5542,
      "step": 200
    },
    {
      "epoch": 0.036826676438255775,
      "grad_norm": 0.42338470275172285,
      "learning_rate": 3.663003663003663e-06,
      "loss": 0.5509,
      "step": 201
    },
    {
      "epoch": 0.037009893733968485,
      "grad_norm": 0.48862631901451187,
      "learning_rate": 3.681318681318682e-06,
      "loss": 0.533,
      "step": 202
    },
    {
      "epoch": 0.0371931110296812,
      "grad_norm": 0.4339739485667441,
      "learning_rate": 3.6996336996337e-06,
      "loss": 0.5331,
      "step": 203
    },
    {
      "epoch": 0.03737632832539392,
      "grad_norm": 0.4040593227125272,
      "learning_rate": 3.7179487179487184e-06,
      "loss": 0.568,
      "step": 204
    },
    {
      "epoch": 0.03755954562110663,
      "grad_norm": 0.5025361443024049,
      "learning_rate": 3.7362637362637367e-06,
      "loss": 0.5616,
      "step": 205
    },
    {
      "epoch": 0.03774276291681935,
      "grad_norm": 0.43671528407867277,
      "learning_rate": 3.754578754578755e-06,
      "loss": 0.5468,
      "step": 206
    },
    {
      "epoch": 0.037925980212532066,
      "grad_norm": 0.5426409729355149,
      "learning_rate": 3.7728937728937733e-06,
      "loss": 0.5274,
      "step": 207
    },
    {
      "epoch": 0.03810919750824478,
      "grad_norm": 0.4383066270524436,
      "learning_rate": 3.7912087912087915e-06,
      "loss": 0.5491,
      "step": 208
    },
    {
      "epoch": 0.038292414803957495,
      "grad_norm": 0.4541571165503901,
      "learning_rate": 3.80952380952381e-06,
      "loss": 0.511,
      "step": 209
    },
    {
      "epoch": 0.03847563209967021,
      "grad_norm": 0.42914694472434756,
      "learning_rate": 3.827838827838828e-06,
      "loss": 0.4891,
      "step": 210
    },
    {
      "epoch": 0.03865884939538292,
      "grad_norm": 0.4583586099579229,
      "learning_rate": 3.846153846153847e-06,
      "loss": 0.5359,
      "step": 211
    },
    {
      "epoch": 0.03884206669109564,
      "grad_norm": 0.46332307938408596,
      "learning_rate": 3.864468864468865e-06,
      "loss": 0.506,
      "step": 212
    },
    {
      "epoch": 0.03902528398680836,
      "grad_norm": 0.4979093495563886,
      "learning_rate": 3.882783882783883e-06,
      "loss": 0.5523,
      "step": 213
    },
    {
      "epoch": 0.03920850128252107,
      "grad_norm": 0.524225251704003,
      "learning_rate": 3.901098901098901e-06,
      "loss": 0.543,
      "step": 214
    },
    {
      "epoch": 0.039391718578233786,
      "grad_norm": 0.5147235908704881,
      "learning_rate": 3.91941391941392e-06,
      "loss": 0.5465,
      "step": 215
    },
    {
      "epoch": 0.0395749358739465,
      "grad_norm": 0.4170039710750516,
      "learning_rate": 3.937728937728938e-06,
      "loss": 0.557,
      "step": 216
    },
    {
      "epoch": 0.039758153169659215,
      "grad_norm": 0.4459151957597281,
      "learning_rate": 3.9560439560439565e-06,
      "loss": 0.5176,
      "step": 217
    },
    {
      "epoch": 0.03994137046537193,
      "grad_norm": 0.4387028825678643,
      "learning_rate": 3.974358974358974e-06,
      "loss": 0.5433,
      "step": 218
    },
    {
      "epoch": 0.04012458776108464,
      "grad_norm": 0.4606220492260897,
      "learning_rate": 3.992673992673993e-06,
      "loss": 0.5558,
      "step": 219
    },
    {
      "epoch": 0.04030780505679736,
      "grad_norm": 0.43876788524484817,
      "learning_rate": 4.010989010989012e-06,
      "loss": 0.496,
      "step": 220
    },
    {
      "epoch": 0.04049102235251008,
      "grad_norm": 0.5354442317270937,
      "learning_rate": 4.0293040293040296e-06,
      "loss": 0.4883,
      "step": 221
    },
    {
      "epoch": 0.04067423964822279,
      "grad_norm": 0.4471338769246311,
      "learning_rate": 4.047619047619048e-06,
      "loss": 0.5248,
      "step": 222
    },
    {
      "epoch": 0.040857456943935506,
      "grad_norm": 0.4728450428012797,
      "learning_rate": 4.065934065934066e-06,
      "loss": 0.5412,
      "step": 223
    },
    {
      "epoch": 0.041040674239648224,
      "grad_norm": 0.5021628515290991,
      "learning_rate": 4.084249084249085e-06,
      "loss": 0.5398,
      "step": 224
    },
    {
      "epoch": 0.041223891535360935,
      "grad_norm": 0.379469323319607,
      "learning_rate": 4.102564102564103e-06,
      "loss": 0.5484,
      "step": 225
    },
    {
      "epoch": 0.04140710883107365,
      "grad_norm": 0.4682923741442823,
      "learning_rate": 4.120879120879121e-06,
      "loss": 0.5221,
      "step": 226
    },
    {
      "epoch": 0.04159032612678637,
      "grad_norm": 0.5298230539031403,
      "learning_rate": 4.139194139194139e-06,
      "loss": 0.5248,
      "step": 227
    },
    {
      "epoch": 0.04177354342249908,
      "grad_norm": 0.448919135267925,
      "learning_rate": 4.157509157509158e-06,
      "loss": 0.5492,
      "step": 228
    },
    {
      "epoch": 0.0419567607182118,
      "grad_norm": 0.4651550006247672,
      "learning_rate": 4.175824175824177e-06,
      "loss": 0.5244,
      "step": 229
    },
    {
      "epoch": 0.042139978013924516,
      "grad_norm": 0.46885786974024124,
      "learning_rate": 4.1941391941391945e-06,
      "loss": 0.5145,
      "step": 230
    },
    {
      "epoch": 0.04232319530963723,
      "grad_norm": 0.9939588576814024,
      "learning_rate": 4.212454212454213e-06,
      "loss": 0.507,
      "step": 231
    },
    {
      "epoch": 0.042506412605349944,
      "grad_norm": 0.42445387506164906,
      "learning_rate": 4.230769230769231e-06,
      "loss": 0.528,
      "step": 232
    },
    {
      "epoch": 0.04268962990106266,
      "grad_norm": 0.4386077325175301,
      "learning_rate": 4.249084249084249e-06,
      "loss": 0.5235,
      "step": 233
    },
    {
      "epoch": 0.04287284719677537,
      "grad_norm": 0.4275787850644743,
      "learning_rate": 4.267399267399268e-06,
      "loss": 0.5268,
      "step": 234
    },
    {
      "epoch": 0.04305606449248809,
      "grad_norm": 0.3819534900324145,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 0.5108,
      "step": 235
    },
    {
      "epoch": 0.04323928178820081,
      "grad_norm": 0.5069196499403993,
      "learning_rate": 4.304029304029305e-06,
      "loss": 0.4914,
      "step": 236
    },
    {
      "epoch": 0.04342249908391352,
      "grad_norm": 0.4224671659155117,
      "learning_rate": 4.322344322344323e-06,
      "loss": 0.531,
      "step": 237
    },
    {
      "epoch": 0.043605716379626236,
      "grad_norm": 0.49892435842001814,
      "learning_rate": 4.340659340659341e-06,
      "loss": 0.5309,
      "step": 238
    },
    {
      "epoch": 0.043788933675338954,
      "grad_norm": 0.4435906661481072,
      "learning_rate": 4.358974358974359e-06,
      "loss": 0.5386,
      "step": 239
    },
    {
      "epoch": 0.043972150971051664,
      "grad_norm": 0.4646701721197805,
      "learning_rate": 4.377289377289377e-06,
      "loss": 0.5445,
      "step": 240
    },
    {
      "epoch": 0.04415536826676438,
      "grad_norm": 0.42345412346700445,
      "learning_rate": 4.395604395604396e-06,
      "loss": 0.5171,
      "step": 241
    },
    {
      "epoch": 0.0443385855624771,
      "grad_norm": 0.4664513677280782,
      "learning_rate": 4.413919413919414e-06,
      "loss": 0.5471,
      "step": 242
    },
    {
      "epoch": 0.04452180285818981,
      "grad_norm": 0.5007403600133091,
      "learning_rate": 4.4322344322344325e-06,
      "loss": 0.5224,
      "step": 243
    },
    {
      "epoch": 0.04470502015390253,
      "grad_norm": 0.4202775330369337,
      "learning_rate": 4.45054945054945e-06,
      "loss": 0.5283,
      "step": 244
    },
    {
      "epoch": 0.044888237449615245,
      "grad_norm": 0.485312343325994,
      "learning_rate": 4.468864468864469e-06,
      "loss": 0.5365,
      "step": 245
    },
    {
      "epoch": 0.045071454745327956,
      "grad_norm": 0.4796744192562041,
      "learning_rate": 4.487179487179488e-06,
      "loss": 0.5481,
      "step": 246
    },
    {
      "epoch": 0.045254672041040674,
      "grad_norm": 0.4699525411145978,
      "learning_rate": 4.505494505494506e-06,
      "loss": 0.526,
      "step": 247
    },
    {
      "epoch": 0.04543788933675339,
      "grad_norm": 0.37235134296143163,
      "learning_rate": 4.523809523809524e-06,
      "loss": 0.4929,
      "step": 248
    },
    {
      "epoch": 0.0456211066324661,
      "grad_norm": 0.4594065464998237,
      "learning_rate": 4.542124542124542e-06,
      "loss": 0.5435,
      "step": 249
    },
    {
      "epoch": 0.04580432392817882,
      "grad_norm": 0.4812140084395847,
      "learning_rate": 4.560439560439561e-06,
      "loss": 0.4714,
      "step": 250
    },
    {
      "epoch": 0.04598754122389154,
      "grad_norm": 0.44275937617791644,
      "learning_rate": 4.578754578754579e-06,
      "loss": 0.5289,
      "step": 251
    },
    {
      "epoch": 0.04617075851960425,
      "grad_norm": 0.543477861772032,
      "learning_rate": 4.5970695970695975e-06,
      "loss": 0.5176,
      "step": 252
    },
    {
      "epoch": 0.046353975815316965,
      "grad_norm": 0.9195336625704912,
      "learning_rate": 4.615384615384616e-06,
      "loss": 0.5315,
      "step": 253
    },
    {
      "epoch": 0.04653719311102968,
      "grad_norm": 0.4922315670719196,
      "learning_rate": 4.633699633699634e-06,
      "loss": 0.5364,
      "step": 254
    },
    {
      "epoch": 0.046720410406742394,
      "grad_norm": 0.47361690353516367,
      "learning_rate": 4.652014652014653e-06,
      "loss": 0.5591,
      "step": 255
    },
    {
      "epoch": 0.04690362770245511,
      "grad_norm": 0.5164779414047217,
      "learning_rate": 4.6703296703296706e-06,
      "loss": 0.5538,
      "step": 256
    },
    {
      "epoch": 0.04708684499816783,
      "grad_norm": 0.421397036777767,
      "learning_rate": 4.688644688644689e-06,
      "loss": 0.5338,
      "step": 257
    },
    {
      "epoch": 0.04727006229388054,
      "grad_norm": 0.5053871259325204,
      "learning_rate": 4.706959706959707e-06,
      "loss": 0.544,
      "step": 258
    },
    {
      "epoch": 0.04745327958959326,
      "grad_norm": 0.4607447877406368,
      "learning_rate": 4.725274725274726e-06,
      "loss": 0.4812,
      "step": 259
    },
    {
      "epoch": 0.047636496885305975,
      "grad_norm": 0.4875422302168998,
      "learning_rate": 4.743589743589744e-06,
      "loss": 0.5614,
      "step": 260
    },
    {
      "epoch": 0.047819714181018685,
      "grad_norm": 0.41579068200919733,
      "learning_rate": 4.761904761904762e-06,
      "loss": 0.4729,
      "step": 261
    },
    {
      "epoch": 0.0480029314767314,
      "grad_norm": 0.46282242693556186,
      "learning_rate": 4.780219780219781e-06,
      "loss": 0.5224,
      "step": 262
    },
    {
      "epoch": 0.04818614877244412,
      "grad_norm": 0.4482830307148575,
      "learning_rate": 4.798534798534799e-06,
      "loss": 0.5326,
      "step": 263
    },
    {
      "epoch": 0.04836936606815683,
      "grad_norm": 0.4342637761169385,
      "learning_rate": 4.816849816849818e-06,
      "loss": 0.5056,
      "step": 264
    },
    {
      "epoch": 0.04855258336386955,
      "grad_norm": 0.42342307710917526,
      "learning_rate": 4.8351648351648355e-06,
      "loss": 0.5049,
      "step": 265
    },
    {
      "epoch": 0.04873580065958227,
      "grad_norm": 0.41567729548709964,
      "learning_rate": 4.853479853479854e-06,
      "loss": 0.5313,
      "step": 266
    },
    {
      "epoch": 0.04891901795529498,
      "grad_norm": 0.44877730848158315,
      "learning_rate": 4.871794871794872e-06,
      "loss": 0.5027,
      "step": 267
    },
    {
      "epoch": 0.049102235251007695,
      "grad_norm": 0.46298601715996757,
      "learning_rate": 4.890109890109891e-06,
      "loss": 0.5418,
      "step": 268
    },
    {
      "epoch": 0.04928545254672041,
      "grad_norm": 0.4233152082129357,
      "learning_rate": 4.908424908424909e-06,
      "loss": 0.534,
      "step": 269
    },
    {
      "epoch": 0.04946866984243312,
      "grad_norm": 0.42327618076780654,
      "learning_rate": 4.926739926739927e-06,
      "loss": 0.5073,
      "step": 270
    },
    {
      "epoch": 0.04965188713814584,
      "grad_norm": 0.47132160003804374,
      "learning_rate": 4.945054945054946e-06,
      "loss": 0.564,
      "step": 271
    },
    {
      "epoch": 0.04983510443385856,
      "grad_norm": 0.467395758671848,
      "learning_rate": 4.963369963369964e-06,
      "loss": 0.5194,
      "step": 272
    },
    {
      "epoch": 0.05001832172957127,
      "grad_norm": 0.4377241688268797,
      "learning_rate": 4.9816849816849826e-06,
      "loss": 0.5473,
      "step": 273
    },
    {
      "epoch": 0.05020153902528399,
      "grad_norm": 0.4344310773487788,
      "learning_rate": 5e-06,
      "loss": 0.5198,
      "step": 274
    },
    {
      "epoch": 0.050384756320996704,
      "grad_norm": 0.505691771505538,
      "learning_rate": 5.018315018315018e-06,
      "loss": 0.5001,
      "step": 275
    },
    {
      "epoch": 0.050567973616709415,
      "grad_norm": 0.4741702696479342,
      "learning_rate": 5.036630036630037e-06,
      "loss": 0.5269,
      "step": 276
    },
    {
      "epoch": 0.05075119091242213,
      "grad_norm": 0.4123760853657366,
      "learning_rate": 5.054945054945055e-06,
      "loss": 0.5366,
      "step": 277
    },
    {
      "epoch": 0.05093440820813485,
      "grad_norm": 0.5165952852785715,
      "learning_rate": 5.0732600732600735e-06,
      "loss": 0.5629,
      "step": 278
    },
    {
      "epoch": 0.05111762550384756,
      "grad_norm": 0.6018743338441076,
      "learning_rate": 5.091575091575091e-06,
      "loss": 0.5268,
      "step": 279
    },
    {
      "epoch": 0.05130084279956028,
      "grad_norm": 0.4647905088113548,
      "learning_rate": 5.10989010989011e-06,
      "loss": 0.5353,
      "step": 280
    },
    {
      "epoch": 0.051484060095272996,
      "grad_norm": 0.4666862939383661,
      "learning_rate": 5.128205128205128e-06,
      "loss": 0.5582,
      "step": 281
    },
    {
      "epoch": 0.05166727739098571,
      "grad_norm": 0.42295571485014016,
      "learning_rate": 5.146520146520147e-06,
      "loss": 0.5389,
      "step": 282
    },
    {
      "epoch": 0.051850494686698424,
      "grad_norm": 0.5088982589681916,
      "learning_rate": 5.164835164835166e-06,
      "loss": 0.5114,
      "step": 283
    },
    {
      "epoch": 0.05203371198241114,
      "grad_norm": 0.39252264391052066,
      "learning_rate": 5.183150183150184e-06,
      "loss": 0.5078,
      "step": 284
    },
    {
      "epoch": 0.05221692927812385,
      "grad_norm": 0.47464881831711925,
      "learning_rate": 5.201465201465202e-06,
      "loss": 0.5719,
      "step": 285
    },
    {
      "epoch": 0.05240014657383657,
      "grad_norm": 0.42811181510690394,
      "learning_rate": 5.219780219780221e-06,
      "loss": 0.536,
      "step": 286
    },
    {
      "epoch": 0.05258336386954929,
      "grad_norm": 0.4983761768332983,
      "learning_rate": 5.2380952380952384e-06,
      "loss": 0.5454,
      "step": 287
    },
    {
      "epoch": 0.052766581165262,
      "grad_norm": 0.3871752405078846,
      "learning_rate": 5.256410256410257e-06,
      "loss": 0.5172,
      "step": 288
    },
    {
      "epoch": 0.052949798460974716,
      "grad_norm": 0.44878294094458826,
      "learning_rate": 5.274725274725275e-06,
      "loss": 0.4965,
      "step": 289
    },
    {
      "epoch": 0.053133015756687434,
      "grad_norm": 0.4193937449864018,
      "learning_rate": 5.293040293040294e-06,
      "loss": 0.5418,
      "step": 290
    },
    {
      "epoch": 0.053316233052400144,
      "grad_norm": 0.47326184468203625,
      "learning_rate": 5.3113553113553116e-06,
      "loss": 0.5465,
      "step": 291
    },
    {
      "epoch": 0.05349945034811286,
      "grad_norm": 0.46890596965933473,
      "learning_rate": 5.32967032967033e-06,
      "loss": 0.5427,
      "step": 292
    },
    {
      "epoch": 0.05368266764382558,
      "grad_norm": 0.4718404244115825,
      "learning_rate": 5.347985347985348e-06,
      "loss": 0.5315,
      "step": 293
    },
    {
      "epoch": 0.05386588493953829,
      "grad_norm": 0.5313479853203268,
      "learning_rate": 5.366300366300367e-06,
      "loss": 0.5261,
      "step": 294
    },
    {
      "epoch": 0.05404910223525101,
      "grad_norm": 0.4619862699623299,
      "learning_rate": 5.384615384615385e-06,
      "loss": 0.5224,
      "step": 295
    },
    {
      "epoch": 0.054232319530963725,
      "grad_norm": 0.4235637361399875,
      "learning_rate": 5.402930402930403e-06,
      "loss": 0.5484,
      "step": 296
    },
    {
      "epoch": 0.054415536826676436,
      "grad_norm": 0.4826898937600368,
      "learning_rate": 5.421245421245421e-06,
      "loss": 0.524,
      "step": 297
    },
    {
      "epoch": 0.054598754122389154,
      "grad_norm": 0.43904512964940123,
      "learning_rate": 5.43956043956044e-06,
      "loss": 0.5486,
      "step": 298
    },
    {
      "epoch": 0.05478197141810187,
      "grad_norm": 0.5045757484045217,
      "learning_rate": 5.457875457875458e-06,
      "loss": 0.5407,
      "step": 299
    },
    {
      "epoch": 0.05496518871381458,
      "grad_norm": 0.47829971819207484,
      "learning_rate": 5.476190476190477e-06,
      "loss": 0.5344,
      "step": 300
    },
    {
      "epoch": 0.0551484060095273,
      "grad_norm": 0.416644246441645,
      "learning_rate": 5.494505494505495e-06,
      "loss": 0.5111,
      "step": 301
    },
    {
      "epoch": 0.05533162330524002,
      "grad_norm": 0.488275746902462,
      "learning_rate": 5.512820512820514e-06,
      "loss": 0.54,
      "step": 302
    },
    {
      "epoch": 0.05551484060095273,
      "grad_norm": 0.43082352297647686,
      "learning_rate": 5.531135531135532e-06,
      "loss": 0.5219,
      "step": 303
    },
    {
      "epoch": 0.055698057896665445,
      "grad_norm": 0.41708996725660685,
      "learning_rate": 5.5494505494505504e-06,
      "loss": 0.5272,
      "step": 304
    },
    {
      "epoch": 0.05588127519237816,
      "grad_norm": 0.4748217492221608,
      "learning_rate": 5.567765567765568e-06,
      "loss": 0.5439,
      "step": 305
    },
    {
      "epoch": 0.056064492488090874,
      "grad_norm": 0.5257169187612324,
      "learning_rate": 5.586080586080587e-06,
      "loss": 0.5671,
      "step": 306
    },
    {
      "epoch": 0.05624770978380359,
      "grad_norm": 0.4243472668028098,
      "learning_rate": 5.604395604395605e-06,
      "loss": 0.5044,
      "step": 307
    },
    {
      "epoch": 0.05643092707951631,
      "grad_norm": 0.43876355547814727,
      "learning_rate": 5.6227106227106235e-06,
      "loss": 0.4815,
      "step": 308
    },
    {
      "epoch": 0.05661414437522902,
      "grad_norm": 0.43342398130791976,
      "learning_rate": 5.641025641025641e-06,
      "loss": 0.5308,
      "step": 309
    },
    {
      "epoch": 0.05679736167094174,
      "grad_norm": 0.4660705177035744,
      "learning_rate": 5.65934065934066e-06,
      "loss": 0.5686,
      "step": 310
    },
    {
      "epoch": 0.056980578966654455,
      "grad_norm": 0.4170384834874546,
      "learning_rate": 5.677655677655678e-06,
      "loss": 0.4965,
      "step": 311
    },
    {
      "epoch": 0.057163796262367166,
      "grad_norm": 0.4568771189697619,
      "learning_rate": 5.695970695970696e-06,
      "loss": 0.5074,
      "step": 312
    },
    {
      "epoch": 0.05734701355807988,
      "grad_norm": 0.4666988354433752,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.5225,
      "step": 313
    },
    {
      "epoch": 0.0575302308537926,
      "grad_norm": 0.38305953336818827,
      "learning_rate": 5.732600732600732e-06,
      "loss": 0.5057,
      "step": 314
    },
    {
      "epoch": 0.05771344814950531,
      "grad_norm": 0.8971752127635111,
      "learning_rate": 5.750915750915751e-06,
      "loss": 0.5556,
      "step": 315
    },
    {
      "epoch": 0.05789666544521803,
      "grad_norm": 0.4580428245754137,
      "learning_rate": 5.769230769230769e-06,
      "loss": 0.5344,
      "step": 316
    },
    {
      "epoch": 0.05807988274093075,
      "grad_norm": 0.414647309534276,
      "learning_rate": 5.7875457875457885e-06,
      "loss": 0.5177,
      "step": 317
    },
    {
      "epoch": 0.05826310003664346,
      "grad_norm": 0.45495854796733415,
      "learning_rate": 5.805860805860807e-06,
      "loss": 0.5214,
      "step": 318
    },
    {
      "epoch": 0.058446317332356175,
      "grad_norm": 0.47098300998769715,
      "learning_rate": 5.824175824175825e-06,
      "loss": 0.5519,
      "step": 319
    },
    {
      "epoch": 0.05862953462806889,
      "grad_norm": 0.43335718768494447,
      "learning_rate": 5.842490842490844e-06,
      "loss": 0.5282,
      "step": 320
    },
    {
      "epoch": 0.0588127519237816,
      "grad_norm": 0.4649559782288625,
      "learning_rate": 5.860805860805862e-06,
      "loss": 0.5163,
      "step": 321
    },
    {
      "epoch": 0.05899596921949432,
      "grad_norm": 0.44623794630893965,
      "learning_rate": 5.8791208791208794e-06,
      "loss": 0.514,
      "step": 322
    },
    {
      "epoch": 0.05917918651520704,
      "grad_norm": 0.4521538486948526,
      "learning_rate": 5.897435897435898e-06,
      "loss": 0.5248,
      "step": 323
    },
    {
      "epoch": 0.05936240381091975,
      "grad_norm": 0.453853173278213,
      "learning_rate": 5.915750915750916e-06,
      "loss": 0.5105,
      "step": 324
    },
    {
      "epoch": 0.05954562110663247,
      "grad_norm": 0.45615939787874893,
      "learning_rate": 5.934065934065935e-06,
      "loss": 0.5321,
      "step": 325
    },
    {
      "epoch": 0.059728838402345184,
      "grad_norm": 0.5103507598238842,
      "learning_rate": 5.9523809523809525e-06,
      "loss": 0.4889,
      "step": 326
    },
    {
      "epoch": 0.059912055698057895,
      "grad_norm": 0.4755868373014201,
      "learning_rate": 5.970695970695971e-06,
      "loss": 0.5233,
      "step": 327
    },
    {
      "epoch": 0.06009527299377061,
      "grad_norm": 0.48186711326734255,
      "learning_rate": 5.989010989010989e-06,
      "loss": 0.5216,
      "step": 328
    },
    {
      "epoch": 0.06027849028948333,
      "grad_norm": 0.406290487409726,
      "learning_rate": 6.007326007326008e-06,
      "loss": 0.5048,
      "step": 329
    },
    {
      "epoch": 0.06046170758519604,
      "grad_norm": 0.47965200235867606,
      "learning_rate": 6.025641025641026e-06,
      "loss": 0.513,
      "step": 330
    },
    {
      "epoch": 0.06064492488090876,
      "grad_norm": 0.492103246179344,
      "learning_rate": 6.043956043956044e-06,
      "loss": 0.5553,
      "step": 331
    },
    {
      "epoch": 0.060828142176621476,
      "grad_norm": 0.49171785130479284,
      "learning_rate": 6.062271062271062e-06,
      "loss": 0.5289,
      "step": 332
    },
    {
      "epoch": 0.06101135947233419,
      "grad_norm": 0.4659535600554528,
      "learning_rate": 6.080586080586081e-06,
      "loss": 0.5143,
      "step": 333
    },
    {
      "epoch": 0.061194576768046904,
      "grad_norm": 0.43136677550682173,
      "learning_rate": 6.0989010989011e-06,
      "loss": 0.469,
      "step": 334
    },
    {
      "epoch": 0.06137779406375962,
      "grad_norm": 0.6094192173431031,
      "learning_rate": 6.117216117216118e-06,
      "loss": 0.5121,
      "step": 335
    },
    {
      "epoch": 0.06156101135947233,
      "grad_norm": 0.45352827270619606,
      "learning_rate": 6.135531135531136e-06,
      "loss": 0.5097,
      "step": 336
    },
    {
      "epoch": 0.06174422865518505,
      "grad_norm": 0.42626268219917746,
      "learning_rate": 6.153846153846155e-06,
      "loss": 0.5234,
      "step": 337
    },
    {
      "epoch": 0.06192744595089777,
| "grad_norm": 0.4438479757326601, | |
| "learning_rate": 6.172161172161173e-06, | |
| "loss": 0.4852, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.06211066324661048, | |
| "grad_norm": 0.688188614062373, | |
| "learning_rate": 6.1904761904761914e-06, | |
| "loss": 0.502, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.062293880542323196, | |
| "grad_norm": 0.5160627484540854, | |
| "learning_rate": 6.208791208791209e-06, | |
| "loss": 0.521, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.062477097838035914, | |
| "grad_norm": 0.4356067335955304, | |
| "learning_rate": 6.227106227106228e-06, | |
| "loss": 0.4918, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.06266031513374863, | |
| "grad_norm": 0.5001791021027777, | |
| "learning_rate": 6.245421245421246e-06, | |
| "loss": 0.5173, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.06284353242946134, | |
| "grad_norm": 0.45621279623031163, | |
| "learning_rate": 6.2637362637362645e-06, | |
| "loss": 0.5532, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.06302674972517405, | |
| "grad_norm": 0.4686583276600699, | |
| "learning_rate": 6.282051282051282e-06, | |
| "loss": 0.5544, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.06320996702088677, | |
| "grad_norm": 0.4500457038114704, | |
| "learning_rate": 6.300366300366301e-06, | |
| "loss": 0.5094, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.06339318431659949, | |
| "grad_norm": 0.4707435610591325, | |
| "learning_rate": 6.318681318681319e-06, | |
| "loss": 0.5317, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.0635764016123122, | |
| "grad_norm": 0.44910822533973516, | |
| "learning_rate": 6.336996336996337e-06, | |
| "loss": 0.5063, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.06375961890802492, | |
| "grad_norm": 0.4262957774336457, | |
| "learning_rate": 6.3553113553113555e-06, | |
| "loss": 0.5389, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.06394283620373763, | |
| "grad_norm": 0.4958715530578741, | |
| "learning_rate": 6.373626373626373e-06, | |
| "loss": 0.5253, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.06412605349945034, | |
| "grad_norm": 0.4706503863353741, | |
| "learning_rate": 6.391941391941392e-06, | |
| "loss": 0.5078, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.06430927079516306, | |
| "grad_norm": 0.5025375698465898, | |
| "learning_rate": 6.410256410256412e-06, | |
| "loss": 0.5366, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.06449248809087578, | |
| "grad_norm": 0.43307402952194485, | |
| "learning_rate": 6.4285714285714295e-06, | |
| "loss": 0.5053, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.0646757053865885, | |
| "grad_norm": 0.4145957842766705, | |
| "learning_rate": 6.446886446886448e-06, | |
| "loss": 0.5134, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.06485892268230121, | |
| "grad_norm": 0.42558146560441634, | |
| "learning_rate": 6.465201465201466e-06, | |
| "loss": 0.498, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.06504213997801392, | |
| "grad_norm": 0.6808736114735602, | |
| "learning_rate": 6.483516483516485e-06, | |
| "loss": 0.4865, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.06522535727372664, | |
| "grad_norm": 0.4858578720351213, | |
| "learning_rate": 6.5018315018315026e-06, | |
| "loss": 0.4977, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.06540857456943935, | |
| "grad_norm": 0.5523209851617275, | |
| "learning_rate": 6.5201465201465204e-06, | |
| "loss": 0.5278, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.06559179186515207, | |
| "grad_norm": 0.4756243917905379, | |
| "learning_rate": 6.538461538461539e-06, | |
| "loss": 0.5018, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.06577500916086479, | |
| "grad_norm": 0.5270789359516691, | |
| "learning_rate": 6.556776556776557e-06, | |
| "loss": 0.5127, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.0659582264565775, | |
| "grad_norm": 0.5213313487503423, | |
| "learning_rate": 6.575091575091576e-06, | |
| "loss": 0.504, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.06614144375229021, | |
| "grad_norm": 0.47699985237826076, | |
| "learning_rate": 6.5934065934065935e-06, | |
| "loss": 0.5356, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.06632466104800293, | |
| "grad_norm": 0.4398115760336562, | |
| "learning_rate": 6.611721611721612e-06, | |
| "loss": 0.5038, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.06650787834371565, | |
| "grad_norm": 0.5848808647247892, | |
| "learning_rate": 6.63003663003663e-06, | |
| "loss": 0.5393, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.06669109563942836, | |
| "grad_norm": 0.40297449884724584, | |
| "learning_rate": 6.648351648351649e-06, | |
| "loss": 0.5231, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.06687431293514108, | |
| "grad_norm": 0.4615954062532406, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.5017, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.0670575302308538, | |
| "grad_norm": 0.4204668889117905, | |
| "learning_rate": 6.684981684981685e-06, | |
| "loss": 0.5651, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.0672407475265665, | |
| "grad_norm": 0.4765525618556606, | |
| "learning_rate": 6.703296703296703e-06, | |
| "loss": 0.5253, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.06742396482227922, | |
| "grad_norm": 0.431663431335195, | |
| "learning_rate": 6.721611721611723e-06, | |
| "loss": 0.5398, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.06760718211799194, | |
| "grad_norm": 0.4403652862248983, | |
| "learning_rate": 6.739926739926741e-06, | |
| "loss": 0.5252, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.06779039941370466, | |
| "grad_norm": 0.483725795531489, | |
| "learning_rate": 6.758241758241759e-06, | |
| "loss": 0.5349, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.06797361670941737, | |
| "grad_norm": 0.8892216780264888, | |
| "learning_rate": 6.776556776556777e-06, | |
| "loss": 0.5184, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.06815683400513009, | |
| "grad_norm": 0.48775432933817636, | |
| "learning_rate": 6.794871794871796e-06, | |
| "loss": 0.5275, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.0683400513008428, | |
| "grad_norm": 0.4772597017011689, | |
| "learning_rate": 6.813186813186814e-06, | |
| "loss": 0.5517, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.06852326859655551, | |
| "grad_norm": 0.46418441159832313, | |
| "learning_rate": 6.831501831501832e-06, | |
| "loss": 0.5429, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.06870648589226823, | |
| "grad_norm": 0.45474945807496187, | |
| "learning_rate": 6.84981684981685e-06, | |
| "loss": 0.4999, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.06888970318798095, | |
| "grad_norm": 0.5576640330118253, | |
| "learning_rate": 6.868131868131869e-06, | |
| "loss": 0.5198, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.06907292048369366, | |
| "grad_norm": 0.4921371781399045, | |
| "learning_rate": 6.886446886446887e-06, | |
| "loss": 0.5379, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.06925613777940638, | |
| "grad_norm": 0.41639097386056484, | |
| "learning_rate": 6.9047619047619055e-06, | |
| "loss": 0.519, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.06943935507511909, | |
| "grad_norm": 0.4778909271586564, | |
| "learning_rate": 6.923076923076923e-06, | |
| "loss": 0.5336, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.0696225723708318, | |
| "grad_norm": 0.4735414706699025, | |
| "learning_rate": 6.941391941391942e-06, | |
| "loss": 0.5, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.06980578966654452, | |
| "grad_norm": 0.5646322357431656, | |
| "learning_rate": 6.95970695970696e-06, | |
| "loss": 0.5073, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.06998900696225724, | |
| "grad_norm": 0.48231469572609614, | |
| "learning_rate": 6.978021978021979e-06, | |
| "loss": 0.519, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.07017222425796996, | |
| "grad_norm": 0.5051213072897166, | |
| "learning_rate": 6.9963369963369965e-06, | |
| "loss": 0.5315, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.07035544155368267, | |
| "grad_norm": 0.5166250853237789, | |
| "learning_rate": 7.014652014652014e-06, | |
| "loss": 0.4978, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.07053865884939538, | |
| "grad_norm": 0.43900982727967125, | |
| "learning_rate": 7.032967032967034e-06, | |
| "loss": 0.4913, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.0707218761451081, | |
| "grad_norm": 0.5262819691743885, | |
| "learning_rate": 7.051282051282053e-06, | |
| "loss": 0.5189, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.07090509344082081, | |
| "grad_norm": 0.4906825009780104, | |
| "learning_rate": 7.0695970695970705e-06, | |
| "loss": 0.5586, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.07108831073653353, | |
| "grad_norm": 0.48825547184252527, | |
| "learning_rate": 7.087912087912089e-06, | |
| "loss": 0.5218, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.07127152803224625, | |
| "grad_norm": 0.5005437189224704, | |
| "learning_rate": 7.106227106227107e-06, | |
| "loss": 0.5088, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.07145474532795897, | |
| "grad_norm": 0.4820090004987099, | |
| "learning_rate": 7.124542124542126e-06, | |
| "loss": 0.5132, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.07163796262367167, | |
| "grad_norm": 0.46585258023856246, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 0.5198, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.07182117991938439, | |
| "grad_norm": 0.5351346796257885, | |
| "learning_rate": 7.161172161172162e-06, | |
| "loss": 0.5174, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.0720043972150971, | |
| "grad_norm": 0.505980784032641, | |
| "learning_rate": 7.17948717948718e-06, | |
| "loss": 0.517, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.07218761451080982, | |
| "grad_norm": 0.3991493735258601, | |
| "learning_rate": 7.197802197802198e-06, | |
| "loss": 0.5193, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.07237083180652254, | |
| "grad_norm": 0.47050897190858826, | |
| "learning_rate": 7.216117216117217e-06, | |
| "loss": 0.5278, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.07255404910223526, | |
| "grad_norm": 0.42709223237684557, | |
| "learning_rate": 7.2344322344322345e-06, | |
| "loss": 0.5233, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.07273726639794796, | |
| "grad_norm": 0.40701893214503493, | |
| "learning_rate": 7.252747252747253e-06, | |
| "loss": 0.4594, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.07292048369366068, | |
| "grad_norm": 0.4793723507064191, | |
| "learning_rate": 7.271062271062271e-06, | |
| "loss": 0.5022, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.0731037009893734, | |
| "grad_norm": 0.4084855491847111, | |
| "learning_rate": 7.28937728937729e-06, | |
| "loss": 0.5177, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.07328691828508611, | |
| "grad_norm": 0.48791204362548757, | |
| "learning_rate": 7.307692307692308e-06, | |
| "loss": 0.5336, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.07347013558079883, | |
| "grad_norm": 0.4721917787188149, | |
| "learning_rate": 7.326007326007326e-06, | |
| "loss": 0.5291, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.07365335287651155, | |
| "grad_norm": 0.4482564665450733, | |
| "learning_rate": 7.344322344322346e-06, | |
| "loss": 0.516, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.07383657017222425, | |
| "grad_norm": 0.503350703275625, | |
| "learning_rate": 7.362637362637364e-06, | |
| "loss": 0.5232, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.07401978746793697, | |
| "grad_norm": 0.5152368271826678, | |
| "learning_rate": 7.380952380952382e-06, | |
| "loss": 0.5258, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.07420300476364969, | |
| "grad_norm": 0.4523690267979874, | |
| "learning_rate": 7.3992673992674e-06, | |
| "loss": 0.5007, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.0743862220593624, | |
| "grad_norm": 0.44122338596098665, | |
| "learning_rate": 7.417582417582418e-06, | |
| "loss": 0.5325, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.07456943935507512, | |
| "grad_norm": 0.418021168922302, | |
| "learning_rate": 7.435897435897437e-06, | |
| "loss": 0.5048, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.07475265665078784, | |
| "grad_norm": 0.44537098554090276, | |
| "learning_rate": 7.454212454212455e-06, | |
| "loss": 0.5294, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.07493587394650054, | |
| "grad_norm": 0.41384101894590325, | |
| "learning_rate": 7.472527472527473e-06, | |
| "loss": 0.5119, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.07511909124221326, | |
| "grad_norm": 0.4647470180215214, | |
| "learning_rate": 7.490842490842491e-06, | |
| "loss": 0.5202, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.07530230853792598, | |
| "grad_norm": 0.4836409216489063, | |
| "learning_rate": 7.50915750915751e-06, | |
| "loss": 0.5056, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.0754855258336387, | |
| "grad_norm": 0.45195196209865357, | |
| "learning_rate": 7.527472527472528e-06, | |
| "loss": 0.5213, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.07566874312935142, | |
| "grad_norm": 0.5036829282617328, | |
| "learning_rate": 7.5457875457875465e-06, | |
| "loss": 0.5347, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.07585196042506413, | |
| "grad_norm": 0.4666038283310089, | |
| "learning_rate": 7.564102564102564e-06, | |
| "loss": 0.5151, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.07603517772077684, | |
| "grad_norm": 0.5067769344340564, | |
| "learning_rate": 7.582417582417583e-06, | |
| "loss": 0.5171, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.07621839501648955, | |
| "grad_norm": 0.4093230258713508, | |
| "learning_rate": 7.600732600732601e-06, | |
| "loss": 0.5039, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.07640161231220227, | |
| "grad_norm": 0.6498006485071868, | |
| "learning_rate": 7.61904761904762e-06, | |
| "loss": 0.5232, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.07658482960791499, | |
| "grad_norm": 0.4558152414900924, | |
| "learning_rate": 7.637362637362638e-06, | |
| "loss": 0.5232, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.0767680469036277, | |
| "grad_norm": 0.4804930639066785, | |
| "learning_rate": 7.655677655677656e-06, | |
| "loss": 0.5088, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.07695126419934042, | |
| "grad_norm": 0.5081120095319347, | |
| "learning_rate": 7.673992673992676e-06, | |
| "loss": 0.5308, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.07713448149505313, | |
| "grad_norm": 0.42393212923875745, | |
| "learning_rate": 7.692307692307694e-06, | |
| "loss": 0.5084, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.07731769879076585, | |
| "grad_norm": 0.44672075335716155, | |
| "learning_rate": 7.710622710622711e-06, | |
| "loss": 0.483, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.07750091608647856, | |
| "grad_norm": 0.4942211324726324, | |
| "learning_rate": 7.72893772893773e-06, | |
| "loss": 0.5236, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.07768413338219128, | |
| "grad_norm": 0.39817279842841763, | |
| "learning_rate": 7.747252747252749e-06, | |
| "loss": 0.5332, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.077867350677904, | |
| "grad_norm": 0.4608671155196199, | |
| "learning_rate": 7.765567765567767e-06, | |
| "loss": 0.523, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.07805056797361672, | |
| "grad_norm": 0.5225991254379231, | |
| "learning_rate": 7.783882783882785e-06, | |
| "loss": 0.5171, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.07823378526932942, | |
| "grad_norm": 0.5423490196220466, | |
| "learning_rate": 7.802197802197802e-06, | |
| "loss": 0.4985, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.07841700256504214, | |
| "grad_norm": 0.502419506962013, | |
| "learning_rate": 7.820512820512822e-06, | |
| "loss": 0.5087, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.07860021986075486, | |
| "grad_norm": 0.5102984182578104, | |
| "learning_rate": 7.83882783882784e-06, | |
| "loss": 0.5358, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.07878343715646757, | |
| "grad_norm": 0.4742126281062651, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 0.5585, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.07896665445218029, | |
| "grad_norm": 0.508787409495283, | |
| "learning_rate": 7.875457875457876e-06, | |
| "loss": 0.5411, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.079149871747893, | |
| "grad_norm": 0.4559258370924022, | |
| "learning_rate": 7.893772893772893e-06, | |
| "loss": 0.5229, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.07933308904360571, | |
| "grad_norm": 0.6320276655690735, | |
| "learning_rate": 7.912087912087913e-06, | |
| "loss": 0.4688, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.07951630633931843, | |
| "grad_norm": 0.9797165625621964, | |
| "learning_rate": 7.93040293040293e-06, | |
| "loss": 0.5372, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.07969952363503115, | |
| "grad_norm": 0.43648526458810954, | |
| "learning_rate": 7.948717948717949e-06, | |
| "loss": 0.4528, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.07988274093074386, | |
| "grad_norm": 0.46101717931557235, | |
| "learning_rate": 7.967032967032966e-06, | |
| "loss": 0.4836, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.08006595822645658, | |
| "grad_norm": 0.46019219132505085, | |
| "learning_rate": 7.985347985347986e-06, | |
| "loss": 0.4874, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.08024917552216929, | |
| "grad_norm": 0.4228905699110607, | |
| "learning_rate": 8.003663003663006e-06, | |
| "loss": 0.483, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.080432392817882, | |
| "grad_norm": 0.4920935432281776, | |
| "learning_rate": 8.021978021978023e-06, | |
| "loss": 0.489, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.08061561011359472, | |
| "grad_norm": 0.45385267234753407, | |
| "learning_rate": 8.040293040293041e-06, | |
| "loss": 0.5262, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.08079882740930744, | |
| "grad_norm": 0.48390990285633756, | |
| "learning_rate": 8.058608058608059e-06, | |
| "loss": 0.4969, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.08098204470502016, | |
| "grad_norm": 0.4424118373260341, | |
| "learning_rate": 8.076923076923077e-06, | |
| "loss": 0.5217, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.08116526200073287, | |
| "grad_norm": 0.4647327426734162, | |
| "learning_rate": 8.095238095238097e-06, | |
| "loss": 0.5103, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.08134847929644558, | |
| "grad_norm": 0.42019112334691044, | |
| "learning_rate": 8.113553113553114e-06, | |
| "loss": 0.5378, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.0815316965921583, | |
| "grad_norm": 0.40127483204178854, | |
| "learning_rate": 8.131868131868132e-06, | |
| "loss": 0.4813, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.08171491388787101, | |
| "grad_norm": 0.4606579465800615, | |
| "learning_rate": 8.15018315018315e-06, | |
| "loss": 0.5067, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.08189813118358373, | |
| "grad_norm": 0.4715535246345655, | |
| "learning_rate": 8.16849816849817e-06, | |
| "loss": 0.5181, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.08208134847929645, | |
| "grad_norm": 0.5386430386856308, | |
| "learning_rate": 8.186813186813188e-06, | |
| "loss": 0.5188, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.08226456577500917, | |
| "grad_norm": 0.5389030998275688, | |
| "learning_rate": 8.205128205128205e-06, | |
| "loss": 0.516, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.08244778307072187, | |
| "grad_norm": 0.5181954163327931, | |
| "learning_rate": 8.223443223443223e-06, | |
| "loss": 0.5099, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.08263100036643459, | |
| "grad_norm": 0.5425277965290457, | |
| "learning_rate": 8.241758241758243e-06, | |
| "loss": 0.5508, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.0828142176621473, | |
| "grad_norm": 0.4366152129754919, | |
| "learning_rate": 8.26007326007326e-06, | |
| "loss": 0.4772, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.08299743495786002, | |
| "grad_norm": 0.45607475441878936, | |
| "learning_rate": 8.278388278388278e-06, | |
| "loss": 0.5319, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.08318065225357274, | |
| "grad_norm": 0.4712644997156548, | |
| "learning_rate": 8.296703296703298e-06, | |
| "loss": 0.5477, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.08336386954928546, | |
| "grad_norm": 0.6856852588986854, | |
| "learning_rate": 8.315018315018316e-06, | |
| "loss": 0.5235, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.08354708684499816, | |
| "grad_norm": 0.5019345602931549, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.5101, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.08373030414071088, | |
| "grad_norm": 0.4155972768165925, | |
| "learning_rate": 8.351648351648353e-06, | |
| "loss": 0.4986, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.0839135214364236, | |
| "grad_norm": 0.45725631484653695, | |
| "learning_rate": 8.369963369963371e-06, | |
| "loss": 0.5233, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.08409673873213631, | |
| "grad_norm": 0.5500464841633228, | |
| "learning_rate": 8.388278388278389e-06, | |
| "loss": 0.5225, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.08427995602784903, | |
| "grad_norm": 0.5210381864086956, | |
| "learning_rate": 8.406593406593407e-06, | |
| "loss": 0.5509, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.08446317332356175, | |
| "grad_norm": 0.4559821107303101, | |
| "learning_rate": 8.424908424908426e-06, | |
| "loss": 0.5118, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.08464639061927445, | |
| "grad_norm": 0.48323545775926524, | |
| "learning_rate": 8.443223443223444e-06, | |
| "loss": 0.5049, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.08482960791498717, | |
| "grad_norm": 0.4812859935236779, | |
| "learning_rate": 8.461538461538462e-06, | |
| "loss": 0.5204, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.08501282521069989, | |
| "grad_norm": 0.454474341410942, | |
| "learning_rate": 8.47985347985348e-06, | |
| "loss": 0.5387, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.0851960425064126, | |
| "grad_norm": 0.6615650181808158, | |
| "learning_rate": 8.498168498168498e-06, | |
| "loss": 0.5393, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.08537925980212532, | |
| "grad_norm": 0.4355325124699567, | |
| "learning_rate": 8.516483516483517e-06, | |
| "loss": 0.5027, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.08556247709783804, | |
| "grad_norm": 0.43977659231205646, | |
| "learning_rate": 8.534798534798535e-06, | |
| "loss": 0.4851, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.08574569439355074, | |
| "grad_norm": 0.5490378411527629, | |
| "learning_rate": 8.553113553113553e-06, | |
| "loss": 0.5087, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.08592891168926346, | |
| "grad_norm": 0.4334113007101785, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.4874, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.08611212898497618, | |
| "grad_norm": 0.48093462345535853, | |
| "learning_rate": 8.58974358974359e-06, | |
| "loss": 0.5103, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.0862953462806889, | |
| "grad_norm": 0.509104778628553, | |
| "learning_rate": 8.60805860805861e-06, | |
| "loss": 0.5125, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.08647856357640162, | |
| "grad_norm": 0.46329459466937095, | |
| "learning_rate": 8.626373626373628e-06, | |
| "loss": 0.4862, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.08666178087211433, | |
| "grad_norm": 0.41221910536829426, | |
| "learning_rate": 8.644688644688646e-06, | |
| "loss": 0.5058, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.08684499816782704, | |
| "grad_norm": 0.4965957595147397, | |
| "learning_rate": 8.663003663003664e-06, | |
| "loss": 0.5045, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.08702821546353975, | |
| "grad_norm": 0.6831024596734047, | |
| "learning_rate": 8.681318681318681e-06, | |
| "loss": 0.533, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.08721143275925247, | |
| "grad_norm": 0.44117689667679433, | |
| "learning_rate": 8.699633699633701e-06, | |
| "loss": 0.494, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.08739465005496519, | |
| "grad_norm": 0.49552902332130444, | |
| "learning_rate": 8.717948717948719e-06, | |
| "loss": 0.5295, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.08757786735067791, | |
| "grad_norm": 0.5202864223018397, | |
| "learning_rate": 8.736263736263737e-06, | |
| "loss": 0.5315, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.08776108464639062, | |
| "grad_norm": 0.4531456591891388, | |
| "learning_rate": 8.754578754578755e-06, | |
| "loss": 0.5177, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.08794430194210333, | |
| "grad_norm": 0.5060531781359298, | |
| "learning_rate": 8.772893772893774e-06, | |
| "loss": 0.5422, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.08812751923781605, | |
| "grad_norm": 0.44222377285427994, | |
| "learning_rate": 8.791208791208792e-06, | |
| "loss": 0.4679, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.08831073653352876, | |
| "grad_norm": 0.4308135949373974, | |
| "learning_rate": 8.80952380952381e-06, | |
| "loss": 0.5057, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.08849395382924148, | |
| "grad_norm": 0.5107147323097014, | |
| "learning_rate": 8.827838827838828e-06, | |
| "loss": 0.5147, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.0886771711249542, | |
| "grad_norm": 0.4722359038239213, | |
| "learning_rate": 8.846153846153847e-06, | |
| "loss": 0.5069, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.08886038842066692, | |
| "grad_norm": 0.4913307139634766, | |
| "learning_rate": 8.864468864468865e-06, | |
| "loss": 0.4943, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.08904360571637962, | |
| "grad_norm": 0.47381589638250404, | |
| "learning_rate": 8.882783882783883e-06, | |
| "loss": 0.5432, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.08922682301209234, | |
| "grad_norm": 0.4763727220823097, | |
| "learning_rate": 8.9010989010989e-06, | |
| "loss": 0.4765, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.08941004030780506, | |
| "grad_norm": 0.48581942940784734, | |
| "learning_rate": 8.91941391941392e-06, | |
| "loss": 0.4918, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.08959325760351777, | |
| "grad_norm": 0.4771435540271998, | |
| "learning_rate": 8.937728937728938e-06, | |
| "loss": 0.5146, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.08977647489923049, | |
| "grad_norm": 0.5554558590150881, | |
| "learning_rate": 8.956043956043958e-06, | |
| "loss": 0.5139, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.08995969219494321, | |
| "grad_norm": 0.4409852140988399, | |
| "learning_rate": 8.974358974358976e-06, | |
| "loss": 0.494, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.09014290949065591, | |
| "grad_norm": 0.470617109732078, | |
| "learning_rate": 8.992673992673993e-06, | |
| "loss": 0.5293, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.09032612678636863, | |
| "grad_norm": 0.48719044521659705, | |
| "learning_rate": 9.010989010989011e-06, | |
| "loss": 0.5111, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.09050934408208135, | |
| "grad_norm": 0.46876439756978366, | |
| "learning_rate": 9.02930402930403e-06, | |
| "loss": 0.4942, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.09069256137779406, | |
| "grad_norm": 0.46671821534033575, | |
| "learning_rate": 9.047619047619049e-06, | |
| "loss": 0.5617, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.09087577867350678, | |
| "grad_norm": 0.4868372988815388, | |
| "learning_rate": 9.065934065934067e-06, | |
| "loss": 0.5195, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.0910589959692195, | |
| "grad_norm": 0.47526685976460153, | |
| "learning_rate": 9.084249084249084e-06, | |
| "loss": 0.4561, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.0912422132649322, | |
| "grad_norm": 0.526795639153496, | |
| "learning_rate": 9.102564102564104e-06, | |
| "loss": 0.4905, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.09142543056064492, | |
| "grad_norm": 0.47935527575760656, | |
| "learning_rate": 9.120879120879122e-06, | |
| "loss": 0.5129, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.09160864785635764, | |
| "grad_norm": 0.47020582683735346, | |
| "learning_rate": 9.13919413919414e-06, | |
| "loss": 0.511, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09179186515207036, | |
| "grad_norm": 0.46760420885699033, | |
| "learning_rate": 9.157509157509158e-06, | |
| "loss": 0.4789, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.09197508244778307, | |
| "grad_norm": 0.4695251283934706, | |
| "learning_rate": 9.175824175824175e-06, | |
| "loss": 0.5351, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.09215829974349579, | |
| "grad_norm": 0.5153360795669553, | |
| "learning_rate": 9.194139194139195e-06, | |
| "loss": 0.5522, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.0923415170392085, | |
| "grad_norm": 0.44806081150815513, | |
| "learning_rate": 9.212454212454213e-06, | |
| "loss": 0.5173, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.09252473433492121, | |
| "grad_norm": 0.451536504838503, | |
| "learning_rate": 9.230769230769232e-06, | |
| "loss": 0.5082, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.09270795163063393, | |
| "grad_norm": 0.48436384147084766, | |
| "learning_rate": 9.24908424908425e-06, | |
| "loss": 0.5388, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.09289116892634665, | |
| "grad_norm": 0.4609113225702784, | |
| "learning_rate": 9.267399267399268e-06, | |
| "loss": 0.4892, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.09307438622205937, | |
| "grad_norm": 0.4098711492197863, | |
| "learning_rate": 9.285714285714288e-06, | |
| "loss": 0.4856, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.09325760351777208, | |
| "grad_norm": 0.4715675907322317, | |
| "learning_rate": 9.304029304029305e-06, | |
| "loss": 0.53, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.09344082081348479, | |
| "grad_norm": 0.5026574189450673, | |
| "learning_rate": 9.322344322344323e-06, | |
| "loss": 0.5314, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.0936240381091975, | |
| "grad_norm": 0.4253199666429807, | |
| "learning_rate": 9.340659340659341e-06, | |
| "loss": 0.5068, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.09380725540491022, | |
| "grad_norm": 0.45894485399853974, | |
| "learning_rate": 9.358974358974359e-06, | |
| "loss": 0.5235, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.09399047270062294, | |
| "grad_norm": 0.7513225377427756, | |
| "learning_rate": 9.377289377289379e-06, | |
| "loss": 0.5071, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.09417368999633566, | |
| "grad_norm": 0.5231017027547264, | |
| "learning_rate": 9.395604395604396e-06, | |
| "loss": 0.5339, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.09435690729204838, | |
| "grad_norm": 0.5146997140217462, | |
| "learning_rate": 9.413919413919414e-06, | |
| "loss": 0.5052, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.09454012458776108, | |
| "grad_norm": 0.47760493192836756, | |
| "learning_rate": 9.432234432234432e-06, | |
| "loss": 0.5102, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.0947233418834738, | |
| "grad_norm": 0.44117590454697736, | |
| "learning_rate": 9.450549450549452e-06, | |
| "loss": 0.5237, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.09490655917918651, | |
| "grad_norm": 0.5004680185091399, | |
| "learning_rate": 9.46886446886447e-06, | |
| "loss": 0.5243, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.09508977647489923, | |
| "grad_norm": 0.4549800276149351, | |
| "learning_rate": 9.487179487179487e-06, | |
| "loss": 0.5139, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.09527299377061195, | |
| "grad_norm": 0.4263327588755031, | |
| "learning_rate": 9.505494505494505e-06, | |
| "loss": 0.4877, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.09545621106632467, | |
| "grad_norm": 0.4484968491218323, | |
| "learning_rate": 9.523809523809525e-06, | |
| "loss": 0.4986, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.09563942836203737, | |
| "grad_norm": 0.39390345192807547, | |
| "learning_rate": 9.542124542124543e-06, | |
| "loss": 0.5116, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.09582264565775009, | |
| "grad_norm": 0.4529147798983982, | |
| "learning_rate": 9.560439560439562e-06, | |
| "loss": 0.5312, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.0960058629534628, | |
| "grad_norm": 0.49701803503982106, | |
| "learning_rate": 9.57875457875458e-06, | |
| "loss": 0.529, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.09618908024917552, | |
| "grad_norm": 0.4097214793479841, | |
| "learning_rate": 9.597069597069598e-06, | |
| "loss": 0.5005, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.09637229754488824, | |
| "grad_norm": 0.5148151884655592, | |
| "learning_rate": 9.615384615384616e-06, | |
| "loss": 0.53, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.09655551484060096, | |
| "grad_norm": 0.4264857545219357, | |
| "learning_rate": 9.633699633699635e-06, | |
| "loss": 0.5136, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.09673873213631366, | |
| "grad_norm": 0.45426464252638443, | |
| "learning_rate": 9.652014652014653e-06, | |
| "loss": 0.5117, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.09692194943202638, | |
| "grad_norm": 0.47571176071645493, | |
| "learning_rate": 9.670329670329671e-06, | |
| "loss": 0.5185, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.0971051667277391, | |
| "grad_norm": 0.42418752426373113, | |
| "learning_rate": 9.688644688644689e-06, | |
| "loss": 0.5285, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.09728838402345182, | |
| "grad_norm": 0.48590348616099827, | |
| "learning_rate": 9.706959706959708e-06, | |
| "loss": 0.5339, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.09747160131916453, | |
| "grad_norm": 0.463774513888958, | |
| "learning_rate": 9.725274725274726e-06, | |
| "loss": 0.5386, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.09765481861487725, | |
| "grad_norm": 0.4891808396070614, | |
| "learning_rate": 9.743589743589744e-06, | |
| "loss": 0.5326, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.09783803591058995, | |
| "grad_norm": 0.40841004082693305, | |
| "learning_rate": 9.761904761904762e-06, | |
| "loss": 0.5085, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.09802125320630267, | |
| "grad_norm": 0.42494728872814735, | |
| "learning_rate": 9.780219780219781e-06, | |
| "loss": 0.4751, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.09820447050201539, | |
| "grad_norm": 0.4594283822364781, | |
| "learning_rate": 9.7985347985348e-06, | |
| "loss": 0.5275, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.09838768779772811, | |
| "grad_norm": 0.4064302259117676, | |
| "learning_rate": 9.816849816849817e-06, | |
| "loss": 0.5089, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.09857090509344082, | |
| "grad_norm": 0.4860027724973198, | |
| "learning_rate": 9.835164835164835e-06, | |
| "loss": 0.505, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.09875412238915354, | |
| "grad_norm": 0.47625362870716265, | |
| "learning_rate": 9.853479853479855e-06, | |
| "loss": 0.4954, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.09893733968486625, | |
| "grad_norm": 0.4592940212317861, | |
| "learning_rate": 9.871794871794872e-06, | |
| "loss": 0.4871, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.09912055698057896, | |
| "grad_norm": 0.42717127053329573, | |
| "learning_rate": 9.890109890109892e-06, | |
| "loss": 0.5261, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.09930377427629168, | |
| "grad_norm": 0.43970535809042904, | |
| "learning_rate": 9.90842490842491e-06, | |
| "loss": 0.4866, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.0994869915720044, | |
| "grad_norm": 0.42143360797263807, | |
| "learning_rate": 9.926739926739928e-06, | |
| "loss": 0.5113, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.09967020886771712, | |
| "grad_norm": 0.4416711016318173, | |
| "learning_rate": 9.945054945054946e-06, | |
| "loss": 0.5116, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.09985342616342983, | |
| "grad_norm": 0.45325267149265236, | |
| "learning_rate": 9.963369963369965e-06, | |
| "loss": 0.5283, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.10003664345914254, | |
| "grad_norm": 0.3948707773420248, | |
| "learning_rate": 9.981684981684983e-06, | |
| "loss": 0.5114, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.10021986075485526, | |
| "grad_norm": 0.42983426021717475, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5169, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.10040307805056797, | |
| "grad_norm": 0.5119731461361727, | |
| "learning_rate": 9.999998977359419e-06, | |
| "loss": 0.518, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.10058629534628069, | |
| "grad_norm": 0.48479775231872485, | |
| "learning_rate": 9.999995909438092e-06, | |
| "loss": 0.5207, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.10076951264199341, | |
| "grad_norm": 0.45005420614602115, | |
| "learning_rate": 9.999990796237274e-06, | |
| "loss": 0.4878, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.10095272993770613, | |
| "grad_norm": 0.40595587220813967, | |
| "learning_rate": 9.999983637759059e-06, | |
| "loss": 0.4712, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.10113594723341883, | |
| "grad_norm": 0.43893870910937327, | |
| "learning_rate": 9.999974434006372e-06, | |
| "loss": 0.4964, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.10131916452913155, | |
| "grad_norm": 0.4638079942473788, | |
| "learning_rate": 9.99996318498298e-06, | |
| "loss": 0.4834, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.10150238182484426, | |
| "grad_norm": 0.45078724585599195, | |
| "learning_rate": 9.999949890693484e-06, | |
| "loss": 0.4828, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.10168559912055698, | |
| "grad_norm": 0.4968230440871285, | |
| "learning_rate": 9.999934551143319e-06, | |
| "loss": 0.5146, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.1018688164162697, | |
| "grad_norm": 0.4431824586567993, | |
| "learning_rate": 9.999917166338767e-06, | |
| "loss": 0.5117, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.10205203371198242, | |
| "grad_norm": 0.40809319706771474, | |
| "learning_rate": 9.999897736286932e-06, | |
| "loss": 0.4942, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.10223525100769512, | |
| "grad_norm": 0.46904649406632837, | |
| "learning_rate": 9.999876260995767e-06, | |
| "loss": 0.5223, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.10241846830340784, | |
| "grad_norm": 0.45993616230767786, | |
| "learning_rate": 9.999852740474054e-06, | |
| "loss": 0.498, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.10260168559912056, | |
| "grad_norm": 0.41328723379038895, | |
| "learning_rate": 9.999827174731414e-06, | |
| "loss": 0.5294, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.10278490289483327, | |
| "grad_norm": 0.4584774156426854, | |
| "learning_rate": 9.999799563778307e-06, | |
| "loss": 0.5138, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.10296812019054599, | |
| "grad_norm": 0.4626480547701558, | |
| "learning_rate": 9.999769907626024e-06, | |
| "loss": 0.5122, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.10315133748625871, | |
| "grad_norm": 0.4647555669122866, | |
| "learning_rate": 9.9997382062867e-06, | |
| "loss": 0.5276, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.10333455478197141, | |
| "grad_norm": 0.41824885855037686, | |
| "learning_rate": 9.9997044597733e-06, | |
| "loss": 0.4976, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.10351777207768413, | |
| "grad_norm": 0.41187305924650414, | |
| "learning_rate": 9.999668668099628e-06, | |
| "loss": 0.4953, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.10370098937339685, | |
| "grad_norm": 0.42660595756816655, | |
| "learning_rate": 9.999630831280329e-06, | |
| "loss": 0.514, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.10388420666910957, | |
| "grad_norm": 0.5099786508796531, | |
| "learning_rate": 9.999590949330876e-06, | |
| "loss": 0.5038, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.10406742396482228, | |
| "grad_norm": 0.4926825490754442, | |
| "learning_rate": 9.999549022267582e-06, | |
| "loss": 0.5535, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.104250641260535, | |
| "grad_norm": 0.4578490443823955, | |
| "learning_rate": 9.999505050107601e-06, | |
| "loss": 0.5075, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.1044338585562477, | |
| "grad_norm": 0.4733323605035433, | |
| "learning_rate": 9.99945903286892e-06, | |
| "loss": 0.4747, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.10461707585196042, | |
| "grad_norm": 0.44893796479946446, | |
| "learning_rate": 9.999410970570358e-06, | |
| "loss": 0.4711, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.10480029314767314, | |
| "grad_norm": 0.5254914900315981, | |
| "learning_rate": 9.99936086323158e-06, | |
| "loss": 0.5358, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.10498351044338586, | |
| "grad_norm": 0.537923540220477, | |
| "learning_rate": 9.999308710873083e-06, | |
| "loss": 0.5079, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.10516672773909858, | |
| "grad_norm": 0.45536567675754, | |
| "learning_rate": 9.999254513516196e-06, | |
| "loss": 0.4908, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.10534994503481128, | |
| "grad_norm": 0.38323869490085266, | |
| "learning_rate": 9.999198271183094e-06, | |
| "loss": 0.4983, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.105533162330524, | |
| "grad_norm": 0.43580085679521197, | |
| "learning_rate": 9.999139983896779e-06, | |
| "loss": 0.5031, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.10571637962623671, | |
| "grad_norm": 0.4193926406628939, | |
| "learning_rate": 9.999079651681096e-06, | |
| "loss": 0.5026, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.10589959692194943, | |
| "grad_norm": 0.5149327291476714, | |
| "learning_rate": 9.999017274560722e-06, | |
| "loss": 0.5414, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.10608281421766215, | |
| "grad_norm": 0.44098660936462153, | |
| "learning_rate": 9.998952852561176e-06, | |
| "loss": 0.5032, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.10626603151337487, | |
| "grad_norm": 0.4413320145497695, | |
| "learning_rate": 9.998886385708807e-06, | |
| "loss": 0.497, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.10644924880908757, | |
| "grad_norm": 0.45575842795199323, | |
| "learning_rate": 9.998817874030808e-06, | |
| "loss": 0.5207, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.10663246610480029, | |
| "grad_norm": 0.532654166955093, | |
| "learning_rate": 9.9987473175552e-06, | |
| "loss": 0.5212, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.106815683400513, | |
| "grad_norm": 0.4157619606008366, | |
| "learning_rate": 9.998674716310846e-06, | |
| "loss": 0.5085, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.10699890069622572, | |
| "grad_norm": 0.4868388397165033, | |
| "learning_rate": 9.998600070327444e-06, | |
| "loss": 0.5306, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.10718211799193844, | |
| "grad_norm": 0.49401937562080056, | |
| "learning_rate": 9.998523379635527e-06, | |
| "loss": 0.5429, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.10736533528765116, | |
| "grad_norm": 0.3986626914030523, | |
| "learning_rate": 9.99844464426647e-06, | |
| "loss": 0.4695, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.10754855258336386, | |
| "grad_norm": 0.49524254606268836, | |
| "learning_rate": 9.998363864252474e-06, | |
| "loss": 0.5226, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.10773176987907658, | |
| "grad_norm": 0.46186447850660334, | |
| "learning_rate": 9.998281039626588e-06, | |
| "loss": 0.5294, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.1079149871747893, | |
| "grad_norm": 0.4207082230286084, | |
| "learning_rate": 9.99819617042269e-06, | |
| "loss": 0.5096, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.10809820447050202, | |
| "grad_norm": 0.3955565788721964, | |
| "learning_rate": 9.998109256675496e-06, | |
| "loss": 0.4931, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.10828142176621473, | |
| "grad_norm": 0.36723747707336185, | |
| "learning_rate": 9.998020298420559e-06, | |
| "loss": 0.4964, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.10846463906192745, | |
| "grad_norm": 0.4185544526751163, | |
| "learning_rate": 9.997929295694266e-06, | |
| "loss": 0.4801, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.10864785635764015, | |
| "grad_norm": 0.42948593775837285, | |
| "learning_rate": 9.997836248533845e-06, | |
| "loss": 0.5197, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.10883107365335287, | |
| "grad_norm": 0.43966759837587943, | |
| "learning_rate": 9.997741156977356e-06, | |
| "loss": 0.5254, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.10901429094906559, | |
| "grad_norm": 0.4632445570564666, | |
| "learning_rate": 9.997644021063698e-06, | |
| "loss": 0.4802, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.10919750824477831, | |
| "grad_norm": 0.46480621716730125, | |
| "learning_rate": 9.997544840832604e-06, | |
| "loss": 0.5219, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.10938072554049103, | |
| "grad_norm": 0.44567173311984926, | |
| "learning_rate": 9.997443616324645e-06, | |
| "loss": 0.5105, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.10956394283620374, | |
| "grad_norm": 0.4534263401515789, | |
| "learning_rate": 9.997340347581226e-06, | |
| "loss": 0.5212, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.10974716013191645, | |
| "grad_norm": 0.40359525147680225, | |
| "learning_rate": 9.99723503464459e-06, | |
| "loss": 0.5261, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.10993037742762916, | |
| "grad_norm": 0.8305073137318505, | |
| "learning_rate": 9.99712767755782e-06, | |
| "loss": 0.5173, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.11011359472334188, | |
| "grad_norm": 0.5039765167575108, | |
| "learning_rate": 9.997018276364825e-06, | |
| "loss": 0.4983, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.1102968120190546, | |
| "grad_norm": 0.4000444070826362, | |
| "learning_rate": 9.99690683111036e-06, | |
| "loss": 0.4876, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.11048002931476732, | |
| "grad_norm": 0.45535979366815893, | |
| "learning_rate": 9.99679334184001e-06, | |
| "loss": 0.5116, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.11066324661048003, | |
| "grad_norm": 0.5208092576973751, | |
| "learning_rate": 9.996677808600202e-06, | |
| "loss": 0.5079, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.11084646390619274, | |
| "grad_norm": 0.47721138790862244, | |
| "learning_rate": 9.996560231438193e-06, | |
| "loss": 0.5031, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.11102968120190546, | |
| "grad_norm": 0.4106250945716929, | |
| "learning_rate": 9.996440610402078e-06, | |
| "loss": 0.4789, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.11121289849761817, | |
| "grad_norm": 0.4981443340009068, | |
| "learning_rate": 9.996318945540792e-06, | |
| "loss": 0.4749, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.11139611579333089, | |
| "grad_norm": 0.4365748987869686, | |
| "learning_rate": 9.996195236904098e-06, | |
| "loss": 0.503, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.11157933308904361, | |
| "grad_norm": 0.42769357659836243, | |
| "learning_rate": 9.996069484542605e-06, | |
| "loss": 0.4883, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.11176255038475633, | |
| "grad_norm": 0.4468425715852285, | |
| "learning_rate": 9.995941688507749e-06, | |
| "loss": 0.528, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.11194576768046903, | |
| "grad_norm": 0.44305501127316244, | |
| "learning_rate": 9.995811848851807e-06, | |
| "loss": 0.5192, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.11212898497618175, | |
| "grad_norm": 0.4430509247761743, | |
| "learning_rate": 9.995679965627891e-06, | |
| "loss": 0.4879, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.11231220227189447, | |
| "grad_norm": 0.4330736232909904, | |
| "learning_rate": 9.995546038889948e-06, | |
| "loss": 0.5315, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.11249541956760718, | |
| "grad_norm": 0.38536887827486094, | |
| "learning_rate": 9.995410068692763e-06, | |
| "loss": 0.4789, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.1126786368633199, | |
| "grad_norm": 0.5420414741691238, | |
| "learning_rate": 9.995272055091954e-06, | |
| "loss": 0.4863, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.11286185415903262, | |
| "grad_norm": 0.4166977498062619, | |
| "learning_rate": 9.995131998143976e-06, | |
| "loss": 0.4863, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.11304507145474532, | |
| "grad_norm": 0.40681536551456327, | |
| "learning_rate": 9.99498989790612e-06, | |
| "loss": 0.5158, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.11322828875045804, | |
| "grad_norm": 0.4164498167643854, | |
| "learning_rate": 9.994845754436516e-06, | |
| "loss": 0.5287, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.11341150604617076, | |
| "grad_norm": 0.430875770675555, | |
| "learning_rate": 9.994699567794123e-06, | |
| "loss": 0.4949, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.11359472334188347, | |
| "grad_norm": 0.47209089186963143, | |
| "learning_rate": 9.994551338038742e-06, | |
| "loss": 0.5212, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.11377794063759619, | |
| "grad_norm": 0.4693460118731517, | |
| "learning_rate": 9.994401065231008e-06, | |
| "loss": 0.5218, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.11396115793330891, | |
| "grad_norm": 0.43645941737097255, | |
| "learning_rate": 9.994248749432388e-06, | |
| "loss": 0.5355, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.11414437522902161, | |
| "grad_norm": 0.45174829183811943, | |
| "learning_rate": 9.994094390705189e-06, | |
| "loss": 0.4941, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.11432759252473433, | |
| "grad_norm": 0.4609885421062077, | |
| "learning_rate": 9.993937989112554e-06, | |
| "loss": 0.5424, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.11451080982044705, | |
| "grad_norm": 0.4711205796072519, | |
| "learning_rate": 9.993779544718459e-06, | |
| "loss": 0.5054, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.11469402711615977, | |
| "grad_norm": 0.4692458224302816, | |
| "learning_rate": 9.993619057587714e-06, | |
| "loss": 0.5013, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.11487724441187248, | |
| "grad_norm": 0.44543459536907126, | |
| "learning_rate": 9.99345652778597e-06, | |
| "loss": 0.4882, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.1150604617075852, | |
| "grad_norm": 0.43461884283611496, | |
| "learning_rate": 9.993291955379713e-06, | |
| "loss": 0.5352, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.1152436790032979, | |
| "grad_norm": 0.41500076390959145, | |
| "learning_rate": 9.993125340436258e-06, | |
| "loss": 0.4895, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.11542689629901062, | |
| "grad_norm": 0.4307377636082482, | |
| "learning_rate": 9.992956683023762e-06, | |
| "loss": 0.5149, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.11561011359472334, | |
| "grad_norm": 0.45085961488644083, | |
| "learning_rate": 9.992785983211214e-06, | |
| "loss": 0.5061, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.11579333089043606, | |
| "grad_norm": 0.45702457348234676, | |
| "learning_rate": 9.992613241068444e-06, | |
| "loss": 0.4947, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.11597654818614878, | |
| "grad_norm": 0.46046219980195896, | |
| "learning_rate": 9.992438456666108e-06, | |
| "loss": 0.4727, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.1161597654818615, | |
| "grad_norm": 0.5109084834664002, | |
| "learning_rate": 9.992261630075704e-06, | |
| "loss": 0.5015, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.1163429827775742, | |
| "grad_norm": 0.4605028589793491, | |
| "learning_rate": 9.992082761369567e-06, | |
| "loss": 0.518, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.11652620007328691, | |
| "grad_norm": 0.4135254020313275, | |
| "learning_rate": 9.991901850620861e-06, | |
| "loss": 0.4833, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.11670941736899963, | |
| "grad_norm": 0.5226822189268372, | |
| "learning_rate": 9.99171889790359e-06, | |
| "loss": 0.5118, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.11689263466471235, | |
| "grad_norm": 0.4061164579366039, | |
| "learning_rate": 9.991533903292592e-06, | |
| "loss": 0.4953, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.11707585196042507, | |
| "grad_norm": 2.3765281557608486, | |
| "learning_rate": 9.99134686686354e-06, | |
| "loss": 0.4708, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.11725906925613779, | |
| "grad_norm": 0.4675538148921928, | |
| "learning_rate": 9.991157788692942e-06, | |
| "loss": 0.4994, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.11744228655185049, | |
| "grad_norm": 0.4467597960418954, | |
| "learning_rate": 9.990966668858144e-06, | |
| "loss": 0.4982, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.1176255038475632, | |
| "grad_norm": 0.46222076234046117, | |
| "learning_rate": 9.99077350743732e-06, | |
| "loss": 0.5208, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.11780872114327592, | |
| "grad_norm": 0.45856899421216063, | |
| "learning_rate": 9.990578304509488e-06, | |
| "loss": 0.5109, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.11799193843898864, | |
| "grad_norm": 0.5006843127431095, | |
| "learning_rate": 9.990381060154496e-06, | |
| "loss": 0.4941, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.11817515573470136, | |
| "grad_norm": 0.4638055815431298, | |
| "learning_rate": 9.990181774453028e-06, | |
| "loss": 0.509, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.11835837303041408, | |
| "grad_norm": 0.4429725714841303, | |
| "learning_rate": 9.989980447486601e-06, | |
| "loss": 0.5196, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.11854159032612678, | |
| "grad_norm": 0.44051330732830757, | |
| "learning_rate": 9.989777079337572e-06, | |
| "loss": 0.5316, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.1187248076218395, | |
| "grad_norm": 0.41555588890083767, | |
| "learning_rate": 9.989571670089129e-06, | |
| "loss": 0.5126, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.11890802491755222, | |
| "grad_norm": 0.46239837428682623, | |
| "learning_rate": 9.989364219825295e-06, | |
| "loss": 0.506, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.11909124221326493, | |
| "grad_norm": 0.416093035128269, | |
| "learning_rate": 9.98915472863093e-06, | |
| "loss": 0.5194, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.11927445950897765, | |
| "grad_norm": 0.43847114193541153, | |
| "learning_rate": 9.988943196591727e-06, | |
| "loss": 0.4994, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.11945767680469037, | |
| "grad_norm": 0.44451525143619486, | |
| "learning_rate": 9.988729623794215e-06, | |
| "loss": 0.5283, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.11964089410040307, | |
| "grad_norm": 0.4041297930697904, | |
| "learning_rate": 9.988514010325758e-06, | |
| "loss": 0.5049, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.11982411139611579, | |
| "grad_norm": 0.4075716530480146, | |
| "learning_rate": 9.988296356274551e-06, | |
| "loss": 0.4921, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.12000732869182851, | |
| "grad_norm": 0.40736195443308915, | |
| "learning_rate": 9.988076661729631e-06, | |
| "loss": 0.4805, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.12019054598754123, | |
| "grad_norm": 0.4545077167148167, | |
| "learning_rate": 9.987854926780863e-06, | |
| "loss": 0.4789, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.12037376328325394, | |
| "grad_norm": 0.5314832542562551, | |
| "learning_rate": 9.987631151518948e-06, | |
| "loss": 0.5263, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.12055698057896666, | |
| "grad_norm": 0.4967524623940482, | |
| "learning_rate": 9.987405336035425e-06, | |
| "loss": 0.5106, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.12074019787467936, | |
| "grad_norm": 0.4306664623952609, | |
| "learning_rate": 9.987177480422663e-06, | |
| "loss": 0.5376, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.12092341517039208, | |
| "grad_norm": 0.4158801663482348, | |
| "learning_rate": 9.98694758477387e-06, | |
| "loss": 0.4838, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.1211066324661048, | |
| "grad_norm": 0.40605820771792417, | |
| "learning_rate": 9.986715649183084e-06, | |
| "loss": 0.505, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.12128984976181752, | |
| "grad_norm": 0.4085721106839356, | |
| "learning_rate": 9.986481673745183e-06, | |
| "loss": 0.4993, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.12147306705753023, | |
| "grad_norm": 0.4785768918832099, | |
| "learning_rate": 9.986245658555873e-06, | |
| "loss": 0.5387, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.12165628435324295, | |
| "grad_norm": 0.4175528791505364, | |
| "learning_rate": 9.986007603711698e-06, | |
| "loss": 0.4976, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.12183950164895566, | |
| "grad_norm": 0.4323331836919794, | |
| "learning_rate": 9.985767509310035e-06, | |
| "loss": 0.5346, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.12202271894466837, | |
| "grad_norm": 0.4868011056584651, | |
| "learning_rate": 9.9855253754491e-06, | |
| "loss": 0.5309, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.12220593624038109, | |
| "grad_norm": 0.46372133085485157, | |
| "learning_rate": 9.985281202227936e-06, | |
| "loss": 0.5053, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.12238915353609381, | |
| "grad_norm": 0.48326269099644514, | |
| "learning_rate": 9.985034989746423e-06, | |
| "loss": 0.4941, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.12257237083180653, | |
| "grad_norm": 0.4557552947362274, | |
| "learning_rate": 9.984786738105279e-06, | |
| "loss": 0.5121, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.12275558812751924, | |
| "grad_norm": 0.39704238565295197, | |
| "learning_rate": 9.98453644740605e-06, | |
| "loss": 0.4962, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.12293880542323195, | |
| "grad_norm": 0.40645243334504044, | |
| "learning_rate": 9.98428411775112e-06, | |
| "loss": 0.5046, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.12312202271894467, | |
| "grad_norm": 0.42629021258457467, | |
| "learning_rate": 9.984029749243707e-06, | |
| "loss": 0.5084, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.12330524001465738, | |
| "grad_norm": 0.45965819318406503, | |
| "learning_rate": 9.98377334198786e-06, | |
| "loss": 0.4759, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.1234884573103701, | |
| "grad_norm": 0.41967629717267835, | |
| "learning_rate": 9.983514896088466e-06, | |
| "loss": 0.498, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.12367167460608282, | |
| "grad_norm": 0.4295939596150219, | |
| "learning_rate": 9.983254411651242e-06, | |
| "loss": 0.5243, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.12385489190179554, | |
| "grad_norm": 0.4808089074794197, | |
| "learning_rate": 9.982991888782742e-06, | |
| "loss": 0.5, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.12403810919750824, | |
| "grad_norm": 0.48728725704379316, | |
| "learning_rate": 9.982727327590352e-06, | |
| "loss": 0.5459, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.12422132649322096, | |
| "grad_norm": 0.42799437772375803, | |
| "learning_rate": 9.982460728182292e-06, | |
| "loss": 0.5488, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.12440454378893367, | |
| "grad_norm": 0.4334349975707624, | |
| "learning_rate": 9.982192090667618e-06, | |
| "loss": 0.501, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.12458776108464639, | |
| "grad_norm": 0.45671378695636383, | |
| "learning_rate": 9.981921415156217e-06, | |
| "loss": 0.5236, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.12477097838035911, | |
| "grad_norm": 0.49531406109254905, | |
| "learning_rate": 9.98164870175881e-06, | |
| "loss": 0.5224, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.12495419567607183, | |
| "grad_norm": 0.3902067301024602, | |
| "learning_rate": 9.981373950586952e-06, | |
| "loss": 0.4835, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.12513741297178455, | |
| "grad_norm": 0.46995820446689096, | |
| "learning_rate": 9.981097161753032e-06, | |
| "loss": 0.527, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.12532063026749726, | |
| "grad_norm": 0.43642001976560174, | |
| "learning_rate": 9.980818335370273e-06, | |
| "loss": 0.514, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.12550384756320998, | |
| "grad_norm": 0.5074521053811396, | |
| "learning_rate": 9.980537471552728e-06, | |
| "loss": 0.4897, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.12568706485892267, | |
| "grad_norm": 0.43625566798430576, | |
| "learning_rate": 9.98025457041529e-06, | |
| "loss": 0.5174, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.1258702821546354, | |
| "grad_norm": 0.4129578447344159, | |
| "learning_rate": 9.979969632073678e-06, | |
| "loss": 0.5135, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.1260534994503481, | |
| "grad_norm": 0.4553566655837448, | |
| "learning_rate": 9.97968265664445e-06, | |
| "loss": 0.5228, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.12623671674606082, | |
| "grad_norm": 0.4682536997157449, | |
| "learning_rate": 9.979393644244992e-06, | |
| "loss": 0.5039, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.12641993404177354, | |
| "grad_norm": 0.42839595687214543, | |
| "learning_rate": 9.979102594993533e-06, | |
| "loss": 0.5128, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.12660315133748626, | |
| "grad_norm": 0.46092632925079513, | |
| "learning_rate": 9.978809509009121e-06, | |
| "loss": 0.5011, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.12678636863319898, | |
| "grad_norm": 0.4486176186332823, | |
| "learning_rate": 9.97851438641165e-06, | |
| "loss": 0.5169, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.1269695859289117, | |
| "grad_norm": 0.47419826995019465, | |
| "learning_rate": 9.978217227321837e-06, | |
| "loss": 0.5131, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.1271528032246244, | |
| "grad_norm": 0.4004091408746546, | |
| "learning_rate": 9.97791803186124e-06, | |
| "loss": 0.4743, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.12733602052033713, | |
| "grad_norm": 0.38849935615066844, | |
| "learning_rate": 9.977616800152248e-06, | |
| "loss": 0.4944, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.12751923781604985, | |
| "grad_norm": 0.44216099670793213, | |
| "learning_rate": 9.977313532318078e-06, | |
| "loss": 0.5072, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.12770245511176256, | |
| "grad_norm": 0.4462646919434802, | |
| "learning_rate": 9.977008228482785e-06, | |
| "loss": 0.4901, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.12788567240747525, | |
| "grad_norm": 0.44769942596462214, | |
| "learning_rate": 9.976700888771259e-06, | |
| "loss": 0.5263, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.12806888970318797, | |
| "grad_norm": 0.5113829977752261, | |
| "learning_rate": 9.976391513309212e-06, | |
| "loss": 0.5202, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.1282521069989007, | |
| "grad_norm": 0.4245344457429005, | |
| "learning_rate": 9.976080102223202e-06, | |
| "loss": 0.5182, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.1284353242946134, | |
| "grad_norm": 0.4305381223047699, | |
| "learning_rate": 9.97576665564061e-06, | |
| "loss": 0.507, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.12861854159032612, | |
| "grad_norm": 0.47072785404283757, | |
| "learning_rate": 9.975451173689658e-06, | |
| "loss": 0.5166, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.12880175888603884, | |
| "grad_norm": 0.6675845713373886, | |
| "learning_rate": 9.975133656499392e-06, | |
| "loss": 0.47, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.12898497618175156, | |
| "grad_norm": 0.4533261672443661, | |
| "learning_rate": 9.974814104199694e-06, | |
| "loss": 0.5362, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.12916819347746428, | |
| "grad_norm": 0.4503943455070683, | |
| "learning_rate": 9.97449251692128e-06, | |
| "loss": 0.5134, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.129351410773177, | |
| "grad_norm": 0.44545859946020006, | |
| "learning_rate": 9.974168894795698e-06, | |
| "loss": 0.4981, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.1295346280688897, | |
| "grad_norm": 0.430864283858029, | |
| "learning_rate": 9.973843237955328e-06, | |
| "loss": 0.5329, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.12971784536460243, | |
| "grad_norm": 0.7865986917626206, | |
| "learning_rate": 9.973515546533379e-06, | |
| "loss": 0.504, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.12990106266031515, | |
| "grad_norm": 0.44976211133712124, | |
| "learning_rate": 9.973185820663897e-06, | |
| "loss": 0.4737, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.13008427995602784, | |
| "grad_norm": 0.4549304394815992, | |
| "learning_rate": 9.97285406048176e-06, | |
| "loss": 0.5425, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.13026749725174055, | |
| "grad_norm": 0.5049990473394447, | |
| "learning_rate": 9.972520266122676e-06, | |
| "loss": 0.5117, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.13045071454745327, | |
| "grad_norm": 0.42657669286902156, | |
| "learning_rate": 9.972184437723182e-06, | |
| "loss": 0.5057, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.130633931843166, | |
| "grad_norm": 0.5299863700854671, | |
| "learning_rate": 9.971846575420656e-06, | |
| "loss": 0.5262, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.1308171491388787, | |
| "grad_norm": 0.45345740582998295, | |
| "learning_rate": 9.9715066793533e-06, | |
| "loss": 0.4997, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.13100036643459143, | |
| "grad_norm": 0.39393086616308026, | |
| "learning_rate": 9.971164749660149e-06, | |
| "loss": 0.4996, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.13118358373030414, | |
| "grad_norm": 0.4577611248522203, | |
| "learning_rate": 9.970820786481075e-06, | |
| "loss": 0.5018, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.13136680102601686, | |
| "grad_norm": 0.43279352533859367, | |
| "learning_rate": 9.970474789956775e-06, | |
| "loss": 0.4914, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.13155001832172958, | |
| "grad_norm": 0.4366448132250816, | |
| "learning_rate": 9.970126760228785e-06, | |
| "loss": 0.4864, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.1317332356174423, | |
| "grad_norm": 0.43998158412052296, | |
| "learning_rate": 9.969776697439463e-06, | |
| "loss": 0.4834, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.131916452913155, | |
| "grad_norm": 0.46940772392002544, | |
| "learning_rate": 9.969424601732011e-06, | |
| "loss": 0.501, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.1320996702088677, | |
| "grad_norm": 0.43117110167391715, | |
| "learning_rate": 9.96907047325045e-06, | |
| "loss": 0.5562, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.13228288750458042, | |
| "grad_norm": 0.4119352967972661, | |
| "learning_rate": 9.968714312139642e-06, | |
| "loss": 0.4748, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.13246610480029314, | |
| "grad_norm": 0.5028443530663289, | |
| "learning_rate": 9.968356118545277e-06, | |
| "loss": 0.5344, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.13264932209600586, | |
| "grad_norm": 0.43089419048593447, | |
| "learning_rate": 9.967995892613875e-06, | |
| "loss": 0.4704, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.13283253939171857, | |
| "grad_norm": 0.5044933769783081, | |
| "learning_rate": 9.967633634492788e-06, | |
| "loss": 0.4996, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.1330157566874313, | |
| "grad_norm": 0.442554097189932, | |
| "learning_rate": 9.967269344330201e-06, | |
| "loss": 0.5278, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.133198973983144, | |
| "grad_norm": 0.431094495901828, | |
| "learning_rate": 9.966903022275131e-06, | |
| "loss": 0.4943, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.13338219127885673, | |
| "grad_norm": 0.6222105275898999, | |
| "learning_rate": 9.966534668477421e-06, | |
| "loss": 0.5215, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.13356540857456944, | |
| "grad_norm": 0.4562449049230116, | |
| "learning_rate": 9.96616428308775e-06, | |
| "loss": 0.5112, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.13374862587028216, | |
| "grad_norm": 0.4249667668518143, | |
| "learning_rate": 9.965791866257626e-06, | |
| "loss": 0.5083, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.13393184316599488, | |
| "grad_norm": 0.42736565388331654, | |
| "learning_rate": 9.96541741813939e-06, | |
| "loss": 0.5078, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.1341150604617076, | |
| "grad_norm": 0.42789645341508464, | |
| "learning_rate": 9.96504093888621e-06, | |
| "loss": 0.5148, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.1342982777574203, | |
| "grad_norm": 0.5018533252539279, | |
| "learning_rate": 9.964662428652088e-06, | |
| "loss": 0.4994, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.134481495053133, | |
| "grad_norm": 0.4561559749816332, | |
| "learning_rate": 9.964281887591856e-06, | |
| "loss": 0.5036, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.13466471234884572, | |
| "grad_norm": 0.43874828747054045, | |
| "learning_rate": 9.963899315861176e-06, | |
| "loss": 0.4976, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.13484792964455844, | |
| "grad_norm": 0.44683273370213666, | |
| "learning_rate": 9.963514713616544e-06, | |
| "loss": 0.4837, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.13503114694027116, | |
| "grad_norm": 0.4575940082218496, | |
| "learning_rate": 9.963128081015282e-06, | |
| "loss": 0.531, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.13521436423598387, | |
| "grad_norm": 0.4190671986873532, | |
| "learning_rate": 9.962739418215545e-06, | |
| "loss": 0.525, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.1353975815316966, | |
| "grad_norm": 0.41284775197321, | |
| "learning_rate": 9.962348725376318e-06, | |
| "loss": 0.519, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.1355807988274093, | |
| "grad_norm": 0.41220938724190687, | |
| "learning_rate": 9.961956002657414e-06, | |
| "loss": 0.535, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.13576401612312203, | |
| "grad_norm": 0.4428272636340964, | |
| "learning_rate": 9.961561250219482e-06, | |
| "loss": 0.5287, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.13594723341883475, | |
| "grad_norm": 0.42565025293852765, | |
| "learning_rate": 9.961164468223996e-06, | |
| "loss": 0.5329, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.13613045071454746, | |
| "grad_norm": 0.5963216630193675, | |
| "learning_rate": 9.960765656833263e-06, | |
| "loss": 0.5036, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.13631366801026018, | |
| "grad_norm": 0.42494573499796934, | |
| "learning_rate": 9.96036481621042e-06, | |
| "loss": 0.4936, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.13649688530597287, | |
| "grad_norm": 0.39224716002216886, | |
| "learning_rate": 9.959961946519431e-06, | |
| "loss": 0.4704, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.1366801026016856, | |
| "grad_norm": 0.42574265198042266, | |
| "learning_rate": 9.959557047925095e-06, | |
| "loss": 0.4964, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.1368633198973983, | |
| "grad_norm": 0.457270415221301, | |
| "learning_rate": 9.959150120593035e-06, | |
| "loss": 0.5322, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.13704653719311102, | |
| "grad_norm": 0.4245718410150897, | |
| "learning_rate": 9.95874116468971e-06, | |
| "loss": 0.4952, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.13722975448882374, | |
| "grad_norm": 0.47589556246220993, | |
| "learning_rate": 9.958330180382405e-06, | |
| "loss": 0.5124, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.13741297178453646, | |
| "grad_norm": 0.45440270321018955, | |
| "learning_rate": 9.957917167839238e-06, | |
| "loss": 0.5109, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.13759618908024918, | |
| "grad_norm": 0.5232685566183007, | |
| "learning_rate": 9.95750212722915e-06, | |
| "loss": 0.5174, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.1377794063759619, | |
| "grad_norm": 0.538012343254673, | |
| "learning_rate": 9.957085058721918e-06, | |
| "loss": 0.5105, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.1379626236716746, | |
| "grad_norm": 0.38959097348139504, | |
| "learning_rate": 9.956665962488148e-06, | |
| "loss": 0.4533, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.13814584096738733, | |
| "grad_norm": 0.4404610898092818, | |
| "learning_rate": 9.956244838699271e-06, | |
| "loss": 0.4978, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.13832905826310005, | |
| "grad_norm": 0.47384922621236214, | |
| "learning_rate": 9.955821687527554e-06, | |
| "loss": 0.5309, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.13851227555881276, | |
| "grad_norm": 0.45266354792605684, | |
| "learning_rate": 9.955396509146084e-06, | |
| "loss": 0.5151, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.13869549285452545, | |
| "grad_norm": 0.4098036061670406, | |
| "learning_rate": 9.95496930372879e-06, | |
| "loss": 0.5, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.13887871015023817, | |
| "grad_norm": 0.4520393902857722, | |
| "learning_rate": 9.954540071450418e-06, | |
| "loss": 0.4766, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.1390619274459509, | |
| "grad_norm": 0.41592431105518124, | |
| "learning_rate": 9.95410881248655e-06, | |
| "loss": 0.5279, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.1392451447416636, | |
| "grad_norm": 0.9055926932881602, | |
| "learning_rate": 9.953675527013594e-06, | |
| "loss": 0.4544, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.13942836203737632, | |
| "grad_norm": 0.4230083797544526, | |
| "learning_rate": 9.953240215208787e-06, | |
| "loss": 0.5174, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.13961157933308904, | |
| "grad_norm": 0.46167105853029766, | |
| "learning_rate": 9.9528028772502e-06, | |
| "loss": 0.5087, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.13979479662880176, | |
| "grad_norm": 0.4614988799168283, | |
| "learning_rate": 9.952363513316727e-06, | |
| "loss": 0.5035, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.13997801392451448, | |
| "grad_norm": 0.4599378559524754, | |
| "learning_rate": 9.951922123588091e-06, | |
| "loss": 0.4982, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.1401612312202272, | |
| "grad_norm": 0.4169187563233499, | |
| "learning_rate": 9.951478708244847e-06, | |
| "loss": 0.5205, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.1403444485159399, | |
| "grad_norm": 1.4346541301623688, | |
| "learning_rate": 9.951033267468375e-06, | |
| "loss": 0.4704, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.14052766581165263, | |
| "grad_norm": 0.4477016515011828, | |
| "learning_rate": 9.950585801440889e-06, | |
| "loss": 0.489, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.14071088310736535, | |
| "grad_norm": 0.4178701034776549, | |
| "learning_rate": 9.950136310345425e-06, | |
| "loss": 0.5147, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.14089410040307804, | |
| "grad_norm": 0.4893104037393506, | |
| "learning_rate": 9.949684794365848e-06, | |
| "loss": 0.4913, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.14107731769879076, | |
| "grad_norm": 0.4018838415277579, | |
| "learning_rate": 9.949231253686857e-06, | |
| "loss": 0.464, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.14126053499450347, | |
| "grad_norm": 0.38608379924790914, | |
| "learning_rate": 9.948775688493974e-06, | |
| "loss": 0.5173, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.1414437522902162, | |
| "grad_norm": 0.4162388206396457, | |
| "learning_rate": 9.948318098973552e-06, | |
| "loss": 0.4917, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.1416269695859289, | |
| "grad_norm": 0.3781950328417496, | |
| "learning_rate": 9.947858485312772e-06, | |
| "loss": 0.4446, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.14181018688164163, | |
| "grad_norm": 0.45959051057998834, | |
| "learning_rate": 9.947396847699638e-06, | |
| "loss": 0.4918, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.14199340417735434, | |
| "grad_norm": 0.480317102541819, | |
| "learning_rate": 9.946933186322988e-06, | |
| "loss": 0.5369, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.14217662147306706, | |
| "grad_norm": 0.5008985493720574, | |
| "learning_rate": 9.946467501372485e-06, | |
| "loss": 0.4992, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.14235983876877978, | |
| "grad_norm": 0.4117591563183844, | |
| "learning_rate": 9.94599979303862e-06, | |
| "loss": 0.4818, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.1425430560644925, | |
| "grad_norm": 0.531108956987235, | |
| "learning_rate": 9.945530061512714e-06, | |
| "loss": 0.517, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.1427262733602052, | |
| "grad_norm": 0.43817523582155865, | |
| "learning_rate": 9.945058306986911e-06, | |
| "loss": 0.5473, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.14290949065591793, | |
| "grad_norm": 0.4747080227792923, | |
| "learning_rate": 9.944584529654187e-06, | |
| "loss": 0.4819, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.14309270795163062, | |
| "grad_norm": 0.4249215515551791, | |
| "learning_rate": 9.944108729708342e-06, | |
| "loss": 0.4979, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.14327592524734334, | |
| "grad_norm": 0.43558257752578505, | |
| "learning_rate": 9.943630907344008e-06, | |
| "loss": 0.484, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.14345914254305606, | |
| "grad_norm": 0.4750219883418816, | |
| "learning_rate": 9.943151062756638e-06, | |
| "loss": 0.4825, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.14364235983876877, | |
| "grad_norm": 0.4332476417990451, | |
| "learning_rate": 9.942669196142516e-06, | |
| "loss": 0.4929, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.1438255771344815, | |
| "grad_norm": 0.48307720479460303, | |
| "learning_rate": 9.942185307698754e-06, | |
| "loss": 0.5099, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.1440087944301942, | |
| "grad_norm": 0.43004630110605213, | |
| "learning_rate": 9.941699397623289e-06, | |
| "loss": 0.4435, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.14419201172590693, | |
| "grad_norm": 0.5526090978976514, | |
| "learning_rate": 9.941211466114883e-06, | |
| "loss": 0.4831, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.14437522902161964, | |
| "grad_norm": 0.4299548315161719, | |
| "learning_rate": 9.94072151337313e-06, | |
| "loss": 0.5055, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.14455844631733236, | |
| "grad_norm": 0.46488795405005734, | |
| "learning_rate": 9.940229539598449e-06, | |
| "loss": 0.5247, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.14474166361304508, | |
| "grad_norm": 0.45711196453956754, | |
| "learning_rate": 9.939735544992084e-06, | |
| "loss": 0.5155, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.1449248809087578, | |
| "grad_norm": 0.39377908902960373, | |
| "learning_rate": 9.939239529756106e-06, | |
| "loss": 0.4861, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.14510809820447051, | |
| "grad_norm": 0.3780973854580167, | |
| "learning_rate": 9.938741494093413e-06, | |
| "loss": 0.4743, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.1452913155001832, | |
| "grad_norm": 0.4442667746426281, | |
| "learning_rate": 9.93824143820773e-06, | |
| "loss": 0.5124, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.14547453279589592, | |
| "grad_norm": 0.47139079982459514, | |
| "learning_rate": 9.93773936230361e-06, | |
| "loss": 0.5095, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.14565775009160864, | |
| "grad_norm": 0.47478113201389704, | |
| "learning_rate": 9.937235266586425e-06, | |
| "loss": 0.5242, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.14584096738732136, | |
| "grad_norm": 0.4253566981856297, | |
| "learning_rate": 9.936729151262383e-06, | |
| "loss": 0.4816, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.14602418468303408, | |
| "grad_norm": 0.3995444614750483, | |
| "learning_rate": 9.936221016538514e-06, | |
| "loss": 0.5114, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.1462074019787468, | |
| "grad_norm": 0.3958644449360531, | |
| "learning_rate": 9.935710862622671e-06, | |
| "loss": 0.5044, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.1463906192744595, | |
| "grad_norm": 0.39672728856696343, | |
| "learning_rate": 9.935198689723537e-06, | |
| "loss": 0.5079, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.14657383657017223, | |
| "grad_norm": 0.4401216989939036, | |
| "learning_rate": 9.934684498050619e-06, | |
| "loss": 0.5169, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.14675705386588495, | |
| "grad_norm": 0.42531985045283716, | |
| "learning_rate": 9.93416828781425e-06, | |
| "loss": 0.5039, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.14694027116159766, | |
| "grad_norm": 0.43641771981673, | |
| "learning_rate": 9.93365005922559e-06, | |
| "loss": 0.4705, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.14712348845731038, | |
| "grad_norm": 0.4154214727248315, | |
| "learning_rate": 9.933129812496623e-06, | |
| "loss": 0.4801, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.1473067057530231, | |
| "grad_norm": 0.4668765888893056, | |
| "learning_rate": 9.932607547840161e-06, | |
| "loss": 0.5262, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.1474899230487358, | |
| "grad_norm": 0.45163409733644266, | |
| "learning_rate": 9.932083265469836e-06, | |
| "loss": 0.5009, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.1476731403444485, | |
| "grad_norm": 0.44602336917824525, | |
| "learning_rate": 9.93155696560011e-06, | |
| "loss": 0.5071, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.14785635764016122, | |
| "grad_norm": 0.40571563214546336, | |
| "learning_rate": 9.931028648446273e-06, | |
| "loss": 0.4867, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.14803957493587394, | |
| "grad_norm": 0.412866633306789, | |
| "learning_rate": 9.930498314224433e-06, | |
| "loss": 0.5204, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.14822279223158666, | |
| "grad_norm": 0.4491417286505596, | |
| "learning_rate": 9.929965963151526e-06, | |
| "loss": 0.4959, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.14840600952729938, | |
| "grad_norm": 0.483698524685917, | |
| "learning_rate": 9.929431595445315e-06, | |
| "loss": 0.4945, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.1485892268230121, | |
| "grad_norm": 0.45809625895317874, | |
| "learning_rate": 9.928895211324387e-06, | |
| "loss": 0.4795, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.1487724441187248, | |
| "grad_norm": 0.4671855759216309, | |
| "learning_rate": 9.928356811008153e-06, | |
| "loss": 0.502, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.14895566141443753, | |
| "grad_norm": 0.49991252806247843, | |
| "learning_rate": 9.927816394716847e-06, | |
| "loss": 0.4989, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.14913887871015025, | |
| "grad_norm": 0.4930888660574426, | |
| "learning_rate": 9.92727396267153e-06, | |
| "loss": 0.5256, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.14932209600586296, | |
| "grad_norm": 0.4301245288190715, | |
| "learning_rate": 9.926729515094092e-06, | |
| "loss": 0.5062, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.14950531330157568, | |
| "grad_norm": 0.4254345495226345, | |
| "learning_rate": 9.926183052207235e-06, | |
| "loss": 0.4885, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.14968853059728837, | |
| "grad_norm": 0.40854658960332646, | |
| "learning_rate": 9.925634574234499e-06, | |
| "loss": 0.4909, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.1498717478930011, | |
| "grad_norm": 0.4307966660913799, | |
| "learning_rate": 9.925084081400241e-06, | |
| "loss": 0.4974, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.1500549651887138, | |
| "grad_norm": 0.44710983697734263, | |
| "learning_rate": 9.924531573929641e-06, | |
| "loss": 0.5107, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.15023818248442652, | |
| "grad_norm": 0.47247983608563787, | |
| "learning_rate": 9.923977052048708e-06, | |
| "loss": 0.5185, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.15042139978013924, | |
| "grad_norm": 0.43408666873611645, | |
| "learning_rate": 9.923420515984272e-06, | |
| "loss": 0.5256, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.15060461707585196, | |
| "grad_norm": 0.3618361050633503, | |
| "learning_rate": 9.922861965963987e-06, | |
| "loss": 0.4835, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.15078783437156468, | |
| "grad_norm": 0.478572088704784, | |
| "learning_rate": 9.922301402216334e-06, | |
| "loss": 0.4971, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.1509710516672774, | |
| "grad_norm": 0.4956723559981853, | |
| "learning_rate": 9.921738824970611e-06, | |
| "loss": 0.4664, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.1511542689629901, | |
| "grad_norm": 0.43104833444065105, | |
| "learning_rate": 9.921174234456947e-06, | |
| "loss": 0.5056, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.15133748625870283, | |
| "grad_norm": 0.38636058698792763, | |
| "learning_rate": 9.920607630906289e-06, | |
| "loss": 0.509, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.15152070355441555, | |
| "grad_norm": 0.4638108326048624, | |
| "learning_rate": 9.920039014550413e-06, | |
| "loss": 0.4928, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.15170392085012827, | |
| "grad_norm": 0.38962500919500936, | |
| "learning_rate": 9.919468385621912e-06, | |
| "loss": 0.5025, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.15188713814584096, | |
| "grad_norm": 0.423466480397551, | |
| "learning_rate": 9.918895744354204e-06, | |
| "loss": 0.4903, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.15207035544155367, | |
| "grad_norm": 0.5945183810824295, | |
| "learning_rate": 9.918321090981537e-06, | |
| "loss": 0.4961, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.1522535727372664, | |
| "grad_norm": 0.3857346095853992, | |
| "learning_rate": 9.917744425738971e-06, | |
| "loss": 0.4903, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.1524367900329791, | |
| "grad_norm": 0.4368019305898112, | |
| "learning_rate": 9.917165748862398e-06, | |
| "loss": 0.4985, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.15262000732869183, | |
| "grad_norm": 0.4622792781214721, | |
| "learning_rate": 9.916585060588526e-06, | |
| "loss": 0.5298, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.15280322462440454, | |
| "grad_norm": 0.43464073300377387, | |
| "learning_rate": 9.916002361154894e-06, | |
| "loss": 0.4956, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.15298644192011726, | |
| "grad_norm": 0.37914154472744427, | |
| "learning_rate": 9.915417650799855e-06, | |
| "loss": 0.4914, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.15316965921582998, | |
| "grad_norm": 0.41110768627195465, | |
| "learning_rate": 9.914830929762588e-06, | |
| "loss": 0.4933, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.1533528765115427, | |
| "grad_norm": 0.3922989216974703, | |
| "learning_rate": 9.914242198283099e-06, | |
| "loss": 0.5, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.1535360938072554, | |
| "grad_norm": 0.4310098182316404, | |
| "learning_rate": 9.91365145660221e-06, | |
| "loss": 0.4817, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.15371931110296813, | |
| "grad_norm": 0.40690680117071937, | |
| "learning_rate": 9.913058704961565e-06, | |
| "loss": 0.4778, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.15390252839868085, | |
| "grad_norm": 0.4584415585967629, | |
| "learning_rate": 9.912463943603635e-06, | |
| "loss": 0.4585, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.15408574569439354, | |
| "grad_norm": 0.48734349658323495, | |
| "learning_rate": 9.911867172771711e-06, | |
| "loss": 0.5345, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.15426896299010626, | |
| "grad_norm": 0.442686833828428, | |
| "learning_rate": 9.911268392709908e-06, | |
| "loss": 0.5134, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.15445218028581897, | |
| "grad_norm": 0.44506293028939, | |
| "learning_rate": 9.910667603663156e-06, | |
| "loss": 0.5158, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.1546353975815317, | |
| "grad_norm": 0.4311992637968541, | |
| "learning_rate": 9.910064805877214e-06, | |
| "loss": 0.4837, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.1548186148772444, | |
| "grad_norm": 0.4493099076015729, | |
| "learning_rate": 9.90945999959866e-06, | |
| "loss": 0.5108, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.15500183217295713, | |
| "grad_norm": 0.45478365363321205, | |
| "learning_rate": 9.908853185074896e-06, | |
| "loss": 0.4998, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.15518504946866984, | |
| "grad_norm": 0.46301414888311854, | |
| "learning_rate": 9.90824436255414e-06, | |
| "loss": 0.5225, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.15536826676438256, | |
| "grad_norm": 0.40118617155548936, | |
| "learning_rate": 9.907633532285435e-06, | |
| "loss": 0.4927, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.15555148406009528, | |
| "grad_norm": 0.49481295874709574, | |
| "learning_rate": 9.907020694518646e-06, | |
| "loss": 0.4933, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.155734701355808, | |
| "grad_norm": 0.4102626498510104, | |
| "learning_rate": 9.90640584950446e-06, | |
| "loss": 0.5111, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.15591791865152071, | |
| "grad_norm": 0.46514690031713113, | |
| "learning_rate": 9.905788997494377e-06, | |
| "loss": 0.4728, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.15610113594723343, | |
| "grad_norm": 0.4054441718781369, | |
| "learning_rate": 9.905170138740732e-06, | |
| "loss": 0.4828, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.15628435324294612, | |
| "grad_norm": 0.4400252577977126, | |
| "learning_rate": 9.904549273496666e-06, | |
| "loss": 0.501, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.15646757053865884, | |
| "grad_norm": 0.42167618939817325, | |
| "learning_rate": 9.903926402016153e-06, | |
| "loss": 0.4922, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.15665078783437156, | |
| "grad_norm": 0.4093707700968775, | |
| "learning_rate": 9.90330152455398e-06, | |
| "loss": 0.5014, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.15683400513008428, | |
| "grad_norm": 0.4523522809487888, | |
| "learning_rate": 9.902674641365756e-06, | |
| "loss": 0.4829, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.157017222425797, | |
| "grad_norm": 0.49143897815029397, | |
| "learning_rate": 9.902045752707916e-06, | |
| "loss": 0.5074, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.1572004397215097, | |
| "grad_norm": 0.43621740114283203, | |
| "learning_rate": 9.901414858837706e-06, | |
| "loss": 0.5182, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.15738365701722243, | |
| "grad_norm": 0.40732498942379025, | |
| "learning_rate": 9.9007819600132e-06, | |
| "loss": 0.4674, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.15756687431293515, | |
| "grad_norm": 0.43675476681733244, | |
| "learning_rate": 9.900147056493285e-06, | |
| "loss": 0.5374, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.15775009160864786, | |
| "grad_norm": 0.43060074322101793, | |
| "learning_rate": 9.89951014853768e-06, | |
| "loss": 0.4635, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.15793330890436058, | |
| "grad_norm": 0.41179202097945616, | |
| "learning_rate": 9.898871236406907e-06, | |
| "loss": 0.4951, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.1581165262000733, | |
| "grad_norm": 0.371370018002044, | |
| "learning_rate": 9.898230320362323e-06, | |
| "loss": 0.479, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.158299743495786, | |
| "grad_norm": 0.4300134296591519, | |
| "learning_rate": 9.897587400666097e-06, | |
| "loss": 0.4868, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.1584829607914987, | |
| "grad_norm": 0.43589209027697184, | |
| "learning_rate": 9.896942477581221e-06, | |
| "loss": 0.5124, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.15866617808721142, | |
| "grad_norm": 0.575247632832188, | |
| "learning_rate": 9.896295551371503e-06, | |
| "loss": 0.5053, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.15884939538292414, | |
| "grad_norm": 0.4273872012355264, | |
| "learning_rate": 9.89564662230157e-06, | |
| "loss": 0.5138, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.15903261267863686, | |
| "grad_norm": 0.4543163505662005, | |
| "learning_rate": 9.894995690636874e-06, | |
| "loss": 0.5229, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.15921582997434958, | |
| "grad_norm": 0.4491410597219586, | |
| "learning_rate": 9.894342756643684e-06, | |
| "loss": 0.5045, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.1593990472700623, | |
| "grad_norm": 0.47087708750396473, | |
| "learning_rate": 9.893687820589081e-06, | |
| "loss": 0.5087, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.159582264565775, | |
| "grad_norm": 0.4240236693967998, | |
| "learning_rate": 9.893030882740977e-06, | |
| "loss": 0.5052, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.15976548186148773, | |
| "grad_norm": 0.4287525963131784, | |
| "learning_rate": 9.892371943368092e-06, | |
| "loss": 0.527, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.15994869915720045, | |
| "grad_norm": 0.4131563755872441, | |
| "learning_rate": 9.891711002739971e-06, | |
| "loss": 0.5074, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.16013191645291316, | |
| "grad_norm": 0.39231143806918656, | |
| "learning_rate": 9.891048061126975e-06, | |
| "loss": 0.5018, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.16031513374862588, | |
| "grad_norm": 0.4391493046066672, | |
| "learning_rate": 9.890383118800287e-06, | |
| "loss": 0.518, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.16049835104433857, | |
| "grad_norm": 0.468016398258625, | |
| "learning_rate": 9.889716176031903e-06, | |
| "loss": 0.5191, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.1606815683400513, | |
| "grad_norm": 0.4619425954928644, | |
| "learning_rate": 9.88904723309464e-06, | |
| "loss": 0.5114, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.160864785635764, | |
| "grad_norm": 0.4581492770476195, | |
| "learning_rate": 9.888376290262134e-06, | |
| "loss": 0.5349, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.16104800293147672, | |
| "grad_norm": 0.390022001202663, | |
| "learning_rate": 9.887703347808838e-06, | |
| "loss": 0.507, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.16123122022718944, | |
| "grad_norm": 0.38271876665722593, | |
| "learning_rate": 9.887028406010026e-06, | |
| "loss": 0.4987, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.16141443752290216, | |
| "grad_norm": 0.39991523006604696, | |
| "learning_rate": 9.886351465141785e-06, | |
| "loss": 0.4807, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.16159765481861488, | |
| "grad_norm": 0.3867195675970219, | |
| "learning_rate": 9.88567252548102e-06, | |
| "loss": 0.515, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.1617808721143276, | |
| "grad_norm": 0.47566001886735476, | |
| "learning_rate": 9.884991587305459e-06, | |
| "loss": 0.4461, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.1619640894100403, | |
| "grad_norm": 0.4453653597243864, | |
| "learning_rate": 9.884308650893642e-06, | |
| "loss": 0.5493, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.16214730670575303, | |
| "grad_norm": 0.38787040868110323, | |
| "learning_rate": 9.883623716524929e-06, | |
| "loss": 0.4998, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.16233052400146575, | |
| "grad_norm": 0.4570258592705414, | |
| "learning_rate": 9.882936784479498e-06, | |
| "loss": 0.5226, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.16251374129717847, | |
| "grad_norm": 0.4438200453897428, | |
| "learning_rate": 9.882247855038339e-06, | |
| "loss": 0.5073, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.16269695859289116, | |
| "grad_norm": 0.3887912773501902, | |
| "learning_rate": 9.881556928483266e-06, | |
| "loss": 0.5213, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.16288017588860387, | |
| "grad_norm": 0.44048355093202696, | |
| "learning_rate": 9.880864005096906e-06, | |
| "loss": 0.511, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.1630633931843166, | |
| "grad_norm": 0.44235079469227967, | |
| "learning_rate": 9.880169085162703e-06, | |
| "loss": 0.5223, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.1632466104800293, | |
| "grad_norm": 0.3877832289424, | |
| "learning_rate": 9.87947216896492e-06, | |
| "loss": 0.4537, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.16342982777574203, | |
| "grad_norm": 0.44869938145307137, | |
| "learning_rate": 9.878773256788635e-06, | |
| "loss": 0.5013, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.16361304507145474, | |
| "grad_norm": 0.46375171983760244, | |
| "learning_rate": 9.878072348919738e-06, | |
| "loss": 0.489, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.16379626236716746, | |
| "grad_norm": 0.49608852880970994, | |
| "learning_rate": 9.877369445644945e-06, | |
| "loss": 0.5177, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.16397947966288018, | |
| "grad_norm": 0.4082183549684993, | |
| "learning_rate": 9.876664547251781e-06, | |
| "loss": 0.5091, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.1641626969585929, | |
| "grad_norm": 0.4347968628628821, | |
| "learning_rate": 9.875957654028588e-06, | |
| "loss": 0.5242, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.16434591425430561, | |
| "grad_norm": 0.45545255399303036, | |
| "learning_rate": 9.875248766264527e-06, | |
| "loss": 0.5406, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.16452913155001833, | |
| "grad_norm": 0.41358644063630506, | |
| "learning_rate": 9.874537884249574e-06, | |
| "loss": 0.511, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.16471234884573105, | |
| "grad_norm": 0.4576667619206321, | |
| "learning_rate": 9.873825008274514e-06, | |
| "loss": 0.5073, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.16489556614144374, | |
| "grad_norm": 0.47721781865779744, | |
| "learning_rate": 9.87311013863096e-06, | |
| "loss": 0.5044, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.16507878343715646, | |
| "grad_norm": 0.4622288619794578, | |
| "learning_rate": 9.872393275611329e-06, | |
| "loss": 0.5096, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.16526200073286917, | |
| "grad_norm": 0.41177688817899083, | |
| "learning_rate": 9.871674419508864e-06, | |
| "loss": 0.523, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.1654452180285819, | |
| "grad_norm": 0.371054810469729, | |
| "learning_rate": 9.87095357061761e-06, | |
| "loss": 0.4897, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.1656284353242946, | |
| "grad_norm": 0.41574608839188226, | |
| "learning_rate": 9.87023072923244e-06, | |
| "loss": 0.4823, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.16581165262000733, | |
| "grad_norm": 0.4218915251343208, | |
| "learning_rate": 9.869505895649036e-06, | |
| "loss": 0.5079, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.16599486991572004, | |
| "grad_norm": 0.5549831381708766, | |
| "learning_rate": 9.868779070163895e-06, | |
| "loss": 0.484, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.16617808721143276, | |
| "grad_norm": 0.3891473564827362, | |
| "learning_rate": 9.868050253074328e-06, | |
| "loss": 0.4903, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.16636130450714548, | |
| "grad_norm": 0.4268945705632106, | |
| "learning_rate": 9.867319444678465e-06, | |
| "loss": 0.5024, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.1665445218028582, | |
| "grad_norm": 0.40332206359603545, | |
| "learning_rate": 9.866586645275247e-06, | |
| "loss": 0.4477, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.16672773909857092, | |
| "grad_norm": 0.3720747589583317, | |
| "learning_rate": 9.86585185516443e-06, | |
| "loss": 0.469, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.16691095639428363, | |
| "grad_norm": 0.451113552639029, | |
| "learning_rate": 9.865115074646583e-06, | |
| "loss": 0.5221, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.16709417368999632, | |
| "grad_norm": 0.4454998332020719, | |
| "learning_rate": 9.864376304023092e-06, | |
| "loss": 0.495, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.16727739098570904, | |
| "grad_norm": 0.5363910151131063, | |
| "learning_rate": 9.863635543596156e-06, | |
| "loss": 0.5233, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.16746060828142176, | |
| "grad_norm": 0.4392403856880424, | |
| "learning_rate": 9.862892793668787e-06, | |
| "loss": 0.4998, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.16764382557713448, | |
| "grad_norm": 0.42093143672832734, | |
| "learning_rate": 9.862148054544812e-06, | |
| "loss": 0.4977, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.1678270428728472, | |
| "grad_norm": 0.39275787265485074, | |
| "learning_rate": 9.86140132652887e-06, | |
| "loss": 0.4694, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.1680102601685599, | |
| "grad_norm": 0.4159908861548446, | |
| "learning_rate": 9.860652609926417e-06, | |
| "loss": 0.4886, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.16819347746427263, | |
| "grad_norm": 0.38579180273261776, | |
| "learning_rate": 9.859901905043718e-06, | |
| "loss": 0.48, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.16837669475998535, | |
| "grad_norm": 0.4273621763370794, | |
| "learning_rate": 9.859149212187855e-06, | |
| "loss": 0.512, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.16855991205569806, | |
| "grad_norm": 0.45913559298162226, | |
| "learning_rate": 9.85839453166672e-06, | |
| "loss": 0.4877, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.16874312935141078, | |
| "grad_norm": 0.3847363797846655, | |
| "learning_rate": 9.85763786378902e-06, | |
| "loss": 0.4586, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.1689263466471235, | |
| "grad_norm": 0.4872256895625686, | |
| "learning_rate": 9.856879208864277e-06, | |
| "loss": 0.5035, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.16910956394283622, | |
| "grad_norm": 0.47756329412725707, | |
| "learning_rate": 9.85611856720282e-06, | |
| "loss": 0.5031, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.1692927812385489, | |
| "grad_norm": 0.474431940864528, | |
| "learning_rate": 9.8553559391158e-06, | |
| "loss": 0.512, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.16947599853426162, | |
| "grad_norm": 0.41972238109665655, | |
| "learning_rate": 9.85459132491517e-06, | |
| "loss": 0.5386, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.16965921582997434, | |
| "grad_norm": 0.4369771091332424, | |
| "learning_rate": 9.8538247249137e-06, | |
| "loss": 0.4909, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.16984243312568706, | |
| "grad_norm": 0.4523772832392917, | |
| "learning_rate": 9.853056139424974e-06, | |
| "loss": 0.4951, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.17002565042139978, | |
| "grad_norm": 0.40235086982371876, | |
| "learning_rate": 9.852285568763387e-06, | |
| "loss": 0.4828, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.1702088677171125, | |
| "grad_norm": 0.4150844402567988, | |
| "learning_rate": 9.851513013244144e-06, | |
| "loss": 0.4933, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.1703920850128252, | |
| "grad_norm": 0.3941602147072389, | |
| "learning_rate": 9.850738473183266e-06, | |
| "loss": 0.4855, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.17057530230853793, | |
| "grad_norm": 0.47118394280764797, | |
| "learning_rate": 9.849961948897582e-06, | |
| "loss": 0.5024, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.17075851960425065, | |
| "grad_norm": 0.3912878663413955, | |
| "learning_rate": 9.849183440704735e-06, | |
| "loss": 0.5041, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.17094173689996336, | |
| "grad_norm": 0.4001961484144372, | |
| "learning_rate": 9.848402948923177e-06, | |
| "loss": 0.4753, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.17112495419567608, | |
| "grad_norm": 0.4441137892348275, | |
| "learning_rate": 9.847620473872172e-06, | |
| "loss": 0.5102, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.1713081714913888, | |
| "grad_norm": 0.40101758764349804, | |
| "learning_rate": 9.846836015871802e-06, | |
| "loss": 0.4824, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.1714913887871015, | |
| "grad_norm": 0.41075944565503664, | |
| "learning_rate": 9.846049575242949e-06, | |
| "loss": 0.5235, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.1716746060828142, | |
| "grad_norm": 0.3780837940291423, | |
| "learning_rate": 9.845261152307312e-06, | |
| "loss": 0.5211, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.17185782337852692, | |
| "grad_norm": 0.45070991069476113, | |
| "learning_rate": 9.844470747387403e-06, | |
| "loss": 0.4926, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.17204104067423964, | |
| "grad_norm": 0.40836878453605646, | |
| "learning_rate": 9.843678360806542e-06, | |
| "loss": 0.5173, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.17222425796995236, | |
| "grad_norm": 0.36676941914948374, | |
| "learning_rate": 9.842883992888855e-06, | |
| "loss": 0.5101, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.17240747526566508, | |
| "grad_norm": 0.42894488109200807, | |
| "learning_rate": 9.842087643959288e-06, | |
| "loss": 0.5398, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.1725906925613778, | |
| "grad_norm": 0.4004768449690441, | |
| "learning_rate": 9.841289314343591e-06, | |
| "loss": 0.4967, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.1727739098570905, | |
| "grad_norm": 0.3781076276341992, | |
| "learning_rate": 9.840489004368325e-06, | |
| "loss": 0.4731, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.17295712715280323, | |
| "grad_norm": 0.45307945315267256, | |
| "learning_rate": 9.839686714360864e-06, | |
| "loss": 0.4737, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.17314034444851595, | |
| "grad_norm": 0.4079247794136399, | |
| "learning_rate": 9.838882444649387e-06, | |
| "loss": 0.4927, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.17332356174422867, | |
| "grad_norm": 0.47171018664243136, | |
| "learning_rate": 9.838076195562886e-06, | |
| "loss": 0.5009, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.17350677903994138, | |
| "grad_norm": 0.44572108382734166, | |
| "learning_rate": 9.837267967431164e-06, | |
| "loss": 0.4858, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.17368999633565407, | |
| "grad_norm": 0.40649730883742624, | |
| "learning_rate": 9.83645776058483e-06, | |
| "loss": 0.4943, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.1738732136313668, | |
| "grad_norm": 0.43002759908477306, | |
| "learning_rate": 9.835645575355304e-06, | |
| "loss": 0.4872, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.1740564309270795, | |
| "grad_norm": 0.39064612647120167, | |
| "learning_rate": 9.834831412074816e-06, | |
| "loss": 0.4409, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.17423964822279223, | |
| "grad_norm": 0.3761573603114394, | |
| "learning_rate": 9.834015271076405e-06, | |
| "loss": 0.459, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.17442286551850494, | |
| "grad_norm": 0.42796519720566795, | |
| "learning_rate": 9.83319715269392e-06, | |
| "loss": 0.4906, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.17460608281421766, | |
| "grad_norm": 0.5091807084893948, | |
| "learning_rate": 9.832377057262015e-06, | |
| "loss": 0.5516, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.17478930010993038, | |
| "grad_norm": 0.48065067696295527, | |
| "learning_rate": 9.831554985116155e-06, | |
| "loss": 0.5097, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.1749725174056431, | |
| "grad_norm": 0.4310364985277951, | |
| "learning_rate": 9.830730936592615e-06, | |
| "loss": 0.5025, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.17515573470135581, | |
| "grad_norm": 0.39964086093662415, | |
| "learning_rate": 9.829904912028477e-06, | |
| "loss": 0.494, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.17533895199706853, | |
| "grad_norm": 0.40935380850458175, | |
| "learning_rate": 9.829076911761631e-06, | |
| "loss": 0.5182, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.17552216929278125, | |
| "grad_norm": 0.454766087870644, | |
| "learning_rate": 9.828246936130777e-06, | |
| "loss": 0.5113, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.17570538658849397, | |
| "grad_norm": 0.5087972352515657, | |
| "learning_rate": 9.827414985475419e-06, | |
| "loss": 0.4797, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.17588860388420666, | |
| "grad_norm": 0.4255562501722622, | |
| "learning_rate": 9.826581060135873e-06, | |
| "loss": 0.5045, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.17607182117991937, | |
| "grad_norm": 0.45583787528367603, | |
| "learning_rate": 9.825745160453264e-06, | |
| "loss": 0.4755, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.1762550384756321, | |
| "grad_norm": 0.45648883029913284, | |
| "learning_rate": 9.824907286769519e-06, | |
| "loss": 0.4736, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.1764382557713448, | |
| "grad_norm": 0.436071698669034, | |
| "learning_rate": 9.824067439427374e-06, | |
| "loss": 0.5036, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.17662147306705753, | |
| "grad_norm": 0.41502000197263017, | |
| "learning_rate": 9.823225618770378e-06, | |
| "loss": 0.5121, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.17680469036277024, | |
| "grad_norm": 0.450252173902017, | |
| "learning_rate": 9.822381825142879e-06, | |
| "loss": 0.506, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.17698790765848296, | |
| "grad_norm": 0.43561778776659554, | |
| "learning_rate": 9.82153605889004e-06, | |
| "loss": 0.5304, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.17717112495419568, | |
| "grad_norm": 0.4306520452468297, | |
| "learning_rate": 9.820688320357823e-06, | |
| "loss": 0.5112, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.1773543422499084, | |
| "grad_norm": 0.39680977809710444, | |
| "learning_rate": 9.819838609893005e-06, | |
| "loss": 0.4904, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.17753755954562112, | |
| "grad_norm": 0.4225898008101114, | |
| "learning_rate": 9.81898692784316e-06, | |
| "loss": 0.5137, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.17772077684133383, | |
| "grad_norm": 0.4267062187327317, | |
| "learning_rate": 9.818133274556679e-06, | |
| "loss": 0.517, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.17790399413704655, | |
| "grad_norm": 0.4450452397955529, | |
| "learning_rate": 9.81727765038275e-06, | |
| "loss": 0.4949, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.17808721143275924, | |
| "grad_norm": 0.42730228271929965, | |
| "learning_rate": 9.816420055671374e-06, | |
| "loss": 0.5013, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.17827042872847196, | |
| "grad_norm": 0.4421383626296405, | |
| "learning_rate": 9.815560490773356e-06, | |
| "loss": 0.4679, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.17845364602418468, | |
| "grad_norm": 0.43307265096730885, | |
| "learning_rate": 9.814698956040305e-06, | |
| "loss": 0.4976, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.1786368633198974, | |
| "grad_norm": 0.40376464412932705, | |
| "learning_rate": 9.813835451824636e-06, | |
| "loss": 0.5021, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.1788200806156101, | |
| "grad_norm": 0.44775776339538953, | |
| "learning_rate": 9.812969978479573e-06, | |
| "loss": 0.4824, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.17900329791132283, | |
| "grad_norm": 0.468036097947629, | |
| "learning_rate": 9.812102536359142e-06, | |
| "loss": 0.4966, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.17918651520703555, | |
| "grad_norm": 0.49832149697786493, | |
| "learning_rate": 9.811233125818176e-06, | |
| "loss": 0.4853, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.17936973250274826, | |
| "grad_norm": 0.44754723833693727, | |
| "learning_rate": 9.810361747212313e-06, | |
| "loss": 0.4999, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.17955294979846098, | |
| "grad_norm": 0.4747011696315986, | |
| "learning_rate": 9.809488400897996e-06, | |
| "loss": 0.5108, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.1797361670941737, | |
| "grad_norm": 0.4055842519238836, | |
| "learning_rate": 9.808613087232473e-06, | |
| "loss": 0.4786, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.17991938438988642, | |
| "grad_norm": 0.3971701443501286, | |
| "learning_rate": 9.807735806573795e-06, | |
| "loss": 0.4925, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.18010260168559913, | |
| "grad_norm": 0.3784480421793096, | |
| "learning_rate": 9.806856559280819e-06, | |
| "loss": 0.4762, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.18028581898131182, | |
| "grad_norm": 0.41465183173286063, | |
| "learning_rate": 9.80597534571321e-06, | |
| "loss": 0.5037, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.18046903627702454, | |
| "grad_norm": 0.4713550462839489, | |
| "learning_rate": 9.80509216623143e-06, | |
| "loss": 0.4816, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.18065225357273726, | |
| "grad_norm": 0.34923786309805893, | |
| "learning_rate": 9.804207021196751e-06, | |
| "loss": 0.4903, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.18083547086844998, | |
| "grad_norm": 0.4359203385963395, | |
| "learning_rate": 9.803319910971248e-06, | |
| "loss": 0.5252, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.1810186881641627, | |
| "grad_norm": 0.40286824933567683, | |
| "learning_rate": 9.802430835917796e-06, | |
| "loss": 0.5248, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.1812019054598754, | |
| "grad_norm": 0.44314680454994215, | |
| "learning_rate": 9.801539796400078e-06, | |
| "loss": 0.5071, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.18138512275558813, | |
| "grad_norm": 0.44074612784921385, | |
| "learning_rate": 9.80064679278258e-06, | |
| "loss": 0.5057, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.18156834005130085, | |
| "grad_norm": 0.40968360376086815, | |
| "learning_rate": 9.799751825430592e-06, | |
| "loss": 0.4919, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.18175155734701356, | |
| "grad_norm": 0.4204735967781832, | |
| "learning_rate": 9.798854894710202e-06, | |
| "loss": 0.546, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.18193477464272628, | |
| "grad_norm": 0.36210624498761007, | |
| "learning_rate": 9.79795600098831e-06, | |
| "loss": 0.4904, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.182117991938439, | |
| "grad_norm": 0.45193717949202633, | |
| "learning_rate": 9.797055144632609e-06, | |
| "loss": 0.5292, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.18230120923415172, | |
| "grad_norm": 0.415630496229026, | |
| "learning_rate": 9.796152326011604e-06, | |
| "loss": 0.533, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.1824844265298644, | |
| "grad_norm": 0.39372729416385926, | |
| "learning_rate": 9.795247545494594e-06, | |
| "loss": 0.5147, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.18266764382557713, | |
| "grad_norm": 0.43540226077312505, | |
| "learning_rate": 9.794340803451692e-06, | |
| "loss": 0.49, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.18285086112128984, | |
| "grad_norm": 0.39372791981341043, | |
| "learning_rate": 9.7934321002538e-06, | |
| "loss": 0.4776, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.18303407841700256, | |
| "grad_norm": 0.40381508343023165, | |
| "learning_rate": 9.792521436272633e-06, | |
| "loss": 0.4566, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.18321729571271528, | |
| "grad_norm": 0.47729065322343966, | |
| "learning_rate": 9.791608811880702e-06, | |
| "loss": 0.5013, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 5458, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 178891845599232.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
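
The state above is the `trainer_state.json` that the Hugging Face Trainer writes inside each checkpoint directory (with `save_steps: 1000`, the first one lands at `checkpoint-1000/`, matching the final `global_step` above). A minimal sketch of how such a file can be inspected, assuming it is saved at that usual path; the summary it prints (mean loss over the first and last 50 logged steps) reflects the downward trend visible in `log_history`, from roughly 0.74 early on to roughly 0.50 by step 1000:

```python
# Minimal sketch: summarize the loss trend recorded in a Trainer
# checkpoint's trainer_state.json. Assumes the file sits at the
# standard path "checkpoint-1000/trainer_state.json".
import json

with open("checkpoint-1000/trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry is one logging event; with logging_steps = 1
# that means one record per optimizer step. Keep only training-loss
# records (eval records, if any, would lack the "loss" key).
history = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]

# Compare the mean loss over the first and last 50 logged steps.
first, last = losses[:50], losses[-50:]
print(f"steps logged: {len(steps)} (up to step {steps[-1]})")
print(f"mean loss, first 50 steps: {sum(first) / len(first):.4f}")
print(f"mean loss, last 50 steps:  {sum(last) / len(last):.4f}")
```

The same `history` list also carries `learning_rate` and `grad_norm` per step, so the warmup schedule (learning rate climbing from 0 toward ~9.8e-06 over these 1000 of 5458 `max_steps`) and the gradient-norm decay (~7-8 at step 1 down to ~0.4-0.5 by step 1000) can be extracted the same way.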