| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.24997764863656682, |
| "eval_steps": 500, |
| "global_step": 1864, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00013410818059901653, |
| "grad_norm": 6.53010672133618, |
| "learning_rate": 5.999999760325567e-07, |
| "loss": 1.7583, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00026821636119803307, |
| "grad_norm": 3.9699106842198337, |
| "learning_rate": 5.999999041302309e-07, |
| "loss": 1.6802, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0004023245417970496, |
| "grad_norm": 1.9790778060230643, |
| "learning_rate": 5.999997842930357e-07, |
| "loss": 1.7683, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0005364327223960661, |
| "grad_norm": 4.227652802101559, |
| "learning_rate": 5.999996165209921e-07, |
| "loss": 1.7059, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0006705409029950827, |
| "grad_norm": 3.569278124536831, |
| "learning_rate": 5.9999940081413e-07, |
| "loss": 1.7249, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0008046490835940993, |
| "grad_norm": 4.901107992602518, |
| "learning_rate": 5.999991371724877e-07, |
| "loss": 1.7577, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0009387572641931158, |
| "grad_norm": 1.7387118214106754, |
| "learning_rate": 5.999988255961119e-07, |
| "loss": 1.7158, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0010728654447921323, |
| "grad_norm": 1.809495324631513, |
| "learning_rate": 5.99998466085058e-07, |
| "loss": 1.7287, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0012069736253911489, |
| "grad_norm": 2.596203473961021, |
| "learning_rate": 5.999980586393898e-07, |
| "loss": 1.7724, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0013410818059901655, |
| "grad_norm": 2.024872932500911, |
| "learning_rate": 5.999976032591797e-07, |
| "loss": 1.7405, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001475189986589182, |
| "grad_norm": 1.8852498791545222, |
| "learning_rate": 5.999970999445085e-07, |
| "loss": 1.8083, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0016092981671881985, |
| "grad_norm": 2.789618405199575, |
| "learning_rate": 5.999965486954655e-07, |
| "loss": 1.7057, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0017434063477872151, |
| "grad_norm": 5.494954113770268, |
| "learning_rate": 5.999959495121485e-07, |
| "loss": 1.7091, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0018775145283862315, |
| "grad_norm": 2.79414084165035, |
| "learning_rate": 5.999953023946642e-07, |
| "loss": 1.7631, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.002011622708985248, |
| "grad_norm": 4.805471519443609, |
| "learning_rate": 5.999946073431272e-07, |
| "loss": 1.8484, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0021457308895842645, |
| "grad_norm": 1.3908444469943815, |
| "learning_rate": 5.99993864357661e-07, |
| "loss": 1.7106, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.002279839070183281, |
| "grad_norm": 1.9101877569067494, |
| "learning_rate": 5.999930734383974e-07, |
| "loss": 1.7213, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0024139472507822978, |
| "grad_norm": 1.547605953954358, |
| "learning_rate": 5.999922345854771e-07, |
| "loss": 1.7222, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0025480554313813144, |
| "grad_norm": 1.4545546480798652, |
| "learning_rate": 5.999913477990486e-07, |
| "loss": 1.6248, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.002682163611980331, |
| "grad_norm": 1.3664919966665414, |
| "learning_rate": 5.999904130792696e-07, |
| "loss": 1.7481, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.002816271792579347, |
| "grad_norm": 1.2263286406299385, |
| "learning_rate": 5.999894304263061e-07, |
| "loss": 1.731, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.002950379973178364, |
| "grad_norm": 1.4441675215823284, |
| "learning_rate": 5.999883998403325e-07, |
| "loss": 1.7489, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0030844881537773804, |
| "grad_norm": 2.076565725996637, |
| "learning_rate": 5.999873213215316e-07, |
| "loss": 1.609, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.003218596334376397, |
| "grad_norm": 2.056874040951704, |
| "learning_rate": 5.999861948700952e-07, |
| "loss": 1.7387, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0033527045149754136, |
| "grad_norm": 1.196618920130671, |
| "learning_rate": 5.99985020486223e-07, |
| "loss": 1.6522, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0034868126955744302, |
| "grad_norm": 1.4295779403436433, |
| "learning_rate": 5.999837981701236e-07, |
| "loss": 1.7226, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0036209208761734464, |
| "grad_norm": 1.22926449530156, |
| "learning_rate": 5.99982527922014e-07, |
| "loss": 1.699, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.003755029056772463, |
| "grad_norm": 1.324357519929758, |
| "learning_rate": 5.999812097421198e-07, |
| "loss": 1.784, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0038891372373714797, |
| "grad_norm": 1.351746272995911, |
| "learning_rate": 5.999798436306748e-07, |
| "loss": 1.7094, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.004023245417970496, |
| "grad_norm": 1.3696717018122837, |
| "learning_rate": 5.999784295879217e-07, |
| "loss": 1.8113, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0041573535985695124, |
| "grad_norm": 1.2950751514861556, |
| "learning_rate": 5.999769676141116e-07, |
| "loss": 1.7043, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.004291461779168529, |
| "grad_norm": 1.510791624383582, |
| "learning_rate": 5.99975457709504e-07, |
| "loss": 1.7247, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.004425569959767546, |
| "grad_norm": 1.205151919537117, |
| "learning_rate": 5.999738998743669e-07, |
| "loss": 1.7102, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.004559678140366562, |
| "grad_norm": 1.2313460275237813, |
| "learning_rate": 5.999722941089769e-07, |
| "loss": 1.6194, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.004693786320965579, |
| "grad_norm": 1.2769810504677248, |
| "learning_rate": 5.999706404136191e-07, |
| "loss": 1.6776, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0048278945015645955, |
| "grad_norm": 1.210621301547261, |
| "learning_rate": 5.99968938788587e-07, |
| "loss": 1.658, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.004962002682163612, |
| "grad_norm": 1.3309399301655989, |
| "learning_rate": 5.99967189234183e-07, |
| "loss": 1.598, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.005096110862762629, |
| "grad_norm": 1.2698125491932901, |
| "learning_rate": 5.999653917507173e-07, |
| "loss": 1.6783, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.005230219043361645, |
| "grad_norm": 1.269690475054205, |
| "learning_rate": 5.999635463385092e-07, |
| "loss": 1.7118, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.005364327223960662, |
| "grad_norm": 1.239195449838068, |
| "learning_rate": 5.999616529978864e-07, |
| "loss": 1.7552, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.005498435404559678, |
| "grad_norm": 1.213245919091097, |
| "learning_rate": 5.999597117291851e-07, |
| "loss": 1.6195, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.005632543585158694, |
| "grad_norm": 1.472546911008587, |
| "learning_rate": 5.999577225327498e-07, |
| "loss": 1.7151, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.005766651765757711, |
| "grad_norm": 1.1739645532291967, |
| "learning_rate": 5.999556854089335e-07, |
| "loss": 1.6848, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.005900759946356728, |
| "grad_norm": 1.6603998730539062, |
| "learning_rate": 5.999536003580982e-07, |
| "loss": 1.7987, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.006034868126955744, |
| "grad_norm": 1.2267822489395797, |
| "learning_rate": 5.999514673806138e-07, |
| "loss": 1.7743, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.006168976307554761, |
| "grad_norm": 1.182672696849382, |
| "learning_rate": 5.999492864768594e-07, |
| "loss": 1.7007, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.006303084488153777, |
| "grad_norm": 1.3725982814639008, |
| "learning_rate": 5.999470576472216e-07, |
| "loss": 1.6453, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.006437192668752794, |
| "grad_norm": 1.2302523441661959, |
| "learning_rate": 5.999447808920965e-07, |
| "loss": 1.668, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.006571300849351811, |
| "grad_norm": 1.5825139985036842, |
| "learning_rate": 5.999424562118882e-07, |
| "loss": 1.677, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.006705409029950827, |
| "grad_norm": 1.3441000281769755, |
| "learning_rate": 5.999400836070092e-07, |
| "loss": 1.7907, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.006839517210549844, |
| "grad_norm": 1.2662568205784916, |
| "learning_rate": 5.999376630778812e-07, |
| "loss": 1.7948, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0069736253911488605, |
| "grad_norm": 1.6969919156319755, |
| "learning_rate": 5.999351946249336e-07, |
| "loss": 1.704, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.007107733571747876, |
| "grad_norm": 1.3702701009027687, |
| "learning_rate": 5.999326782486047e-07, |
| "loss": 1.7596, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.007241841752346893, |
| "grad_norm": 1.2008226357772018, |
| "learning_rate": 5.999301139493413e-07, |
| "loss": 1.7446, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0073759499329459095, |
| "grad_norm": 1.1610594693793954, |
| "learning_rate": 5.999275017275985e-07, |
| "loss": 1.6545, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.007510058113544926, |
| "grad_norm": 1.2318851837588591, |
| "learning_rate": 5.999248415838404e-07, |
| "loss": 1.6945, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.007644166294143943, |
| "grad_norm": 1.3623097314650943, |
| "learning_rate": 5.99922133518539e-07, |
| "loss": 1.7576, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.007778274474742959, |
| "grad_norm": 1.263711259426924, |
| "learning_rate": 5.999193775321749e-07, |
| "loss": 1.7202, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.007912382655341976, |
| "grad_norm": 1.266618530800646, |
| "learning_rate": 5.999165736252378e-07, |
| "loss": 1.7277, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.008046490835940992, |
| "grad_norm": 1.20263409583272, |
| "learning_rate": 5.999137217982253e-07, |
| "loss": 1.7287, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00818059901654001, |
| "grad_norm": 1.3137021476149842, |
| "learning_rate": 5.999108220516439e-07, |
| "loss": 1.7524, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.008314707197139025, |
| "grad_norm": 1.2381760472328087, |
| "learning_rate": 5.999078743860079e-07, |
| "loss": 1.6713, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.008448815377738042, |
| "grad_norm": 1.1488246018603008, |
| "learning_rate": 5.999048788018412e-07, |
| "loss": 1.61, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.008582923558337058, |
| "grad_norm": 1.1657309327731467, |
| "learning_rate": 5.999018352996753e-07, |
| "loss": 1.7329, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.008717031738936076, |
| "grad_norm": 1.4859993327682761, |
| "learning_rate": 5.998987438800507e-07, |
| "loss": 1.7751, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.008851139919535091, |
| "grad_norm": 1.2336235778167894, |
| "learning_rate": 5.99895604543516e-07, |
| "loss": 1.7698, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.008985248100134109, |
| "grad_norm": 1.2063484420298083, |
| "learning_rate": 5.998924172906287e-07, |
| "loss": 1.6674, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.009119356280733125, |
| "grad_norm": 1.144489164232074, |
| "learning_rate": 5.998891821219549e-07, |
| "loss": 1.6727, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.009253464461332142, |
| "grad_norm": 1.1661711232482204, |
| "learning_rate": 5.998858990380685e-07, |
| "loss": 1.72, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.009387572641931158, |
| "grad_norm": 1.8657773898969878, |
| "learning_rate": 5.998825680395526e-07, |
| "loss": 1.67, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.009521680822530174, |
| "grad_norm": 1.2765420086009807, |
| "learning_rate": 5.998791891269986e-07, |
| "loss": 1.7016, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.009655789003129191, |
| "grad_norm": 1.1153772140374385, |
| "learning_rate": 5.998757623010063e-07, |
| "loss": 1.707, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.009789897183728207, |
| "grad_norm": 1.1669261546443137, |
| "learning_rate": 5.998722875621842e-07, |
| "loss": 1.6859, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.009924005364327224, |
| "grad_norm": 1.1777142907854627, |
| "learning_rate": 5.99868764911149e-07, |
| "loss": 1.616, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.01005811354492624, |
| "grad_norm": 1.1386560612646601, |
| "learning_rate": 5.998651943485263e-07, |
| "loss": 1.7086, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.010192221725525258, |
| "grad_norm": 1.1396265347862253, |
| "learning_rate": 5.998615758749499e-07, |
| "loss": 1.6094, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.010326329906124273, |
| "grad_norm": 1.1418930865866173, |
| "learning_rate": 5.998579094910623e-07, |
| "loss": 1.5653, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.01046043808672329, |
| "grad_norm": 1.2012675736770206, |
| "learning_rate": 5.998541951975143e-07, |
| "loss": 1.749, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.010594546267322306, |
| "grad_norm": 1.1829649799589437, |
| "learning_rate": 5.998504329949654e-07, |
| "loss": 1.741, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.010728654447921324, |
| "grad_norm": 1.1137771771242837, |
| "learning_rate": 5.998466228840834e-07, |
| "loss": 1.7467, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01086276262852034, |
| "grad_norm": 1.2213171478733171, |
| "learning_rate": 5.998427648655449e-07, |
| "loss": 1.7411, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.010996870809119355, |
| "grad_norm": 1.2565644926554131, |
| "learning_rate": 5.998388589400348e-07, |
| "loss": 1.5334, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.011130978989718373, |
| "grad_norm": 1.1677953640865506, |
| "learning_rate": 5.998349051082467e-07, |
| "loss": 1.6292, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.011265087170317389, |
| "grad_norm": 1.1735267116017247, |
| "learning_rate": 5.998309033708821e-07, |
| "loss": 1.7093, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.011399195350916406, |
| "grad_norm": 1.1800930162312424, |
| "learning_rate": 5.998268537286519e-07, |
| "loss": 1.6931, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.011533303531515422, |
| "grad_norm": 1.267648133239451, |
| "learning_rate": 5.998227561822748e-07, |
| "loss": 1.7372, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.01166741171211444, |
| "grad_norm": 1.322566507024075, |
| "learning_rate": 5.998186107324783e-07, |
| "loss": 1.729, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.011801519892713455, |
| "grad_norm": 1.2115409550398644, |
| "learning_rate": 5.998144173799985e-07, |
| "loss": 1.8509, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.011935628073312473, |
| "grad_norm": 1.2085609394825974, |
| "learning_rate": 5.998101761255799e-07, |
| "loss": 1.6913, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.012069736253911488, |
| "grad_norm": 1.290801409771777, |
| "learning_rate": 5.998058869699753e-07, |
| "loss": 1.7102, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.012203844434510506, |
| "grad_norm": 1.1367739383903264, |
| "learning_rate": 5.998015499139461e-07, |
| "loss": 1.6836, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.012337952615109522, |
| "grad_norm": 1.1670495306196762, |
| "learning_rate": 5.997971649582626e-07, |
| "loss": 1.7664, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.012472060795708539, |
| "grad_norm": 1.1506012664004979, |
| "learning_rate": 5.99792732103703e-07, |
| "loss": 1.6477, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.012606168976307555, |
| "grad_norm": 1.1708715291743035, |
| "learning_rate": 5.997882513510546e-07, |
| "loss": 1.6524, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.01274027715690657, |
| "grad_norm": 1.151933225888518, |
| "learning_rate": 5.997837227011127e-07, |
| "loss": 1.7245, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.012874385337505588, |
| "grad_norm": 1.1690902149158897, |
| "learning_rate": 5.997791461546813e-07, |
| "loss": 1.7276, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.013008493518104604, |
| "grad_norm": 1.2091849738704115, |
| "learning_rate": 5.997745217125728e-07, |
| "loss": 1.6816, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.013142601698703621, |
| "grad_norm": 1.154532285060635, |
| "learning_rate": 5.997698493756085e-07, |
| "loss": 1.7065, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.013276709879302637, |
| "grad_norm": 1.084556225295123, |
| "learning_rate": 5.997651291446176e-07, |
| "loss": 1.6972, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.013410818059901655, |
| "grad_norm": 1.0844384684144817, |
| "learning_rate": 5.997603610204383e-07, |
| "loss": 1.6011, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.01354492624050067, |
| "grad_norm": 1.1349833362519353, |
| "learning_rate": 5.997555450039173e-07, |
| "loss": 1.7058, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.013679034421099688, |
| "grad_norm": 1.161646012371061, |
| "learning_rate": 5.997506810959091e-07, |
| "loss": 1.7284, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.013813142601698703, |
| "grad_norm": 1.1931085385755509, |
| "learning_rate": 5.997457692972776e-07, |
| "loss": 1.6889, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.013947250782297721, |
| "grad_norm": 1.1274496052792788, |
| "learning_rate": 5.997408096088949e-07, |
| "loss": 1.6966, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.014081358962896737, |
| "grad_norm": 1.181021137421778, |
| "learning_rate": 5.997358020316412e-07, |
| "loss": 1.6328, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.014215467143495752, |
| "grad_norm": 1.1775178821818613, |
| "learning_rate": 5.997307465664057e-07, |
| "loss": 1.776, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.01434957532409477, |
| "grad_norm": 1.1589504262285564, |
| "learning_rate": 5.99725643214086e-07, |
| "loss": 1.7587, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.014483683504693786, |
| "grad_norm": 1.0988787787594243, |
| "learning_rate": 5.99720491975588e-07, |
| "loss": 1.6803, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.014617791685292803, |
| "grad_norm": 1.1461688756871193, |
| "learning_rate": 5.997152928518265e-07, |
| "loss": 1.607, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.014751899865891819, |
| "grad_norm": 1.152474644239047, |
| "learning_rate": 5.99710045843724e-07, |
| "loss": 1.7633, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.014886008046490836, |
| "grad_norm": 1.1059120772328972, |
| "learning_rate": 5.997047509522127e-07, |
| "loss": 1.6747, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.015020116227089852, |
| "grad_norm": 1.313489457814451, |
| "learning_rate": 5.996994081782321e-07, |
| "loss": 1.7596, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.01515422440768887, |
| "grad_norm": 1.111253336672023, |
| "learning_rate": 5.99694017522731e-07, |
| "loss": 1.6808, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.015288332588287885, |
| "grad_norm": 1.1886992881117084, |
| "learning_rate": 5.996885789866662e-07, |
| "loss": 1.7115, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.015422440768886903, |
| "grad_norm": 1.3209352003575652, |
| "learning_rate": 5.996830925710036e-07, |
| "loss": 1.6806, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.015556548949485919, |
| "grad_norm": 1.1206995765571244, |
| "learning_rate": 5.99677558276717e-07, |
| "loss": 1.7454, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.015690657130084936, |
| "grad_norm": 1.5425145699155092, |
| "learning_rate": 5.996719761047891e-07, |
| "loss": 1.7396, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.015824765310683952, |
| "grad_norm": 1.1362376633432387, |
| "learning_rate": 5.996663460562107e-07, |
| "loss": 1.7999, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.015958873491282968, |
| "grad_norm": 1.3633221428865825, |
| "learning_rate": 5.996606681319816e-07, |
| "loss": 1.7351, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.016092981671881983, |
| "grad_norm": 1.3214169620385536, |
| "learning_rate": 5.996549423331097e-07, |
| "loss": 1.8187, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.016227089852481003, |
| "grad_norm": 1.127623956399482, |
| "learning_rate": 5.996491686606115e-07, |
| "loss": 1.7869, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.01636119803308002, |
| "grad_norm": 1.1391849506858633, |
| "learning_rate": 5.996433471155121e-07, |
| "loss": 1.6692, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.016495306213679034, |
| "grad_norm": 1.2689844428227393, |
| "learning_rate": 5.99637477698845e-07, |
| "loss": 1.7503, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.01662941439427805, |
| "grad_norm": 1.1304906129070134, |
| "learning_rate": 5.996315604116523e-07, |
| "loss": 1.7342, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.01676352257487707, |
| "grad_norm": 1.114839424591474, |
| "learning_rate": 5.996255952549846e-07, |
| "loss": 1.7152, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.016897630755476085, |
| "grad_norm": 1.1209913395354725, |
| "learning_rate": 5.996195822299007e-07, |
| "loss": 1.7016, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0170317389360751, |
| "grad_norm": 1.2030735367344376, |
| "learning_rate": 5.996135213374683e-07, |
| "loss": 1.6916, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.017165847116674116, |
| "grad_norm": 1.1576104199692667, |
| "learning_rate": 5.996074125787635e-07, |
| "loss": 1.6998, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.017299955297273132, |
| "grad_norm": 1.1589080789600115, |
| "learning_rate": 5.996012559548706e-07, |
| "loss": 1.7135, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.01743406347787215, |
| "grad_norm": 1.1305459345535596, |
| "learning_rate": 5.995950514668828e-07, |
| "loss": 1.7388, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.017568171658471167, |
| "grad_norm": 1.1845858349294451, |
| "learning_rate": 5.995887991159015e-07, |
| "loss": 1.6555, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.017702279839070183, |
| "grad_norm": 1.1323032600098655, |
| "learning_rate": 5.99582498903037e-07, |
| "loss": 1.7391, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0178363880196692, |
| "grad_norm": 1.1284637339427535, |
| "learning_rate": 5.995761508294074e-07, |
| "loss": 1.7362, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.017970496200268218, |
| "grad_norm": 1.1056404062639804, |
| "learning_rate": 5.995697548961401e-07, |
| "loss": 1.6097, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.018104604380867233, |
| "grad_norm": 1.168355336516219, |
| "learning_rate": 5.995633111043703e-07, |
| "loss": 1.6254, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.01823871256146625, |
| "grad_norm": 1.1077402595262742, |
| "learning_rate": 5.995568194552422e-07, |
| "loss": 1.6421, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.018372820742065265, |
| "grad_norm": 1.1281814027534607, |
| "learning_rate": 5.995502799499084e-07, |
| "loss": 1.6564, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.018506928922664284, |
| "grad_norm": 1.1954713277317879, |
| "learning_rate": 5.995436925895296e-07, |
| "loss": 1.7595, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0186410371032633, |
| "grad_norm": 1.0981557354609233, |
| "learning_rate": 5.995370573752754e-07, |
| "loss": 1.7267, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.018775145283862316, |
| "grad_norm": 1.1055449164193234, |
| "learning_rate": 5.99530374308324e-07, |
| "loss": 1.7206, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01890925346446133, |
| "grad_norm": 1.1553645507386814, |
| "learning_rate": 5.995236433898617e-07, |
| "loss": 1.7575, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.019043361645060347, |
| "grad_norm": 1.153673293502894, |
| "learning_rate": 5.995168646210836e-07, |
| "loss": 1.6141, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.019177469825659366, |
| "grad_norm": 1.2666080786381764, |
| "learning_rate": 5.995100380031929e-07, |
| "loss": 1.6959, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.019311578006258382, |
| "grad_norm": 1.1798276231576013, |
| "learning_rate": 5.99503163537402e-07, |
| "loss": 1.6898, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.019445686186857398, |
| "grad_norm": 1.1774834485948251, |
| "learning_rate": 5.99496241224931e-07, |
| "loss": 1.6964, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.019579794367456414, |
| "grad_norm": 1.1714422777051698, |
| "learning_rate": 5.994892710670092e-07, |
| "loss": 1.7554, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.019713902548055433, |
| "grad_norm": 1.1423261455221443, |
| "learning_rate": 5.994822530648737e-07, |
| "loss": 1.6261, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.01984801072865445, |
| "grad_norm": 1.2304343117411098, |
| "learning_rate": 5.994751872197707e-07, |
| "loss": 1.6867, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.019982118909253464, |
| "grad_norm": 1.6067971505148326, |
| "learning_rate": 5.994680735329545e-07, |
| "loss": 1.7063, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.02011622708985248, |
| "grad_norm": 1.2298413663973762, |
| "learning_rate": 5.994609120056881e-07, |
| "loss": 1.8201, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0202503352704515, |
| "grad_norm": 1.3415909829046686, |
| "learning_rate": 5.994537026392431e-07, |
| "loss": 1.7761, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.020384443451050515, |
| "grad_norm": 1.092009599817307, |
| "learning_rate": 5.994464454348991e-07, |
| "loss": 1.6873, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.02051855163164953, |
| "grad_norm": 1.123624476668709, |
| "learning_rate": 5.994391403939447e-07, |
| "loss": 1.6261, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.020652659812248546, |
| "grad_norm": 1.194551927187871, |
| "learning_rate": 5.994317875176768e-07, |
| "loss": 1.6832, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.020786767992847562, |
| "grad_norm": 1.2676660386871186, |
| "learning_rate": 5.99424386807401e-07, |
| "loss": 1.7296, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.02092087617344658, |
| "grad_norm": 1.1316668463703698, |
| "learning_rate": 5.994169382644308e-07, |
| "loss": 1.5888, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.021054984354045597, |
| "grad_norm": 1.1959893911689907, |
| "learning_rate": 5.994094418900889e-07, |
| "loss": 1.75, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.021189092534644613, |
| "grad_norm": 1.1591910643371741, |
| "learning_rate": 5.994018976857061e-07, |
| "loss": 1.6475, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.02132320071524363, |
| "grad_norm": 1.1845979417363135, |
| "learning_rate": 5.993943056526216e-07, |
| "loss": 1.6961, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.021457308895842648, |
| "grad_norm": 1.1240443814893686, |
| "learning_rate": 5.993866657921835e-07, |
| "loss": 1.6806, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.021591417076441664, |
| "grad_norm": 1.24112432554422, |
| "learning_rate": 5.99378978105748e-07, |
| "loss": 1.7856, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.02172552525704068, |
| "grad_norm": 1.1097493699883791, |
| "learning_rate": 5.993712425946801e-07, |
| "loss": 1.6526, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.021859633437639695, |
| "grad_norm": 1.1808847880790212, |
| "learning_rate": 5.99363459260353e-07, |
| "loss": 1.6635, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.02199374161823871, |
| "grad_norm": 1.1260977048233447, |
| "learning_rate": 5.993556281041487e-07, |
| "loss": 1.6883, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.02212784979883773, |
| "grad_norm": 1.1355214317178735, |
| "learning_rate": 5.993477491274572e-07, |
| "loss": 1.7197, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.022261957979436746, |
| "grad_norm": 1.1667677632223183, |
| "learning_rate": 5.993398223316776e-07, |
| "loss": 1.652, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.02239606616003576, |
| "grad_norm": 1.2054751712250278, |
| "learning_rate": 5.993318477182171e-07, |
| "loss": 1.7181, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.022530174340634777, |
| "grad_norm": 1.102112367147099, |
| "learning_rate": 5.993238252884914e-07, |
| "loss": 1.7064, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.022664282521233797, |
| "grad_norm": 1.1174237172322072, |
| "learning_rate": 5.99315755043925e-07, |
| "loss": 1.7088, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.022798390701832812, |
| "grad_norm": 1.1526425806154745, |
| "learning_rate": 5.993076369859505e-07, |
| "loss": 1.6713, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.022932498882431828, |
| "grad_norm": 1.189041831016279, |
| "learning_rate": 5.992994711160089e-07, |
| "loss": 1.796, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.023066607063030844, |
| "grad_norm": 1.1020745587716836, |
| "learning_rate": 5.992912574355505e-07, |
| "loss": 1.7036, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.023200715243629863, |
| "grad_norm": 1.1024337259717305, |
| "learning_rate": 5.992829959460332e-07, |
| "loss": 1.7183, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.02333482342422888, |
| "grad_norm": 1.2334229037351967, |
| "learning_rate": 5.992746866489237e-07, |
| "loss": 1.7278, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.023468931604827895, |
| "grad_norm": 1.1942148623032185, |
| "learning_rate": 5.992663295456972e-07, |
| "loss": 1.7127, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.02360303978542691, |
| "grad_norm": 1.154550051904798, |
| "learning_rate": 5.992579246378375e-07, |
| "loss": 1.7259, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.023737147966025926, |
| "grad_norm": 1.139454652919351, |
| "learning_rate": 5.992494719268369e-07, |
| "loss": 1.8202, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.023871256146624945, |
| "grad_norm": 1.1334182773252635, |
| "learning_rate": 5.992409714141957e-07, |
| "loss": 1.7458, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.02400536432722396, |
| "grad_norm": 1.175452351416824, |
| "learning_rate": 5.992324231014234e-07, |
| "loss": 1.7343, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.024139472507822977, |
| "grad_norm": 1.15495982844933, |
| "learning_rate": 5.992238269900374e-07, |
| "loss": 1.6397, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.024273580688421992, |
| "grad_norm": 1.222036602203619, |
| "learning_rate": 5.992151830815639e-07, |
| "loss": 1.6585, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.02440768886902101, |
| "grad_norm": 1.1103145700032067, |
| "learning_rate": 5.992064913775376e-07, |
| "loss": 1.6729, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.024541797049620027, |
| "grad_norm": 1.1627847561281206, |
| "learning_rate": 5.991977518795014e-07, |
| "loss": 1.6693, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.024675905230219043, |
| "grad_norm": 1.2021941957895712, |
| "learning_rate": 5.991889645890071e-07, |
| "loss": 1.7692, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.02481001341081806, |
| "grad_norm": 1.0987338031386753, |
| "learning_rate": 5.991801295076147e-07, |
| "loss": 1.7378, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.024944121591417078, |
| "grad_norm": 1.1764726234102538, |
| "learning_rate": 5.991712466368927e-07, |
| "loss": 1.7519, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.025078229772016094, |
| "grad_norm": 1.1211959879636015, |
| "learning_rate": 5.991623159784181e-07, |
| "loss": 1.6915, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.02521233795261511, |
| "grad_norm": 1.183694924138999, |
| "learning_rate": 5.991533375337764e-07, |
| "loss": 1.6992, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.025346446133214125, |
| "grad_norm": 1.1093630411034636, |
| "learning_rate": 5.991443113045618e-07, |
| "loss": 1.7517, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.02548055431381314, |
| "grad_norm": 1.0851009440926755, |
| "learning_rate": 5.991352372923766e-07, |
| "loss": 1.6776, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.02561466249441216, |
| "grad_norm": 1.1395885180659286, |
| "learning_rate": 5.99126115498832e-07, |
| "loss": 1.6924, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.025748770675011176, |
| "grad_norm": 1.177643032023232, |
| "learning_rate": 5.99116945925547e-07, |
| "loss": 1.8049, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.025882878855610192, |
| "grad_norm": 1.2055741329571488, |
| "learning_rate": 5.9910772857415e-07, |
| "loss": 1.7318, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.026016987036209208, |
| "grad_norm": 1.0540261983643227, |
| "learning_rate": 5.990984634462772e-07, |
| "loss": 1.6957, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.026151095216808227, |
| "grad_norm": 1.1229012489144132, |
| "learning_rate": 5.990891505435736e-07, |
| "loss": 1.6655, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.026285203397407243, |
| "grad_norm": 1.244124126818224, |
| "learning_rate": 5.990797898676924e-07, |
| "loss": 1.6651, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.02641931157800626, |
| "grad_norm": 1.153272959704337, |
| "learning_rate": 5.990703814202957e-07, |
| "loss": 1.614, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.026553419758605274, |
| "grad_norm": 1.1097663064103196, |
| "learning_rate": 5.990609252030535e-07, |
| "loss": 1.6663, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.02668752793920429, |
| "grad_norm": 1.1863665422420122, |
| "learning_rate": 5.990514212176451e-07, |
| "loss": 1.6996, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.02682163611980331, |
| "grad_norm": 1.19855062119957, |
| "learning_rate": 5.990418694657574e-07, |
| "loss": 1.6788, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.026955744300402325, |
| "grad_norm": 2.240646939516989, |
| "learning_rate": 5.990322699490864e-07, |
| "loss": 1.6072, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.02708985248100134, |
| "grad_norm": 1.16787040451677, |
| "learning_rate": 5.990226226693363e-07, |
| "loss": 1.7495, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.027223960661600356, |
| "grad_norm": 1.1801746959435724, |
| "learning_rate": 5.990129276282199e-07, |
| "loss": 1.7816, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.027358068842199375, |
| "grad_norm": 1.0865741581201773, |
| "learning_rate": 5.990031848274582e-07, |
| "loss": 1.6386, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.02749217702279839, |
| "grad_norm": 1.1195448058003792, |
| "learning_rate": 5.989933942687813e-07, |
| "loss": 1.7666, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.027626285203397407, |
| "grad_norm": 1.1595509114049103, |
| "learning_rate": 5.989835559539271e-07, |
| "loss": 1.7783, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.027760393383996423, |
| "grad_norm": 1.132633530996875, |
| "learning_rate": 5.989736698846422e-07, |
| "loss": 1.7369, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.027894501564595442, |
| "grad_norm": 1.2238390397270622, |
| "learning_rate": 5.98963736062682e-07, |
| "loss": 1.77, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.028028609745194458, |
| "grad_norm": 1.1148263262442593, |
| "learning_rate": 5.989537544898099e-07, |
| "loss": 1.7091, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.028162717925793473, |
| "grad_norm": 1.8988797886120061, |
| "learning_rate": 5.989437251677981e-07, |
| "loss": 1.7075, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.02829682610639249, |
| "grad_norm": 1.1460869915607401, |
| "learning_rate": 5.989336480984271e-07, |
| "loss": 1.7101, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.028430934286991505, |
| "grad_norm": 1.1467483507445029, |
| "learning_rate": 5.989235232834861e-07, |
| "loss": 1.826, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.028565042467590524, |
| "grad_norm": 1.1300279144587981, |
| "learning_rate": 5.989133507247724e-07, |
| "loss": 1.6014, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.02869915064818954, |
| "grad_norm": 1.1992643920221002, |
| "learning_rate": 5.989031304240922e-07, |
| "loss": 1.7145, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.028833258828788556, |
| "grad_norm": 1.1299143353929064, |
| "learning_rate": 5.988928623832598e-07, |
| "loss": 1.7769, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.02896736700938757, |
| "grad_norm": 1.2042592418402756, |
| "learning_rate": 5.988825466040984e-07, |
| "loss": 1.7626, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.02910147518998659, |
| "grad_norm": 1.0995902853233575, |
| "learning_rate": 5.988721830884392e-07, |
| "loss": 1.6348, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.029235583370585606, |
| "grad_norm": 1.6143410051222686, |
| "learning_rate": 5.988617718381222e-07, |
| "loss": 1.6693, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.029369691551184622, |
| "grad_norm": 1.1356912583442442, |
| "learning_rate": 5.988513128549958e-07, |
| "loss": 1.8413, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.029503799731783638, |
| "grad_norm": 1.0893609511374684, |
| "learning_rate": 5.988408061409167e-07, |
| "loss": 1.7344, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.029637907912382657, |
| "grad_norm": 1.7248790007955832, |
| "learning_rate": 5.988302516977504e-07, |
| "loss": 1.6685, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.029772016092981673, |
| "grad_norm": 1.2197670257203657, |
| "learning_rate": 5.988196495273707e-07, |
| "loss": 1.7656, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.02990612427358069, |
| "grad_norm": 1.0570007929897236, |
| "learning_rate": 5.988089996316597e-07, |
| "loss": 1.6939, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.030040232454179704, |
| "grad_norm": 1.2787842409441683, |
| "learning_rate": 5.987983020125083e-07, |
| "loss": 1.6764, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.03017434063477872, |
| "grad_norm": 1.1358825590170436, |
| "learning_rate": 5.987875566718158e-07, |
| "loss": 1.6609, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.03030844881537774, |
| "grad_norm": 1.118237942922342, |
| "learning_rate": 5.987767636114897e-07, |
| "loss": 1.7554, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.030442556995976755, |
| "grad_norm": 1.091737931283322, |
| "learning_rate": 5.987659228334462e-07, |
| "loss": 1.7449, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.03057666517657577, |
| "grad_norm": 1.1839355406865255, |
| "learning_rate": 5.9875503433961e-07, |
| "loss": 1.5726, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.030710773357174787, |
| "grad_norm": 1.1337421280370006, |
| "learning_rate": 5.987440981319141e-07, |
| "loss": 1.7921, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.030844881537773806, |
| "grad_norm": 1.1412449749582727, |
| "learning_rate": 5.987331142123003e-07, |
| "loss": 1.74, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.03097898971837282, |
| "grad_norm": 1.153189714483035, |
| "learning_rate": 5.987220825827184e-07, |
| "loss": 1.8381, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.031113097898971837, |
| "grad_norm": 1.5918789493838401, |
| "learning_rate": 5.98711003245127e-07, |
| "loss": 1.775, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.031247206079570853, |
| "grad_norm": 1.1156741804185832, |
| "learning_rate": 5.986998762014931e-07, |
| "loss": 1.7849, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.03138131426016987, |
| "grad_norm": 1.3525186481687417, |
| "learning_rate": 5.986887014537923e-07, |
| "loss": 1.6405, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.03151542244076889, |
| "grad_norm": 1.158420443205213, |
| "learning_rate": 5.986774790040083e-07, |
| "loss": 1.7375, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.031649530621367904, |
| "grad_norm": 1.123395074640784, |
| "learning_rate": 5.986662088541335e-07, |
| "loss": 1.7682, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.03178363880196692, |
| "grad_norm": 1.1675872323082288, |
| "learning_rate": 5.98654891006169e-07, |
| "loss": 1.7364, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.031917746982565935, |
| "grad_norm": 1.0814715571489928, |
| "learning_rate": 5.986435254621239e-07, |
| "loss": 1.5985, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.03205185516316495, |
| "grad_norm": 3.0737070295965427, |
| "learning_rate": 5.986321122240162e-07, |
| "loss": 1.7085, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.03218596334376397, |
| "grad_norm": 1.1671133111581686, |
| "learning_rate": 5.986206512938719e-07, |
| "loss": 1.6533, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.03232007152436299, |
| "grad_norm": 1.145018806372248, |
| "learning_rate": 5.98609142673726e-07, |
| "loss": 1.7335, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.032454179704962005, |
| "grad_norm": 1.159474229307987, |
| "learning_rate": 5.985975863656216e-07, |
| "loss": 1.7531, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.03258828788556102, |
| "grad_norm": 1.2078048688870913, |
| "learning_rate": 5.985859823716102e-07, |
| "loss": 1.7911, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.03272239606616004, |
| "grad_norm": 1.123182359654964, |
| "learning_rate": 5.985743306937522e-07, |
| "loss": 1.7939, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.03285650424675905, |
| "grad_norm": 1.2328138827190458, |
| "learning_rate": 5.985626313341161e-07, |
| "loss": 1.7224, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.03299061242735807, |
| "grad_norm": 1.148111739587274, |
| "learning_rate": 5.98550884294779e-07, |
| "loss": 1.7458, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.033124720607957084, |
| "grad_norm": 1.1781302748488391, |
| "learning_rate": 5.985390895778263e-07, |
| "loss": 1.7283, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.0332588287885561, |
| "grad_norm": 1.1649269851093655, |
| "learning_rate": 5.985272471853521e-07, |
| "loss": 1.7535, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.033392936969155115, |
| "grad_norm": 1.1003523240939477, |
| "learning_rate": 5.985153571194589e-07, |
| "loss": 1.7422, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.03352704514975414, |
| "grad_norm": 1.1239095176492149, |
| "learning_rate": 5.985034193822575e-07, |
| "loss": 1.7838, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.033661153330353154, |
| "grad_norm": 1.1810699355311947, |
| "learning_rate": 5.984914339758673e-07, |
| "loss": 1.6863, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.03379526151095217, |
| "grad_norm": 1.1136505916452646, |
| "learning_rate": 5.984794009024162e-07, |
| "loss": 1.7424, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.033929369691551185, |
| "grad_norm": 1.1748644896008424, |
| "learning_rate": 5.984673201640406e-07, |
| "loss": 1.7273, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.0340634778721502, |
| "grad_norm": 1.1728309803897534, |
| "learning_rate": 5.98455191762885e-07, |
| "loss": 1.7322, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.03419758605274922, |
| "grad_norm": 1.1617256887218326, |
| "learning_rate": 5.984430157011031e-07, |
| "loss": 1.6426, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.03433169423334823, |
| "grad_norm": 1.0944959568956085, |
| "learning_rate": 5.984307919808561e-07, |
| "loss": 1.6643, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.03446580241394725, |
| "grad_norm": 1.1692415951338644, |
| "learning_rate": 5.984185206043145e-07, |
| "loss": 1.6584, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.034599910594546264, |
| "grad_norm": 4.382957589748632, |
| "learning_rate": 5.984062015736567e-07, |
| "loss": 1.7101, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.03473401877514529, |
| "grad_norm": 1.1567530728762943, |
| "learning_rate": 5.983938348910698e-07, |
| "loss": 1.643, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.0348681269557443, |
| "grad_norm": 1.215341418188577, |
| "learning_rate": 5.983814205587494e-07, |
| "loss": 1.7239, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.03500223513634332, |
| "grad_norm": 1.0746883114524803, |
| "learning_rate": 5.983689585788997e-07, |
| "loss": 1.6076, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.035136343316942334, |
| "grad_norm": 1.0844612292689275, |
| "learning_rate": 5.983564489537329e-07, |
| "loss": 1.6903, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.03527045149754135, |
| "grad_norm": 1.2255887165848134, |
| "learning_rate": 5.983438916854698e-07, |
| "loss": 1.6497, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.035404559678140365, |
| "grad_norm": 1.1308380556818496, |
| "learning_rate": 5.983312867763402e-07, |
| "loss": 1.7412, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.03553866785873938, |
| "grad_norm": 1.1248240455028355, |
| "learning_rate": 5.983186342285815e-07, |
| "loss": 1.6542, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.0356727760393384, |
| "grad_norm": 1.127913908764272, |
| "learning_rate": 5.983059340444401e-07, |
| "loss": 1.7996, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.03580688421993742, |
| "grad_norm": 1.1345562808363212, |
| "learning_rate": 5.98293186226171e-07, |
| "loss": 1.7426, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.035940992400536435, |
| "grad_norm": 1.1100506727991573, |
| "learning_rate": 5.982803907760373e-07, |
| "loss": 1.6947, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.03607510058113545, |
| "grad_norm": 1.1397892876092324, |
| "learning_rate": 5.982675476963105e-07, |
| "loss": 1.7525, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.03620920876173447, |
| "grad_norm": 1.0980888601137475, |
| "learning_rate": 5.982546569892707e-07, |
| "loss": 1.6763, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.03634331694233348, |
| "grad_norm": 1.1179358157267492, |
| "learning_rate": 5.982417186572067e-07, |
| "loss": 1.8195, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.0364774251229325, |
| "grad_norm": 1.15212876523653, |
| "learning_rate": 5.982287327024153e-07, |
| "loss": 1.7003, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.036611533303531514, |
| "grad_norm": 1.0898032141275467, |
| "learning_rate": 5.982156991272021e-07, |
| "loss": 1.7347, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.03674564148413053, |
| "grad_norm": 1.2234098091068482, |
| "learning_rate": 5.982026179338812e-07, |
| "loss": 1.71, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.036879749664729546, |
| "grad_norm": 1.2077801818134501, |
| "learning_rate": 5.981894891247747e-07, |
| "loss": 1.7966, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.03701385784532857, |
| "grad_norm": 1.1190450985953022, |
| "learning_rate": 5.981763127022135e-07, |
| "loss": 1.6619, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.037147966025927584, |
| "grad_norm": 1.235343710444344, |
| "learning_rate": 5.981630886685369e-07, |
| "loss": 1.7484, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.0372820742065266, |
| "grad_norm": 1.2266668117138695, |
| "learning_rate": 5.98149817026093e-07, |
| "loss": 1.6734, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.037416182387125616, |
| "grad_norm": 1.4154140120426957, |
| "learning_rate": 5.981364977772374e-07, |
| "loss": 1.7073, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.03755029056772463, |
| "grad_norm": 1.2222936436898488, |
| "learning_rate": 5.981231309243353e-07, |
| "loss": 1.7837, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.03768439874832365, |
| "grad_norm": 1.1519207095634527, |
| "learning_rate": 5.981097164697594e-07, |
| "loss": 1.7349, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.03781850692892266, |
| "grad_norm": 1.172450505222872, |
| "learning_rate": 5.980962544158915e-07, |
| "loss": 1.7005, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.03795261510952168, |
| "grad_norm": 1.2857156876454048, |
| "learning_rate": 5.980827447651216e-07, |
| "loss": 1.561, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.038086723290120694, |
| "grad_norm": 1.2389387482561154, |
| "learning_rate": 5.98069187519848e-07, |
| "loss": 1.7068, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.03822083147071972, |
| "grad_norm": 1.163985598391861, |
| "learning_rate": 5.980555826824778e-07, |
| "loss": 1.7442, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.03835493965131873, |
| "grad_norm": 1.1048173896847064, |
| "learning_rate": 5.980419302554261e-07, |
| "loss": 1.685, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.03848904783191775, |
| "grad_norm": 1.472564099104008, |
| "learning_rate": 5.98028230241117e-07, |
| "loss": 1.6997, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.038623156012516764, |
| "grad_norm": 1.287728938848147, |
| "learning_rate": 5.980144826419825e-07, |
| "loss": 1.7084, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.03875726419311578, |
| "grad_norm": 1.124267938500328, |
| "learning_rate": 5.980006874604635e-07, |
| "loss": 1.7134, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.038891372373714796, |
| "grad_norm": 1.1218572497983328, |
| "learning_rate": 5.979868446990091e-07, |
| "loss": 1.6841, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.03902548055431381, |
| "grad_norm": 1.1011749075237598, |
| "learning_rate": 5.979729543600769e-07, |
| "loss": 1.7323, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.03915958873491283, |
| "grad_norm": 1.100745780533083, |
| "learning_rate": 5.979590164461328e-07, |
| "loss": 1.6788, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.03929369691551184, |
| "grad_norm": 1.1613502217053182, |
| "learning_rate": 5.979450309596514e-07, |
| "loss": 1.6776, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.039427805096110866, |
| "grad_norm": 1.089657509345998, |
| "learning_rate": 5.979309979031158e-07, |
| "loss": 1.7068, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.03956191327670988, |
| "grad_norm": 1.1436391576530838, |
| "learning_rate": 5.97916917279017e-07, |
| "loss": 1.7388, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.0396960214573089, |
| "grad_norm": 1.1145075933124646, |
| "learning_rate": 5.979027890898551e-07, |
| "loss": 1.7004, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.03983012963790791, |
| "grad_norm": 1.0907272047712597, |
| "learning_rate": 5.978886133381384e-07, |
| "loss": 1.679, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.03996423781850693, |
| "grad_norm": 1.12558267559901, |
| "learning_rate": 5.978743900263835e-07, |
| "loss": 1.6608, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.040098345999105944, |
| "grad_norm": 1.136659951867088, |
| "learning_rate": 5.978601191571155e-07, |
| "loss": 1.6383, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.04023245417970496, |
| "grad_norm": 1.2441133556300974, |
| "learning_rate": 5.978458007328682e-07, |
| "loss": 1.7697, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.040366562360303976, |
| "grad_norm": 1.216051798039534, |
| "learning_rate": 5.978314347561835e-07, |
| "loss": 1.7656, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.040500670540903, |
| "grad_norm": 1.1193332609304543, |
| "learning_rate": 5.978170212296118e-07, |
| "loss": 1.7034, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.040634778721502014, |
| "grad_norm": 1.1450830933525635, |
| "learning_rate": 5.978025601557124e-07, |
| "loss": 1.6769, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.04076888690210103, |
| "grad_norm": 1.1570981861957024, |
| "learning_rate": 5.977880515370523e-07, |
| "loss": 1.7491, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.040902995082700046, |
| "grad_norm": 1.103432713835437, |
| "learning_rate": 5.977734953762075e-07, |
| "loss": 1.6544, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.04103710326329906, |
| "grad_norm": 1.134144784637958, |
| "learning_rate": 5.97758891675762e-07, |
| "loss": 1.7084, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.04117121144389808, |
| "grad_norm": 1.07738843402297, |
| "learning_rate": 5.977442404383088e-07, |
| "loss": 1.7369, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.04130531962449709, |
| "grad_norm": 1.1164259724731038, |
| "learning_rate": 5.977295416664489e-07, |
| "loss": 1.6785, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.04143942780509611, |
| "grad_norm": 1.2001430339127754, |
| "learning_rate": 5.977147953627918e-07, |
| "loss": 1.6496, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.041573535985695124, |
| "grad_norm": 1.1849867153137015, |
| "learning_rate": 5.977000015299557e-07, |
| "loss": 1.6736, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.04170764416629415, |
| "grad_norm": 1.1582589308770772, |
| "learning_rate": 5.976851601705669e-07, |
| "loss": 1.6775, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.04184175234689316, |
| "grad_norm": 1.1033822470615744, |
| "learning_rate": 5.976702712872603e-07, |
| "loss": 1.6598, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.04197586052749218, |
| "grad_norm": 1.1682634791444901, |
| "learning_rate": 5.976553348826793e-07, |
| "loss": 1.7557, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.042109968708091194, |
| "grad_norm": 1.0838004153530265, |
| "learning_rate": 5.976403509594756e-07, |
| "loss": 1.6741, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.04224407688869021, |
| "grad_norm": 1.121835854661048, |
| "learning_rate": 5.976253195203092e-07, |
| "loss": 1.7262, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.042378185069289226, |
| "grad_norm": 1.1243699312065234, |
| "learning_rate": 5.976102405678491e-07, |
| "loss": 1.7902, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.04251229324988824, |
| "grad_norm": 1.0991499127058322, |
| "learning_rate": 5.975951141047721e-07, |
| "loss": 1.6865, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.04264640143048726, |
| "grad_norm": 1.126580502499325, |
| "learning_rate": 5.975799401337638e-07, |
| "loss": 1.6798, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.04278050961108627, |
| "grad_norm": 1.1221949135632994, |
| "learning_rate": 5.975647186575182e-07, |
| "loss": 1.7491, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.042914617791685296, |
| "grad_norm": 1.14926550813679, |
| "learning_rate": 5.975494496787376e-07, |
| "loss": 1.6549, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.04304872597228431, |
| "grad_norm": 1.12638348214928, |
| "learning_rate": 5.975341332001328e-07, |
| "loss": 1.5897, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.04318283415288333, |
| "grad_norm": 1.1725295960645503, |
| "learning_rate": 5.97518769224423e-07, |
| "loss": 1.695, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.04331694233348234, |
| "grad_norm": 1.0904790236385375, |
| "learning_rate": 5.975033577543359e-07, |
| "loss": 1.6841, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.04345105051408136, |
| "grad_norm": 1.1090846497862015, |
| "learning_rate": 5.974878987926075e-07, |
| "loss": 1.6075, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.043585158694680375, |
| "grad_norm": 1.2329654322486787, |
| "learning_rate": 5.974723923419827e-07, |
| "loss": 1.7124, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.04371926687527939, |
| "grad_norm": 1.1520738825385615, |
| "learning_rate": 5.974568384052139e-07, |
| "loss": 1.7492, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.043853375055878406, |
| "grad_norm": 1.107509031801798, |
| "learning_rate": 5.974412369850631e-07, |
| "loss": 1.7233, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.04398748323647742, |
| "grad_norm": 1.9987713290159552, |
| "learning_rate": 5.974255880842995e-07, |
| "loss": 1.7005, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.044121591417076444, |
| "grad_norm": 1.1227927295658309, |
| "learning_rate": 5.974098917057019e-07, |
| "loss": 1.8204, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.04425569959767546, |
| "grad_norm": 1.1208739563830832, |
| "learning_rate": 5.973941478520565e-07, |
| "loss": 1.7393, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.044389807778274476, |
| "grad_norm": 1.0722310163444908, |
| "learning_rate": 5.973783565261589e-07, |
| "loss": 1.6568, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.04452391595887349, |
| "grad_norm": 1.1809997483096673, |
| "learning_rate": 5.973625177308124e-07, |
| "loss": 1.7233, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.04465802413947251, |
| "grad_norm": 1.0854965350422932, |
| "learning_rate": 5.973466314688289e-07, |
| "loss": 1.5838, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.04479213232007152, |
| "grad_norm": 1.0394749005048125, |
| "learning_rate": 5.973306977430288e-07, |
| "loss": 1.6982, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.04492624050067054, |
| "grad_norm": 1.1372698128741796, |
| "learning_rate": 5.973147165562409e-07, |
| "loss": 1.7363, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.045060348681269555, |
| "grad_norm": 1.0872018588712997, |
| "learning_rate": 5.972986879113027e-07, |
| "loss": 1.7134, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.04519445686186858, |
| "grad_norm": 1.136573181976626, |
| "learning_rate": 5.972826118110597e-07, |
| "loss": 1.6747, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.04532856504246759, |
| "grad_norm": 1.1438807799337474, |
| "learning_rate": 5.972664882583659e-07, |
| "loss": 1.7632, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.04546267322306661, |
| "grad_norm": 1.1746151029086915, |
| "learning_rate": 5.97250317256084e-07, |
| "loss": 1.5568, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.045596781403665625, |
| "grad_norm": 1.067551171735795, |
| "learning_rate": 5.972340988070848e-07, |
| "loss": 1.7722, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.04573088958426464, |
| "grad_norm": 1.100004825990679, |
| "learning_rate": 5.972178329142476e-07, |
| "loss": 1.7111, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.045864997764863656, |
| "grad_norm": 1.3130274389549708, |
| "learning_rate": 5.972015195804604e-07, |
| "loss": 1.7768, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.04599910594546267, |
| "grad_norm": 1.1532781776242376, |
| "learning_rate": 5.971851588086195e-07, |
| "loss": 1.7096, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.04613321412606169, |
| "grad_norm": 1.1087417118719138, |
| "learning_rate": 5.971687506016292e-07, |
| "loss": 1.6085, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.0462673223066607, |
| "grad_norm": 1.105566689388399, |
| "learning_rate": 5.971522949624028e-07, |
| "loss": 1.6791, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.046401430487259726, |
| "grad_norm": 1.090277130406352, |
| "learning_rate": 5.971357918938616e-07, |
| "loss": 1.6585, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.04653553866785874, |
| "grad_norm": 1.1679080769492398, |
| "learning_rate": 5.971192413989357e-07, |
| "loss": 1.6861, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.04666964684845776, |
| "grad_norm": 1.1647454348028623, |
| "learning_rate": 5.971026434805633e-07, |
| "loss": 1.7167, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.04680375502905677, |
| "grad_norm": 1.1324717330275416, |
| "learning_rate": 5.970859981416911e-07, |
| "loss": 1.6656, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.04693786320965579, |
| "grad_norm": 1.0895090583275637, |
| "learning_rate": 5.970693053852743e-07, |
| "loss": 1.7932, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.047071971390254805, |
| "grad_norm": 1.0846672521830747, |
| "learning_rate": 5.970525652142767e-07, |
| "loss": 1.568, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.04720607957085382, |
| "grad_norm": 1.0946401497383844, |
| "learning_rate": 5.970357776316699e-07, |
| "loss": 1.6717, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.047340187751452836, |
| "grad_norm": 1.203590152178876, |
| "learning_rate": 5.970189426404346e-07, |
| "loss": 1.6852, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.04747429593205185, |
| "grad_norm": 1.1550529538782315, |
| "learning_rate": 5.970020602435594e-07, |
| "loss": 1.7621, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.047608404112650875, |
| "grad_norm": 1.096867626156823, |
| "learning_rate": 5.969851304440418e-07, |
| "loss": 1.7309, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.04774251229324989, |
| "grad_norm": 1.166383772927886, |
| "learning_rate": 5.969681532448872e-07, |
| "loss": 1.7181, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.047876620473848906, |
| "grad_norm": 1.1239983839028163, |
| "learning_rate": 5.9695112864911e-07, |
| "loss": 1.6855, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.04801072865444792, |
| "grad_norm": 1.146063042749729, |
| "learning_rate": 5.969340566597323e-07, |
| "loss": 1.7481, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.04814483683504694, |
| "grad_norm": 1.1888010033263623, |
| "learning_rate": 5.969169372797852e-07, |
| "loss": 1.7679, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.048278945015645953, |
| "grad_norm": 1.1182477969412692, |
| "learning_rate": 5.96899770512308e-07, |
| "loss": 1.703, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.04841305319624497, |
| "grad_norm": 1.1404473863138842, |
| "learning_rate": 5.968825563603486e-07, |
| "loss": 1.7899, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.048547161376843985, |
| "grad_norm": 1.1404415220346715, |
| "learning_rate": 5.968652948269629e-07, |
| "loss": 1.6586, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.048681269557443, |
| "grad_norm": 1.0188482574967557, |
| "learning_rate": 5.968479859152155e-07, |
| "loss": 1.6772, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.04881537773804202, |
| "grad_norm": 1.1444032147790508, |
| "learning_rate": 5.968306296281794e-07, |
| "loss": 1.7235, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.04894948591864104, |
| "grad_norm": 1.147526204803139, |
| "learning_rate": 5.968132259689361e-07, |
| "loss": 1.6656, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.049083594099240055, |
| "grad_norm": 1.094173771252459, |
| "learning_rate": 5.967957749405751e-07, |
| "loss": 1.6133, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.04921770227983907, |
| "grad_norm": 1.1560369729609308, |
| "learning_rate": 5.967782765461948e-07, |
| "loss": 1.7796, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.049351810460438086, |
| "grad_norm": 1.1696121017752343, |
| "learning_rate": 5.967607307889018e-07, |
| "loss": 1.65, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.0494859186410371, |
| "grad_norm": 1.134918792559745, |
| "learning_rate": 5.967431376718111e-07, |
| "loss": 1.717, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.04962002682163612, |
| "grad_norm": 1.0765623022573645, |
| "learning_rate": 5.967254971980461e-07, |
| "loss": 1.7028, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.049754135002235134, |
| "grad_norm": 1.1093533051376567, |
| "learning_rate": 5.967078093707387e-07, |
| "loss": 1.687, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.049888243182834156, |
| "grad_norm": 1.0724867576763264, |
| "learning_rate": 5.966900741930289e-07, |
| "loss": 1.709, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.05002235136343317, |
| "grad_norm": 1.1870703976775374, |
| "learning_rate": 5.966722916680656e-07, |
| "loss": 1.7623, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.05015645954403219, |
| "grad_norm": 1.1118336624167122, |
| "learning_rate": 5.966544617990058e-07, |
| "loss": 1.713, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.050290567724631204, |
| "grad_norm": 1.1147242423912, |
| "learning_rate": 5.966365845890149e-07, |
| "loss": 1.5956, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.05042467590523022, |
| "grad_norm": 1.1489546583821737, |
| "learning_rate": 5.966186600412668e-07, |
| "loss": 1.7536, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.050558784085829235, |
| "grad_norm": 1.0985836995809481, |
| "learning_rate": 5.966006881589437e-07, |
| "loss": 1.6415, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.05069289226642825, |
| "grad_norm": 1.5210499221056473, |
| "learning_rate": 5.965826689452363e-07, |
| "loss": 1.7034, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.05082700044702727, |
| "grad_norm": 1.1770747351660449, |
| "learning_rate": 5.965646024033437e-07, |
| "loss": 1.7998, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.05096110862762628, |
| "grad_norm": 1.103353857870669, |
| "learning_rate": 5.965464885364734e-07, |
| "loss": 1.677, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.051095216808225305, |
| "grad_norm": 1.1279052370658624, |
| "learning_rate": 5.965283273478411e-07, |
| "loss": 1.7125, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.05122932498882432, |
| "grad_norm": 1.1260317026536582, |
| "learning_rate": 5.965101188406713e-07, |
| "loss": 1.713, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.051363433169423336, |
| "grad_norm": 1.1217115939734228, |
| "learning_rate": 5.964918630181966e-07, |
| "loss": 1.7513, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.05149754135002235, |
| "grad_norm": 1.0938140494838644, |
| "learning_rate": 5.964735598836581e-07, |
| "loss": 1.6722, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.05163164953062137, |
| "grad_norm": 1.5746119243016816, |
| "learning_rate": 5.964552094403051e-07, |
| "loss": 1.7249, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.051765757711220384, |
| "grad_norm": 1.1376993855927013, |
| "learning_rate": 5.964368116913957e-07, |
| "loss": 1.7292, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.0518998658918194, |
| "grad_norm": 1.1288484886032422, |
| "learning_rate": 5.96418366640196e-07, |
| "loss": 1.7373, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.052033974072418415, |
| "grad_norm": 1.0912837401536597, |
| "learning_rate": 5.963998742899809e-07, |
| "loss": 1.6279, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.05216808225301743, |
| "grad_norm": 1.080399914264917, |
| "learning_rate": 5.963813346440332e-07, |
| "loss": 1.6828, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.052302190433616454, |
| "grad_norm": 1.18296526148637, |
| "learning_rate": 5.963627477056445e-07, |
| "loss": 1.7037, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.05243629861421547, |
| "grad_norm": 1.0700933148095726, |
| "learning_rate": 5.963441134781147e-07, |
| "loss": 1.6773, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.052570406794814485, |
| "grad_norm": 1.5541605676471624, |
| "learning_rate": 5.963254319647519e-07, |
| "loss": 1.5786, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.0527045149754135, |
| "grad_norm": 1.154992915725033, |
| "learning_rate": 5.96306703168873e-07, |
| "loss": 1.7743, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.05283862315601252, |
| "grad_norm": 1.117612338423665, |
| "learning_rate": 5.962879270938028e-07, |
| "loss": 1.723, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.05297273133661153, |
| "grad_norm": 1.0907791376426386, |
| "learning_rate": 5.96269103742875e-07, |
| "loss": 1.73, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.05310683951721055, |
| "grad_norm": 1.1325939188472074, |
| "learning_rate": 5.962502331194311e-07, |
| "loss": 1.6756, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.053240947697809564, |
| "grad_norm": 1.0925915487497773, |
| "learning_rate": 5.962313152268218e-07, |
| "loss": 1.7166, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.05337505587840858, |
| "grad_norm": 1.1102789558363542, |
| "learning_rate": 5.96212350068405e-07, |
| "loss": 1.6697, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.0535091640590076, |
| "grad_norm": 1.1054817006563584, |
| "learning_rate": 5.961933376475485e-07, |
| "loss": 1.7231, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.05364327223960662, |
| "grad_norm": 1.307573555314525, |
| "learning_rate": 5.961742779676272e-07, |
| "loss": 1.7651, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.053777380420205634, |
| "grad_norm": 1.1445042759796842, |
| "learning_rate": 5.961551710320251e-07, |
| "loss": 1.6765, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.05391148860080465, |
| "grad_norm": 1.0762583158173675, |
| "learning_rate": 5.961360168441342e-07, |
| "loss": 1.6481, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.054045596781403665, |
| "grad_norm": 1.1084304546525765, |
| "learning_rate": 5.961168154073553e-07, |
| "loss": 1.7338, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.05417970496200268, |
| "grad_norm": 1.0982232521403124, |
| "learning_rate": 5.960975667250972e-07, |
| "loss": 1.6638, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.0543138131426017, |
| "grad_norm": 1.2140530141548174, |
| "learning_rate": 5.960782708007773e-07, |
| "loss": 1.7516, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.05444792132320071, |
| "grad_norm": 1.5212193377424008, |
| "learning_rate": 5.960589276378213e-07, |
| "loss": 1.7427, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.054582029503799735, |
| "grad_norm": 1.11412919662803, |
| "learning_rate": 5.960395372396633e-07, |
| "loss": 1.6931, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.05471613768439875, |
| "grad_norm": 1.0851895981130018, |
| "learning_rate": 5.960200996097458e-07, |
| "loss": 1.6913, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.05485024586499777, |
| "grad_norm": 1.1246816244588258, |
| "learning_rate": 5.960006147515199e-07, |
| "loss": 1.7152, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.05498435404559678, |
| "grad_norm": 1.0772018259030958, |
| "learning_rate": 5.959810826684446e-07, |
| "loss": 1.7227, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0551184622261958, |
| "grad_norm": 1.1172898063954977, |
| "learning_rate": 5.959615033639877e-07, |
| "loss": 1.6459, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.055252570406794814, |
| "grad_norm": 1.190430020238442, |
| "learning_rate": 5.959418768416252e-07, |
| "loss": 1.7491, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.05538667858739383, |
| "grad_norm": 1.0954974858449955, |
| "learning_rate": 5.959222031048417e-07, |
| "loss": 1.7136, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.055520786767992845, |
| "grad_norm": 1.1287823535303052, |
| "learning_rate": 5.959024821571296e-07, |
| "loss": 1.7765, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.05565489494859186, |
| "grad_norm": 1.0561812337694518, |
| "learning_rate": 5.958827140019905e-07, |
| "loss": 1.6913, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.055789003129190884, |
| "grad_norm": 1.1085682708952787, |
| "learning_rate": 5.958628986429338e-07, |
| "loss": 1.7022, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.0559231113097899, |
| "grad_norm": 1.145351387138441, |
| "learning_rate": 5.958430360834773e-07, |
| "loss": 1.7236, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.056057219490388915, |
| "grad_norm": 1.0897443627255616, |
| "learning_rate": 5.958231263271476e-07, |
| "loss": 1.6012, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.05619132767098793, |
| "grad_norm": 1.1200731868604838, |
| "learning_rate": 5.958031693774794e-07, |
| "loss": 1.7389, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.05632543585158695, |
| "grad_norm": 1.1038585013517133, |
| "learning_rate": 5.957831652380156e-07, |
| "loss": 1.583, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.05645954403218596, |
| "grad_norm": 1.4548045332193216, |
| "learning_rate": 5.95763113912308e-07, |
| "loss": 1.7524, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.05659365221278498, |
| "grad_norm": 1.1692222790883888, |
| "learning_rate": 5.95743015403916e-07, |
| "loss": 1.6299, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.056727760393383994, |
| "grad_norm": 1.1247764368969244, |
| "learning_rate": 5.95722869716408e-07, |
| "loss": 1.5839, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.05686186857398301, |
| "grad_norm": 1.1555568325620067, |
| "learning_rate": 5.957026768533605e-07, |
| "loss": 1.7239, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.05699597675458203, |
| "grad_norm": 1.1216899351148046, |
| "learning_rate": 5.956824368183589e-07, |
| "loss": 1.7256, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.05713008493518105, |
| "grad_norm": 1.145568323616433, |
| "learning_rate": 5.956621496149961e-07, |
| "loss": 1.6824, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.057264193115780064, |
| "grad_norm": 1.0986327998626733, |
| "learning_rate": 5.956418152468739e-07, |
| "loss": 1.6288, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.05739830129637908, |
| "grad_norm": 1.107394613480044, |
| "learning_rate": 5.956214337176026e-07, |
| "loss": 1.7525, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.057532409476978096, |
| "grad_norm": 1.1530636510188206, |
| "learning_rate": 5.956010050308003e-07, |
| "loss": 1.6703, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.05766651765757711, |
| "grad_norm": 1.2684443748494443, |
| "learning_rate": 5.955805291900944e-07, |
| "loss": 1.7255, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.05780062583817613, |
| "grad_norm": 1.1216850925610182, |
| "learning_rate": 5.955600061991196e-07, |
| "loss": 1.6833, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.05793473401877514, |
| "grad_norm": 1.1163294449512198, |
| "learning_rate": 5.955394360615196e-07, |
| "loss": 1.6738, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.05806884219937416, |
| "grad_norm": 1.0993928108999345, |
| "learning_rate": 5.955188187809465e-07, |
| "loss": 1.575, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.05820295037997318, |
| "grad_norm": 1.199099074821361, |
| "learning_rate": 5.954981543610606e-07, |
| "loss": 1.7117, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.0583370585605722, |
| "grad_norm": 1.1208106037393502, |
| "learning_rate": 5.954774428055305e-07, |
| "loss": 1.7093, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.05847116674117121, |
| "grad_norm": 1.2627670829161222, |
| "learning_rate": 5.954566841180332e-07, |
| "loss": 1.6188, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.05860527492177023, |
| "grad_norm": 1.0799814850943354, |
| "learning_rate": 5.954358783022543e-07, |
| "loss": 1.7059, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.058739383102369244, |
| "grad_norm": 1.1341395954441937, |
| "learning_rate": 5.954150253618875e-07, |
| "loss": 1.5712, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.05887349128296826, |
| "grad_norm": 1.1117856654912641, |
| "learning_rate": 5.95394125300635e-07, |
| "loss": 1.6777, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.059007599463567276, |
| "grad_norm": 1.0923581672387388, |
| "learning_rate": 5.953731781222071e-07, |
| "loss": 1.7159, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.05914170764416629, |
| "grad_norm": 1.0600443650637132, |
| "learning_rate": 5.953521838303231e-07, |
| "loss": 1.7249, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.059275815824765314, |
| "grad_norm": 1.2138612225345329, |
| "learning_rate": 5.9533114242871e-07, |
| "loss": 1.7013, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.05940992400536433, |
| "grad_norm": 1.0419430689297875, |
| "learning_rate": 5.953100539211034e-07, |
| "loss": 1.7552, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.059544032185963346, |
| "grad_norm": 1.1237438417872123, |
| "learning_rate": 5.952889183112474e-07, |
| "loss": 1.7112, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.05967814036656236, |
| "grad_norm": 1.2319625967973615, |
| "learning_rate": 5.952677356028943e-07, |
| "loss": 1.7093, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.05981224854716138, |
| "grad_norm": 1.086955577183242, |
| "learning_rate": 5.952465057998049e-07, |
| "loss": 1.6358, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.05994635672776039, |
| "grad_norm": 1.1264500428377913, |
| "learning_rate": 5.952252289057481e-07, |
| "loss": 1.7178, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.06008046490835941, |
| "grad_norm": 1.128811841099524, |
| "learning_rate": 5.952039049245012e-07, |
| "loss": 1.7591, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.060214573088958424, |
| "grad_norm": 1.1110504835526924, |
| "learning_rate": 5.951825338598503e-07, |
| "loss": 1.6403, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.06034868126955744, |
| "grad_norm": 1.2271379194246814, |
| "learning_rate": 5.951611157155895e-07, |
| "loss": 1.7213, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.06048278945015646, |
| "grad_norm": 1.1228932913870193, |
| "learning_rate": 5.951396504955212e-07, |
| "loss": 1.5935, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.06061689763075548, |
| "grad_norm": 1.11062455626935, |
| "learning_rate": 5.951181382034563e-07, |
| "loss": 1.6998, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.060751005811354494, |
| "grad_norm": 1.0990862927657152, |
| "learning_rate": 5.950965788432139e-07, |
| "loss": 1.6468, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.06088511399195351, |
| "grad_norm": 1.2688756973522501, |
| "learning_rate": 5.950749724186219e-07, |
| "loss": 1.741, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.061019222172552526, |
| "grad_norm": 1.2895801173515846, |
| "learning_rate": 5.950533189335158e-07, |
| "loss": 1.6955, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.06115333035315154, |
| "grad_norm": 1.077512840039689, |
| "learning_rate": 5.950316183917403e-07, |
| "loss": 1.641, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.06128743853375056, |
| "grad_norm": 1.0847961133378894, |
| "learning_rate": 5.950098707971477e-07, |
| "loss": 1.83, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.06142154671434957, |
| "grad_norm": 1.1936301482363822, |
| "learning_rate": 5.949880761535992e-07, |
| "loss": 1.8029, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.06155565489494859, |
| "grad_norm": 1.1712115230746196, |
| "learning_rate": 5.949662344649641e-07, |
| "loss": 1.7041, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.06168976307554761, |
| "grad_norm": 1.1207575353150439, |
| "learning_rate": 5.9494434573512e-07, |
| "loss": 1.8268, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.06182387125614663, |
| "grad_norm": 1.0875570889732413, |
| "learning_rate": 5.949224099679532e-07, |
| "loss": 1.7194, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.06195797943674564, |
| "grad_norm": 1.0917010226696162, |
| "learning_rate": 5.949004271673578e-07, |
| "loss": 1.7354, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.06209208761734466, |
| "grad_norm": 1.0997856156670267, |
| "learning_rate": 5.948783973372368e-07, |
| "loss": 1.7529, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.062226195797943674, |
| "grad_norm": 1.0621713053596278, |
| "learning_rate": 5.948563204815011e-07, |
| "loss": 1.6898, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.06236030397854269, |
| "grad_norm": 1.0614544715813865, |
| "learning_rate": 5.948341966040703e-07, |
| "loss": 1.7044, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.062494412159141706, |
| "grad_norm": 1.154295913834985, |
| "learning_rate": 5.948120257088721e-07, |
| "loss": 1.739, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.06262852033974073, |
| "grad_norm": 1.6321838989867514, |
| "learning_rate": 5.947898077998429e-07, |
| "loss": 1.6571, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.06276262852033974, |
| "grad_norm": 1.1020818061209965, |
| "learning_rate": 5.947675428809268e-07, |
| "loss": 1.7457, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.06289673670093876, |
| "grad_norm": 1.1541190378330166, |
| "learning_rate": 5.947452309560767e-07, |
| "loss": 1.7659, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.06303084488153778, |
| "grad_norm": 1.084642443791217, |
| "learning_rate": 5.947228720292541e-07, |
| "loss": 1.7144, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.06316495306213679, |
| "grad_norm": 1.1145594614023564, |
| "learning_rate": 5.947004661044283e-07, |
| "loss": 1.6729, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.06329906124273581, |
| "grad_norm": 1.115158449397951, |
| "learning_rate": 5.946780131855772e-07, |
| "loss": 1.7349, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.06343316942333482, |
| "grad_norm": 1.1366035122661107, |
| "learning_rate": 5.94655513276687e-07, |
| "loss": 1.7005, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.06356727760393384, |
| "grad_norm": 1.1207240569861627, |
| "learning_rate": 5.946329663817522e-07, |
| "loss": 1.6988, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.06370138578453285, |
| "grad_norm": 1.0633079931171385, |
| "learning_rate": 5.946103725047759e-07, |
| "loss": 1.6861, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.06383549396513187, |
| "grad_norm": 1.148420369678469, |
| "learning_rate": 5.945877316497692e-07, |
| "loss": 1.7186, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.06396960214573089, |
| "grad_norm": 1.1296345116481292, |
| "learning_rate": 5.945650438207517e-07, |
| "loss": 1.7515, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.0641037103263299, |
| "grad_norm": 1.1072132368875205, |
| "learning_rate": 5.945423090217512e-07, |
| "loss": 1.7498, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.06423781850692892, |
| "grad_norm": 1.0636459120097348, |
| "learning_rate": 5.945195272568042e-07, |
| "loss": 1.6705, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.06437192668752793, |
| "grad_norm": 1.1184722760153458, |
| "learning_rate": 5.944966985299551e-07, |
| "loss": 1.74, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.06450603486812695, |
| "grad_norm": 1.09226255206473, |
| "learning_rate": 5.944738228452569e-07, |
| "loss": 1.7125, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.06464014304872598, |
| "grad_norm": 1.0980507704132523, |
| "learning_rate": 5.94450900206771e-07, |
| "loss": 1.7187, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.064774251229325, |
| "grad_norm": 1.0944716620001702, |
| "learning_rate": 5.944279306185668e-07, |
| "loss": 1.5932, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.06490835940992401, |
| "grad_norm": 1.1136224916178525, |
| "learning_rate": 5.944049140847224e-07, |
| "loss": 1.6976, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.06504246759052303, |
| "grad_norm": 1.1013486929558047, |
| "learning_rate": 5.943818506093239e-07, |
| "loss": 1.6864, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.06517657577112204, |
| "grad_norm": 1.1430455689049595, |
| "learning_rate": 5.943587401964661e-07, |
| "loss": 1.6274, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.06531068395172106, |
| "grad_norm": 1.1269355413734778, |
| "learning_rate": 5.943355828502519e-07, |
| "loss": 1.7389, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.06544479213232007, |
| "grad_norm": 1.1442671190598854, |
| "learning_rate": 5.943123785747925e-07, |
| "loss": 1.6724, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.06557890031291909, |
| "grad_norm": 1.1006441895975216, |
| "learning_rate": 5.942891273742075e-07, |
| "loss": 1.687, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.0657130084935181, |
| "grad_norm": 1.1130024103107554, |
| "learning_rate": 5.94265829252625e-07, |
| "loss": 1.6774, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.06584711667411712, |
| "grad_norm": 1.10665029408129, |
| "learning_rate": 5.942424842141811e-07, |
| "loss": 1.7053, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.06598122485471614, |
| "grad_norm": 1.0895398255696098, |
| "learning_rate": 5.942190922630204e-07, |
| "loss": 1.6816, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.06611533303531515, |
| "grad_norm": 1.0952133118391503, |
| "learning_rate": 5.941956534032961e-07, |
| "loss": 1.58, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.06624944121591417, |
| "grad_norm": 1.104962374424092, |
| "learning_rate": 5.941721676391691e-07, |
| "loss": 1.758, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.06638354939651318, |
| "grad_norm": 1.1134158734370636, |
| "learning_rate": 5.941486349748091e-07, |
| "loss": 1.7508, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.0665176575771122, |
| "grad_norm": 1.175784721072215, |
| "learning_rate": 5.94125055414394e-07, |
| "loss": 1.7113, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.06665176575771121, |
| "grad_norm": 1.0778973456587042, |
| "learning_rate": 5.941014289621102e-07, |
| "loss": 1.7558, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.06678587393831023, |
| "grad_norm": 1.11982522730228, |
| "learning_rate": 5.940777556221521e-07, |
| "loss": 1.6791, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.06691998211890926, |
| "grad_norm": 1.1807400353238904, |
| "learning_rate": 5.940540353987225e-07, |
| "loss": 1.7484, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.06705409029950828, |
| "grad_norm": 1.1987690536433178, |
| "learning_rate": 5.940302682960328e-07, |
| "loss": 1.59, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06718819848010729, |
| "grad_norm": 1.1093357389120035, |
| "learning_rate": 5.940064543183026e-07, |
| "loss": 1.8238, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.06732230666070631, |
| "grad_norm": 1.2404864761664665, |
| "learning_rate": 5.939825934697594e-07, |
| "loss": 1.6965, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.06745641484130532, |
| "grad_norm": 1.1369155507476978, |
| "learning_rate": 5.939586857546397e-07, |
| "loss": 1.7284, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.06759052302190434, |
| "grad_norm": 1.0747025812432756, |
| "learning_rate": 5.939347311771877e-07, |
| "loss": 1.6029, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.06772463120250335, |
| "grad_norm": 1.2065817260719833, |
| "learning_rate": 5.939107297416566e-07, |
| "loss": 1.7937, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.06785873938310237, |
| "grad_norm": 1.072195510416472, |
| "learning_rate": 5.938866814523073e-07, |
| "loss": 1.6844, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.06799284756370139, |
| "grad_norm": 1.0788223308291087, |
| "learning_rate": 5.938625863134092e-07, |
| "loss": 1.7651, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.0681269557443004, |
| "grad_norm": 1.1125709389242076, |
| "learning_rate": 5.938384443292403e-07, |
| "loss": 1.6723, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.06826106392489942, |
| "grad_norm": 1.2370173408194798, |
| "learning_rate": 5.938142555040863e-07, |
| "loss": 1.6491, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.06839517210549843, |
| "grad_norm": 1.0646655039063193, |
| "learning_rate": 5.93790019842242e-07, |
| "loss": 1.7609, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.06852928028609745, |
| "grad_norm": 1.137655615576816, |
| "learning_rate": 5.9376573734801e-07, |
| "loss": 1.6971, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.06866338846669647, |
| "grad_norm": 1.1610648719854884, |
| "learning_rate": 5.937414080257011e-07, |
| "loss": 1.7563, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.06879749664729548, |
| "grad_norm": 1.022128030652968, |
| "learning_rate": 5.93717031879635e-07, |
| "loss": 1.6585, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.0689316048278945, |
| "grad_norm": 1.1094802666159138, |
| "learning_rate": 5.936926089141391e-07, |
| "loss": 1.6963, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.06906571300849351, |
| "grad_norm": 1.0491463968940271, |
| "learning_rate": 5.936681391335494e-07, |
| "loss": 1.653, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.06919982118909253, |
| "grad_norm": 1.1153617117594175, |
| "learning_rate": 5.936436225422104e-07, |
| "loss": 1.6738, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.06933392936969156, |
| "grad_norm": 1.1150239468835819, |
| "learning_rate": 5.936190591444744e-07, |
| "loss": 1.726, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.06946803755029057, |
| "grad_norm": 1.1299338290201733, |
| "learning_rate": 5.935944489447026e-07, |
| "loss": 1.6814, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.06960214573088959, |
| "grad_norm": 1.0925086075502406, |
| "learning_rate": 5.935697919472639e-07, |
| "loss": 1.6141, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.0697362539114886, |
| "grad_norm": 1.1136653572074133, |
| "learning_rate": 5.93545088156536e-07, |
| "loss": 1.6752, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.06987036209208762, |
| "grad_norm": 1.086968726752448, |
| "learning_rate": 5.935203375769048e-07, |
| "loss": 1.6593, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.07000447027268664, |
| "grad_norm": 1.0785790431427873, |
| "learning_rate": 5.934955402127642e-07, |
| "loss": 1.7806, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.07013857845328565, |
| "grad_norm": 1.061202101435773, |
| "learning_rate": 5.934706960685168e-07, |
| "loss": 1.6015, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.07027268663388467, |
| "grad_norm": 1.1217377555129306, |
| "learning_rate": 5.934458051485734e-07, |
| "loss": 1.6836, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.07040679481448368, |
| "grad_norm": 1.1634463467399316, |
| "learning_rate": 5.934208674573529e-07, |
| "loss": 1.641, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.0705409029950827, |
| "grad_norm": 1.1853874452885456, |
| "learning_rate": 5.933958829992828e-07, |
| "loss": 1.6501, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.07067501117568172, |
| "grad_norm": 1.0827543649368265, |
| "learning_rate": 5.933708517787985e-07, |
| "loss": 1.6664, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.07080911935628073, |
| "grad_norm": 1.1171619381364966, |
| "learning_rate": 5.933457738003443e-07, |
| "loss": 1.6758, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.07094322753687975, |
| "grad_norm": 1.2171560054678998, |
| "learning_rate": 5.933206490683722e-07, |
| "loss": 1.6914, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.07107733571747876, |
| "grad_norm": 1.130266539632813, |
| "learning_rate": 5.932954775873429e-07, |
| "loss": 1.6301, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.07121144389807778, |
| "grad_norm": 1.1814157624655244, |
| "learning_rate": 5.932702593617252e-07, |
| "loss": 1.689, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.0713455520786768, |
| "grad_norm": 1.1423293526842793, |
| "learning_rate": 5.932449943959963e-07, |
| "loss": 1.7379, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.07147966025927581, |
| "grad_norm": 1.0830256450215578, |
| "learning_rate": 5.932196826946416e-07, |
| "loss": 1.6752, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.07161376843987484, |
| "grad_norm": 1.2254212102036337, |
| "learning_rate": 5.931943242621548e-07, |
| "loss": 1.7602, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.07174787662047385, |
| "grad_norm": 1.1254407305546181, |
| "learning_rate": 5.931689191030381e-07, |
| "loss": 1.7144, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.07188198480107287, |
| "grad_norm": 1.7531628186363164, |
| "learning_rate": 5.931434672218018e-07, |
| "loss": 1.7868, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.07201609298167189, |
| "grad_norm": 1.1530768773395477, |
| "learning_rate": 5.931179686229645e-07, |
| "loss": 1.7128, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.0721502011622709, |
| "grad_norm": 1.0869645546426585, |
| "learning_rate": 5.930924233110532e-07, |
| "loss": 1.626, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.07228430934286992, |
| "grad_norm": 1.2196040558075754, |
| "learning_rate": 5.930668312906031e-07, |
| "loss": 1.7148, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.07241841752346893, |
| "grad_norm": 1.1904076173283444, |
| "learning_rate": 5.930411925661577e-07, |
| "loss": 1.6981, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.07255252570406795, |
| "grad_norm": 1.5987820485565098, |
| "learning_rate": 5.930155071422687e-07, |
| "loss": 1.7351, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.07268663388466697, |
| "grad_norm": 1.101070130998752, |
| "learning_rate": 5.929897750234963e-07, |
| "loss": 1.6313, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.07282074206526598, |
| "grad_norm": 1.0908625387826942, |
| "learning_rate": 5.929639962144091e-07, |
| "loss": 1.5891, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.072954850245865, |
| "grad_norm": 1.0986511244523132, |
| "learning_rate": 5.929381707195834e-07, |
| "loss": 1.6991, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.07308895842646401, |
| "grad_norm": 1.055356610594688, |
| "learning_rate": 5.929122985436045e-07, |
| "loss": 1.7331, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.07322306660706303, |
| "grad_norm": 1.035590332821026, |
| "learning_rate": 5.928863796910655e-07, |
| "loss": 1.5682, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.07335717478766204, |
| "grad_norm": 1.0783361793793855, |
| "learning_rate": 5.928604141665679e-07, |
| "loss": 1.6092, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.07349128296826106, |
| "grad_norm": 1.090736305001705, |
| "learning_rate": 5.928344019747217e-07, |
| "loss": 1.7072, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.07362539114886008, |
| "grad_norm": 1.4276709820636466, |
| "learning_rate": 5.928083431201449e-07, |
| "loss": 1.6789, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.07375949932945909, |
| "grad_norm": 1.0906054014326296, |
| "learning_rate": 5.927822376074639e-07, |
| "loss": 1.7215, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0738936075100581, |
| "grad_norm": 1.364150462787829, |
| "learning_rate": 5.927560854413134e-07, |
| "loss": 1.6841, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.07402771569065714, |
| "grad_norm": 1.1159870574206099, |
| "learning_rate": 5.927298866263363e-07, |
| "loss": 1.7298, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.07416182387125615, |
| "grad_norm": 1.1812983592653572, |
| "learning_rate": 5.92703641167184e-07, |
| "loss": 1.7091, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.07429593205185517, |
| "grad_norm": 1.0688687878186984, |
| "learning_rate": 5.926773490685159e-07, |
| "loss": 1.8398, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.07443004023245418, |
| "grad_norm": 1.2894858274000411, |
| "learning_rate": 5.92651010335e-07, |
| "loss": 1.6902, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.0745641484130532, |
| "grad_norm": 1.1464943136824657, |
| "learning_rate": 5.926246249713121e-07, |
| "loss": 1.7249, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.07469825659365222, |
| "grad_norm": 1.3070568856631266, |
| "learning_rate": 5.925981929821368e-07, |
| "loss": 1.6741, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.07483236477425123, |
| "grad_norm": 1.1646332582267231, |
| "learning_rate": 5.925717143721665e-07, |
| "loss": 1.6975, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.07496647295485025, |
| "grad_norm": 1.213733563154542, |
| "learning_rate": 5.925451891461026e-07, |
| "loss": 1.6688, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.07510058113544926, |
| "grad_norm": 1.1250145434758787, |
| "learning_rate": 5.925186173086538e-07, |
| "loss": 1.7044, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.07523468931604828, |
| "grad_norm": 1.0865739045197238, |
| "learning_rate": 5.924919988645377e-07, |
| "loss": 1.6663, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.0753687974966473, |
| "grad_norm": 1.1159580863498637, |
| "learning_rate": 5.924653338184801e-07, |
| "loss": 1.5986, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.07550290567724631, |
| "grad_norm": 1.0795350956359355, |
| "learning_rate": 5.924386221752151e-07, |
| "loss": 1.7059, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.07563701385784533, |
| "grad_norm": 1.059523546111381, |
| "learning_rate": 5.924118639394849e-07, |
| "loss": 1.6525, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.07577112203844434, |
| "grad_norm": 1.0995795687250527, |
| "learning_rate": 5.923850591160401e-07, |
| "loss": 1.6524, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.07590523021904336, |
| "grad_norm": 1.1092841538303688, |
| "learning_rate": 5.923582077096395e-07, |
| "loss": 1.7758, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.07603933839964237, |
| "grad_norm": 2.6979584052916503, |
| "learning_rate": 5.923313097250504e-07, |
| "loss": 1.6593, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.07617344658024139, |
| "grad_norm": 1.0621178435726715, |
| "learning_rate": 5.923043651670478e-07, |
| "loss": 1.6983, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.07630755476084042, |
| "grad_norm": 1.1573135825405225, |
| "learning_rate": 5.922773740404157e-07, |
| "loss": 1.7572, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.07644166294143943, |
| "grad_norm": 1.3034930029837637, |
| "learning_rate": 5.922503363499457e-07, |
| "loss": 1.7229, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.07657577112203845, |
| "grad_norm": 1.063644093194536, |
| "learning_rate": 5.922232521004384e-07, |
| "loss": 1.6373, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.07670987930263747, |
| "grad_norm": 1.0799490002557715, |
| "learning_rate": 5.921961212967018e-07, |
| "loss": 1.7291, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.07684398748323648, |
| "grad_norm": 1.1456297613060256, |
| "learning_rate": 5.921689439435529e-07, |
| "loss": 1.6715, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.0769780956638355, |
| "grad_norm": 1.1064438116765838, |
| "learning_rate": 5.921417200458166e-07, |
| "loss": 1.6324, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.07711220384443451, |
| "grad_norm": 1.2537502156532783, |
| "learning_rate": 5.921144496083261e-07, |
| "loss": 1.6255, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.07724631202503353, |
| "grad_norm": 1.1130457826739977, |
| "learning_rate": 5.920871326359228e-07, |
| "loss": 1.7305, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.07738042020563254, |
| "grad_norm": 1.1106269047087995, |
| "learning_rate": 5.920597691334568e-07, |
| "loss": 1.7839, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.07751452838623156, |
| "grad_norm": 1.1308110312275523, |
| "learning_rate": 5.920323591057858e-07, |
| "loss": 1.702, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.07764863656683058, |
| "grad_norm": 1.1274236401107995, |
| "learning_rate": 5.920049025577762e-07, |
| "loss": 1.6345, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.07778274474742959, |
| "grad_norm": 1.1274894849868589, |
| "learning_rate": 5.919773994943026e-07, |
| "loss": 1.6358, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.07791685292802861, |
| "grad_norm": 1.203139388656472, |
| "learning_rate": 5.919498499202476e-07, |
| "loss": 1.7228, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.07805096110862762, |
| "grad_norm": 1.1343472094184475, |
| "learning_rate": 5.919222538405025e-07, |
| "loss": 1.5995, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.07818506928922664, |
| "grad_norm": 1.1211098856442396, |
| "learning_rate": 5.918946112599665e-07, |
| "loss": 1.7545, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.07831917746982565, |
| "grad_norm": 1.3590410455725328, |
| "learning_rate": 5.918669221835472e-07, |
| "loss": 1.6658, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.07845328565042467, |
| "grad_norm": 1.1368973789149184, |
| "learning_rate": 5.918391866161604e-07, |
| "loss": 1.6578, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.07858739383102369, |
| "grad_norm": 1.144480010176944, |
| "learning_rate": 5.918114045627301e-07, |
| "loss": 1.687, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.07872150201162272, |
| "grad_norm": 1.1079667555369228, |
| "learning_rate": 5.91783576028189e-07, |
| "loss": 1.6571, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.07885561019222173, |
| "grad_norm": 1.1172832381186681, |
| "learning_rate": 5.917557010174771e-07, |
| "loss": 1.6347, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.07898971837282075, |
| "grad_norm": 1.1477730537939723, |
| "learning_rate": 5.917277795355436e-07, |
| "loss": 1.696, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.07912382655341976, |
| "grad_norm": 1.1124249695741149, |
| "learning_rate": 5.916998115873455e-07, |
| "loss": 1.7316, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.07925793473401878, |
| "grad_norm": 1.2132332214863524, |
| "learning_rate": 5.916717971778482e-07, |
| "loss": 1.7529, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.0793920429146178, |
| "grad_norm": 1.1308959961423235, |
| "learning_rate": 5.916437363120253e-07, |
| "loss": 1.6713, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.07952615109521681, |
| "grad_norm": 1.1204029361778143, |
| "learning_rate": 5.916156289948584e-07, |
| "loss": 1.6751, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.07966025927581583, |
| "grad_norm": 1.1836584994154395, |
| "learning_rate": 5.91587475231338e-07, |
| "loss": 1.7145, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.07979436745641484, |
| "grad_norm": 1.0952029272098618, |
| "learning_rate": 5.91559275026462e-07, |
| "loss": 1.6849, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.07992847563701386, |
| "grad_norm": 1.2564246490346886, |
| "learning_rate": 5.915310283852372e-07, |
| "loss": 1.6352, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.08006258381761287, |
| "grad_norm": 1.1465710959467506, |
| "learning_rate": 5.915027353126783e-07, |
| "loss": 1.6647, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.08019669199821189, |
| "grad_norm": 1.1382835508015974, |
| "learning_rate": 5.914743958138086e-07, |
| "loss": 1.7106, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.0803308001788109, |
| "grad_norm": 1.1192071556571492, |
| "learning_rate": 5.91446009893659e-07, |
| "loss": 1.706, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.08046490835940992, |
| "grad_norm": 1.1629696564337242, |
| "learning_rate": 5.914175775572693e-07, |
| "loss": 1.676, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08059901654000894, |
| "grad_norm": 1.1336751221713581, |
| "learning_rate": 5.913890988096872e-07, |
| "loss": 1.7061, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.08073312472060795, |
| "grad_norm": 1.063751409329425, |
| "learning_rate": 5.913605736559689e-07, |
| "loss": 1.6276, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.08086723290120697, |
| "grad_norm": 1.7847493987152905, |
| "learning_rate": 5.913320021011784e-07, |
| "loss": 1.7643, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.081001341081806, |
| "grad_norm": 1.1752588010758491, |
| "learning_rate": 5.913033841503882e-07, |
| "loss": 1.7136, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.08113544926240501, |
| "grad_norm": 1.092151629247411, |
| "learning_rate": 5.912747198086793e-07, |
| "loss": 1.6921, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.08126955744300403, |
| "grad_norm": 1.1813450877374088, |
| "learning_rate": 5.912460090811404e-07, |
| "loss": 1.5961, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.08140366562360304, |
| "grad_norm": 1.1386503634209713, |
| "learning_rate": 5.912172519728691e-07, |
| "loss": 1.6936, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.08153777380420206, |
| "grad_norm": 1.1478659529471829, |
| "learning_rate": 5.911884484889702e-07, |
| "loss": 1.7133, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.08167188198480108, |
| "grad_norm": 1.2776303627444894, |
| "learning_rate": 5.911595986345579e-07, |
| "loss": 1.686, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.08180599016540009, |
| "grad_norm": 1.0774582052806807, |
| "learning_rate": 5.91130702414754e-07, |
| "loss": 1.8028, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.08194009834599911, |
| "grad_norm": 1.0810859242279176, |
| "learning_rate": 5.911017598346885e-07, |
| "loss": 1.6044, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.08207420652659812, |
| "grad_norm": 1.1594727731031893, |
| "learning_rate": 5.910727708994998e-07, |
| "loss": 1.7686, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.08220831470719714, |
| "grad_norm": 1.1321005040254193, |
| "learning_rate": 5.910437356143345e-07, |
| "loss": 1.6522, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.08234242288779615, |
| "grad_norm": 1.0653919163589205, |
| "learning_rate": 5.910146539843476e-07, |
| "loss": 1.7465, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.08247653106839517, |
| "grad_norm": 1.1128916496114905, |
| "learning_rate": 5.90985526014702e-07, |
| "loss": 1.6125, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.08261063924899419, |
| "grad_norm": 1.4081204838899852, |
| "learning_rate": 5.90956351710569e-07, |
| "loss": 1.7639, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.0827447474295932, |
| "grad_norm": 1.1683592035720405, |
| "learning_rate": 5.909271310771279e-07, |
| "loss": 1.637, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.08287885561019222, |
| "grad_norm": 1.115793940661641, |
| "learning_rate": 5.90897864119567e-07, |
| "loss": 1.6118, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.08301296379079123, |
| "grad_norm": 1.0879479857779484, |
| "learning_rate": 5.908685508430816e-07, |
| "loss": 1.6846, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.08314707197139025, |
| "grad_norm": 1.1428114800136786, |
| "learning_rate": 5.908391912528764e-07, |
| "loss": 1.6949, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.08328118015198926, |
| "grad_norm": 1.11661524840305, |
| "learning_rate": 5.908097853541634e-07, |
| "loss": 1.754, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.0834152883325883, |
| "grad_norm": 1.0762293742420466, |
| "learning_rate": 5.907803331521635e-07, |
| "loss": 1.7609, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.08354939651318731, |
| "grad_norm": 1.0719203407555025, |
| "learning_rate": 5.907508346521054e-07, |
| "loss": 1.6981, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.08368350469378633, |
| "grad_norm": 1.1553772926251566, |
| "learning_rate": 5.907212898592263e-07, |
| "loss": 1.7024, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.08381761287438534, |
| "grad_norm": 1.1270260996688657, |
| "learning_rate": 5.906916987787713e-07, |
| "loss": 1.6906, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.08395172105498436, |
| "grad_norm": 1.1229658996843206, |
| "learning_rate": 5.90662061415994e-07, |
| "loss": 1.694, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.08408582923558337, |
| "grad_norm": 1.1277068299424584, |
| "learning_rate": 5.906323777761561e-07, |
| "loss": 1.5693, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.08421993741618239, |
| "grad_norm": 1.1180105581479995, |
| "learning_rate": 5.906026478645276e-07, |
| "loss": 1.7247, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.0843540455967814, |
| "grad_norm": 1.2224062872746266, |
| "learning_rate": 5.905728716863865e-07, |
| "loss": 1.6829, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.08448815377738042, |
| "grad_norm": 1.1085889629398797, |
| "learning_rate": 5.905430492470195e-07, |
| "loss": 1.7271, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.08462226195797944, |
| "grad_norm": 1.1451977446739299, |
| "learning_rate": 5.905131805517207e-07, |
| "loss": 1.5877, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.08475637013857845, |
| "grad_norm": 1.1422915014499277, |
| "learning_rate": 5.904832656057932e-07, |
| "loss": 1.6977, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.08489047831917747, |
| "grad_norm": 1.131510544315339, |
| "learning_rate": 5.904533044145479e-07, |
| "loss": 1.5513, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.08502458649977648, |
| "grad_norm": 1.2432140035573447, |
| "learning_rate": 5.904232969833039e-07, |
| "loss": 1.6835, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.0851586946803755, |
| "grad_norm": 1.0744643011300827, |
| "learning_rate": 5.90393243317389e-07, |
| "loss": 1.6052, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.08529280286097451, |
| "grad_norm": 1.3098823736310086, |
| "learning_rate": 5.903631434221384e-07, |
| "loss": 1.7622, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.08542691104157353, |
| "grad_norm": 1.1182788647555526, |
| "learning_rate": 5.903329973028961e-07, |
| "loss": 1.7497, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.08556101922217255, |
| "grad_norm": 1.305543631329334, |
| "learning_rate": 5.903028049650141e-07, |
| "loss": 1.6732, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.08569512740277158, |
| "grad_norm": 1.1108546390310376, |
| "learning_rate": 5.902725664138528e-07, |
| "loss": 1.7271, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.08582923558337059, |
| "grad_norm": 1.0769425748182762, |
| "learning_rate": 5.902422816547804e-07, |
| "loss": 1.666, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.08596334376396961, |
| "grad_norm": 1.0710915573180522, |
| "learning_rate": 5.902119506931739e-07, |
| "loss": 1.7208, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.08609745194456862, |
| "grad_norm": 1.1265338939849623, |
| "learning_rate": 5.901815735344178e-07, |
| "loss": 1.713, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.08623156012516764, |
| "grad_norm": 1.1032977967977797, |
| "learning_rate": 5.901511501839053e-07, |
| "loss": 1.655, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.08636566830576665, |
| "grad_norm": 1.067089553405501, |
| "learning_rate": 5.901206806470377e-07, |
| "loss": 1.6794, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.08649977648636567, |
| "grad_norm": 1.1924702814140196, |
| "learning_rate": 5.900901649292243e-07, |
| "loss": 1.6186, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.08663388466696469, |
| "grad_norm": 1.1000064746041005, |
| "learning_rate": 5.900596030358831e-07, |
| "loss": 1.7316, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.0867679928475637, |
| "grad_norm": 1.16787242186727, |
| "learning_rate": 5.900289949724397e-07, |
| "loss": 1.6475, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.08690210102816272, |
| "grad_norm": 1.153036807295657, |
| "learning_rate": 5.899983407443281e-07, |
| "loss": 1.604, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.08703620920876173, |
| "grad_norm": 1.1418227950695776, |
| "learning_rate": 5.899676403569906e-07, |
| "loss": 1.7925, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.08717031738936075, |
| "grad_norm": 1.1018946533270777, |
| "learning_rate": 5.899368938158777e-07, |
| "loss": 1.5998, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.08730442556995976, |
| "grad_norm": 1.0898779658636957, |
| "learning_rate": 5.899061011264481e-07, |
| "loss": 1.6772, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.08743853375055878, |
| "grad_norm": 1.1828085767178107, |
| "learning_rate": 5.898752622941684e-07, |
| "loss": 1.6564, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.0875726419311578, |
| "grad_norm": 1.123777742875525, |
| "learning_rate": 5.89844377324514e-07, |
| "loss": 1.7173, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.08770675011175681, |
| "grad_norm": 1.1137884706219183, |
| "learning_rate": 5.898134462229677e-07, |
| "loss": 1.705, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.08784085829235583, |
| "grad_norm": 1.0736901627301867, |
| "learning_rate": 5.89782468995021e-07, |
| "loss": 1.6673, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.08797496647295484, |
| "grad_norm": 1.1006296755478988, |
| "learning_rate": 5.897514456461737e-07, |
| "loss": 1.662, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.08810907465355387, |
| "grad_norm": 1.0993086803454002, |
| "learning_rate": 5.897203761819334e-07, |
| "loss": 1.7671, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.08824318283415289, |
| "grad_norm": 1.1555576950225783, |
| "learning_rate": 5.896892606078163e-07, |
| "loss": 1.6558, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.0883772910147519, |
| "grad_norm": 1.1044269950107921, |
| "learning_rate": 5.896580989293461e-07, |
| "loss": 1.6538, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.08851139919535092, |
| "grad_norm": 1.1293808136662087, |
| "learning_rate": 5.896268911520556e-07, |
| "loss": 1.6734, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.08864550737594994, |
| "grad_norm": 1.0799327058316142, |
| "learning_rate": 5.895956372814851e-07, |
| "loss": 1.7258, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.08877961555654895, |
| "grad_norm": 1.2412270489033748, |
| "learning_rate": 5.895643373231834e-07, |
| "loss": 1.7033, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.08891372373714797, |
| "grad_norm": 1.2660732052099137, |
| "learning_rate": 5.895329912827074e-07, |
| "loss": 1.6607, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.08904783191774698, |
| "grad_norm": 1.0851423150565935, |
| "learning_rate": 5.895015991656218e-07, |
| "loss": 1.7365, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.089181940098346, |
| "grad_norm": 1.0926935688632777, |
| "learning_rate": 5.894701609775004e-07, |
| "loss": 1.723, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.08931604827894501, |
| "grad_norm": 1.1335362217269433, |
| "learning_rate": 5.894386767239243e-07, |
| "loss": 1.7482, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.08945015645954403, |
| "grad_norm": 1.0690769483519065, |
| "learning_rate": 5.894071464104832e-07, |
| "loss": 1.7083, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.08958426464014305, |
| "grad_norm": 1.144239086274215, |
| "learning_rate": 5.893755700427749e-07, |
| "loss": 1.6672, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.08971837282074206, |
| "grad_norm": 1.154969050751237, |
| "learning_rate": 5.893439476264053e-07, |
| "loss": 1.5992, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.08985248100134108, |
| "grad_norm": 1.1692487930022055, |
| "learning_rate": 5.893122791669886e-07, |
| "loss": 1.6895, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.0899865891819401, |
| "grad_norm": 1.1445503009803197, |
| "learning_rate": 5.892805646701471e-07, |
| "loss": 1.6176, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.09012069736253911, |
| "grad_norm": 1.0860602124973238, |
| "learning_rate": 5.892488041415113e-07, |
| "loss": 1.7431, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.09025480554313813, |
| "grad_norm": 1.1840804859528216, |
| "learning_rate": 5.892169975867196e-07, |
| "loss": 1.5377, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.09038891372373715, |
| "grad_norm": 1.0925936180668785, |
| "learning_rate": 5.891851450114193e-07, |
| "loss": 1.693, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.09052302190433617, |
| "grad_norm": 1.1412736395289622, |
| "learning_rate": 5.891532464212651e-07, |
| "loss": 1.6782, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.09065713008493519, |
| "grad_norm": 1.1014154222006858, |
| "learning_rate": 5.891213018219203e-07, |
| "loss": 1.6661, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.0907912382655342, |
| "grad_norm": 1.1028682933773437, |
| "learning_rate": 5.89089311219056e-07, |
| "loss": 1.6283, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.09092534644613322, |
| "grad_norm": 1.0999221111301187, |
| "learning_rate": 5.89057274618352e-07, |
| "loss": 1.6288, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.09105945462673223, |
| "grad_norm": 1.0929215008817739, |
| "learning_rate": 5.890251920254958e-07, |
| "loss": 1.6966, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.09119356280733125, |
| "grad_norm": 1.0995793357287673, |
| "learning_rate": 5.889930634461832e-07, |
| "loss": 1.7086, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.09132767098793027, |
| "grad_norm": 1.0809381415190136, |
| "learning_rate": 5.889608888861182e-07, |
| "loss": 1.6829, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.09146177916852928, |
| "grad_norm": 1.0548227913499995, |
| "learning_rate": 5.889286683510132e-07, |
| "loss": 1.6826, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.0915958873491283, |
| "grad_norm": 1.1106859513783915, |
| "learning_rate": 5.888964018465883e-07, |
| "loss": 1.6544, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.09172999552972731, |
| "grad_norm": 1.0878369148062472, |
| "learning_rate": 5.88864089378572e-07, |
| "loss": 1.6342, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.09186410371032633, |
| "grad_norm": 1.128955444803477, |
| "learning_rate": 5.888317309527009e-07, |
| "loss": 1.6121, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.09199821189092534, |
| "grad_norm": 1.246867762194091, |
| "learning_rate": 5.887993265747201e-07, |
| "loss": 1.6819, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.09213232007152436, |
| "grad_norm": 1.1533855664708184, |
| "learning_rate": 5.887668762503822e-07, |
| "loss": 1.7429, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.09226642825212338, |
| "grad_norm": 1.0405450268075809, |
| "learning_rate": 5.887343799854485e-07, |
| "loss": 1.6759, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.09240053643272239, |
| "grad_norm": 1.1507085139636744, |
| "learning_rate": 5.887018377856884e-07, |
| "loss": 1.8036, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.0925346446133214, |
| "grad_norm": 6.743658343986094, |
| "learning_rate": 5.886692496568789e-07, |
| "loss": 1.6027, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.09266875279392042, |
| "grad_norm": 1.0641784107760024, |
| "learning_rate": 5.886366156048061e-07, |
| "loss": 1.6558, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.09280286097451945, |
| "grad_norm": 1.0922990524942957, |
| "learning_rate": 5.886039356352634e-07, |
| "loss": 1.7383, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.09293696915511847, |
| "grad_norm": 1.1742618579401762, |
| "learning_rate": 5.885712097540529e-07, |
| "loss": 1.5927, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.09307107733571748, |
| "grad_norm": 1.1075189838987614, |
| "learning_rate": 5.885384379669844e-07, |
| "loss": 1.7738, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.0932051855163165, |
| "grad_norm": 2.1929813163212093, |
| "learning_rate": 5.885056202798763e-07, |
| "loss": 1.7975, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.09333929369691552, |
| "grad_norm": 1.0998963175774283, |
| "learning_rate": 5.88472756698555e-07, |
| "loss": 1.6156, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.09347340187751453, |
| "grad_norm": 1.0824346616111722, |
| "learning_rate": 5.884398472288546e-07, |
| "loss": 1.7226, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.09360751005811355, |
| "grad_norm": 1.048887980139358, |
| "learning_rate": 5.884068918766182e-07, |
| "loss": 1.7065, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.09374161823871256, |
| "grad_norm": 1.0293430293240384, |
| "learning_rate": 5.883738906476963e-07, |
| "loss": 1.6596, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.09387572641931158, |
| "grad_norm": 1.0943419458638883, |
| "learning_rate": 5.88340843547948e-07, |
| "loss": 1.7356, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0940098345999106, |
| "grad_norm": 1.0980484739258698, |
| "learning_rate": 5.883077505832403e-07, |
| "loss": 1.6039, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.09414394278050961, |
| "grad_norm": 1.1455036041824893, |
| "learning_rate": 5.882746117594482e-07, |
| "loss": 1.6255, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.09427805096110863, |
| "grad_norm": 1.4001837690870673, |
| "learning_rate": 5.882414270824554e-07, |
| "loss": 1.6008, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.09441215914170764, |
| "grad_norm": 1.1130500383248842, |
| "learning_rate": 5.882081965581533e-07, |
| "loss": 1.7358, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.09454626732230666, |
| "grad_norm": 1.070694937502845, |
| "learning_rate": 5.881749201924413e-07, |
| "loss": 1.6635, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.09468037550290567, |
| "grad_norm": 1.1144333495898877, |
| "learning_rate": 5.881415979912274e-07, |
| "loss": 1.7066, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.09481448368350469, |
| "grad_norm": 1.1422205384748831, |
| "learning_rate": 5.881082299604276e-07, |
| "loss": 1.6546, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.0949485918641037, |
| "grad_norm": 1.0853098558287595, |
| "learning_rate": 5.880748161059657e-07, |
| "loss": 1.6753, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.09508270004470273, |
| "grad_norm": 1.198904753001485, |
| "learning_rate": 5.88041356433774e-07, |
| "loss": 1.7569, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.09521680822530175, |
| "grad_norm": 1.1071829227283936, |
| "learning_rate": 5.880078509497928e-07, |
| "loss": 1.6232, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.09535091640590077, |
| "grad_norm": 1.0695300790601336, |
| "learning_rate": 5.879742996599706e-07, |
| "loss": 1.6413, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.09548502458649978, |
| "grad_norm": 3.3268091455655355, |
| "learning_rate": 5.879407025702638e-07, |
| "loss": 1.593, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.0956191327670988, |
| "grad_norm": 1.0722393433959394, |
| "learning_rate": 5.879070596866374e-07, |
| "loss": 1.7546, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.09575324094769781, |
| "grad_norm": 1.153579196694916, |
| "learning_rate": 5.87873371015064e-07, |
| "loss": 1.657, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.09588734912829683, |
| "grad_norm": 1.1213730882230093, |
| "learning_rate": 5.878396365615248e-07, |
| "loss": 1.6892, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.09602145730889584, |
| "grad_norm": 1.1795757056582914, |
| "learning_rate": 5.878058563320086e-07, |
| "loss": 1.6945, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.09615556548949486, |
| "grad_norm": 1.075176593983707, |
| "learning_rate": 5.87772030332513e-07, |
| "loss": 1.7196, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.09628967367009388, |
| "grad_norm": 1.0441316150069637, |
| "learning_rate": 5.877381585690431e-07, |
| "loss": 1.6256, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.09642378185069289, |
| "grad_norm": 1.1023538045059467, |
| "learning_rate": 5.877042410476124e-07, |
| "loss": 1.6537, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.09655789003129191, |
| "grad_norm": 1.154659783031204, |
| "learning_rate": 5.876702777742425e-07, |
| "loss": 1.75, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.09669199821189092, |
| "grad_norm": 1.1756635069685608, |
| "learning_rate": 5.876362687549632e-07, |
| "loss": 1.6535, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.09682610639248994, |
| "grad_norm": 1.1127957017636008, |
| "learning_rate": 5.876022139958122e-07, |
| "loss": 1.6513, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.09696021457308895, |
| "grad_norm": 1.1770680572803744, |
| "learning_rate": 5.875681135028358e-07, |
| "loss": 1.6897, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.09709432275368797, |
| "grad_norm": 1.054488251672258, |
| "learning_rate": 5.875339672820877e-07, |
| "loss": 1.7035, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.09722843093428699, |
| "grad_norm": 1.1537946876962146, |
| "learning_rate": 5.874997753396303e-07, |
| "loss": 1.6564, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.097362539114886, |
| "grad_norm": 1.2650547539228134, |
| "learning_rate": 5.874655376815338e-07, |
| "loss": 1.7448, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.09749664729548503, |
| "grad_norm": 1.0865445919691652, |
| "learning_rate": 5.874312543138768e-07, |
| "loss": 1.7492, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.09763075547608405, |
| "grad_norm": 1.0635064685924933, |
| "learning_rate": 5.873969252427457e-07, |
| "loss": 1.569, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.09776486365668306, |
| "grad_norm": 1.1242141873259432, |
| "learning_rate": 5.873625504742354e-07, |
| "loss": 1.6972, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.09789897183728208, |
| "grad_norm": 1.374622796897752, |
| "learning_rate": 5.873281300144483e-07, |
| "loss": 1.66, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0980330800178811, |
| "grad_norm": 1.0742640980921085, |
| "learning_rate": 5.872936638694958e-07, |
| "loss": 1.6395, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.09816718819848011, |
| "grad_norm": 1.1834566808846507, |
| "learning_rate": 5.872591520454964e-07, |
| "loss": 1.6467, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.09830129637907913, |
| "grad_norm": 1.1393523410825188, |
| "learning_rate": 5.872245945485774e-07, |
| "loss": 1.6715, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.09843540455967814, |
| "grad_norm": 1.133914370439065, |
| "learning_rate": 5.871899913848743e-07, |
| "loss": 1.6661, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.09856951274027716, |
| "grad_norm": 1.1318819144753365, |
| "learning_rate": 5.871553425605299e-07, |
| "loss": 1.7463, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.09870362092087617, |
| "grad_norm": 1.119126620886235, |
| "learning_rate": 5.871206480816961e-07, |
| "loss": 1.681, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.09883772910147519, |
| "grad_norm": 1.074480380396243, |
| "learning_rate": 5.870859079545321e-07, |
| "loss": 1.6163, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.0989718372820742, |
| "grad_norm": 1.1208330921778833, |
| "learning_rate": 5.870511221852059e-07, |
| "loss": 1.619, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.09910594546267322, |
| "grad_norm": 1.1594847796734538, |
| "learning_rate": 5.870162907798928e-07, |
| "loss": 1.6592, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.09924005364327224, |
| "grad_norm": 1.058931279874539, |
| "learning_rate": 5.869814137447771e-07, |
| "loss": 1.6851, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.09937416182387125, |
| "grad_norm": 1.1378546192527486, |
| "learning_rate": 5.869464910860505e-07, |
| "loss": 1.7918, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.09950827000447027, |
| "grad_norm": 1.1325033016555488, |
| "learning_rate": 5.869115228099131e-07, |
| "loss": 1.6834, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.09964237818506928, |
| "grad_norm": 1.3421525418201607, |
| "learning_rate": 5.86876508922573e-07, |
| "loss": 1.6549, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.09977648636566831, |
| "grad_norm": 1.1427938179025248, |
| "learning_rate": 5.868414494302465e-07, |
| "loss": 1.6589, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.09991059454626733, |
| "grad_norm": 1.1974168236579015, |
| "learning_rate": 5.86806344339158e-07, |
| "loss": 1.6378, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.10004470272686634, |
| "grad_norm": 1.182005807170805, |
| "learning_rate": 5.867711936555398e-07, |
| "loss": 1.6299, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.10017881090746536, |
| "grad_norm": 1.1347901749058797, |
| "learning_rate": 5.867359973856326e-07, |
| "loss": 1.6285, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.10031291908806438, |
| "grad_norm": 1.0865847111724278, |
| "learning_rate": 5.867007555356848e-07, |
| "loss": 1.5712, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.10044702726866339, |
| "grad_norm": 1.0792499138775284, |
| "learning_rate": 5.866654681119534e-07, |
| "loss": 1.6768, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.10058113544926241, |
| "grad_norm": 1.1459851366680363, |
| "learning_rate": 5.866301351207031e-07, |
| "loss": 1.6162, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.10071524362986142, |
| "grad_norm": 1.0878281762208375, |
| "learning_rate": 5.865947565682066e-07, |
| "loss": 1.6656, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.10084935181046044, |
| "grad_norm": 1.0847043417176385, |
| "learning_rate": 5.865593324607452e-07, |
| "loss": 1.6349, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.10098345999105945, |
| "grad_norm": 1.07175506702241, |
| "learning_rate": 5.865238628046077e-07, |
| "loss": 1.646, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.10111756817165847, |
| "grad_norm": 1.1573886829728748, |
| "learning_rate": 5.864883476060915e-07, |
| "loss": 1.6585, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.10125167635225749, |
| "grad_norm": 1.0662183481503906, |
| "learning_rate": 5.864527868715017e-07, |
| "loss": 1.6685, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.1013857845328565, |
| "grad_norm": 1.1141344678729455, |
| "learning_rate": 5.864171806071517e-07, |
| "loss": 1.7169, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.10151989271345552, |
| "grad_norm": 1.100766756813705, |
| "learning_rate": 5.863815288193628e-07, |
| "loss": 1.6247, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.10165400089405453, |
| "grad_norm": 1.0952255674456979, |
| "learning_rate": 5.863458315144646e-07, |
| "loss": 1.6211, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.10178810907465355, |
| "grad_norm": 1.1257453114351714, |
| "learning_rate": 5.863100886987948e-07, |
| "loss": 1.7725, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.10192221725525256, |
| "grad_norm": 1.1540265958163123, |
| "learning_rate": 5.862743003786989e-07, |
| "loss": 1.7236, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.10205632543585158, |
| "grad_norm": 1.1525383018656805, |
| "learning_rate": 5.862384665605306e-07, |
| "loss": 1.6291, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.10219043361645061, |
| "grad_norm": 1.0998304145799205, |
| "learning_rate": 5.862025872506518e-07, |
| "loss": 1.6707, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.10232454179704963, |
| "grad_norm": 1.1328389993712693, |
| "learning_rate": 5.861666624554323e-07, |
| "loss": 1.7046, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.10245864997764864, |
| "grad_norm": 1.1261717885021774, |
| "learning_rate": 5.861306921812503e-07, |
| "loss": 1.7154, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.10259275815824766, |
| "grad_norm": 1.1225339366672114, |
| "learning_rate": 5.860946764344915e-07, |
| "loss": 1.6906, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.10272686633884667, |
| "grad_norm": 1.0705179266385985, |
| "learning_rate": 5.860586152215504e-07, |
| "loss": 1.6246, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.10286097451944569, |
| "grad_norm": 1.1541152561285446, |
| "learning_rate": 5.860225085488287e-07, |
| "loss": 1.7682, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.1029950827000447, |
| "grad_norm": 1.0637815973415343, |
| "learning_rate": 5.859863564227371e-07, |
| "loss": 1.5644, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.10312919088064372, |
| "grad_norm": 1.4548832416501927, |
| "learning_rate": 5.859501588496937e-07, |
| "loss": 1.6585, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.10326329906124274, |
| "grad_norm": 1.1159025503039528, |
| "learning_rate": 5.859139158361249e-07, |
| "loss": 1.7046, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.10339740724184175, |
| "grad_norm": 1.1310495005094254, |
| "learning_rate": 5.858776273884653e-07, |
| "loss": 1.6818, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.10353151542244077, |
| "grad_norm": 1.0517973047871627, |
| "learning_rate": 5.858412935131574e-07, |
| "loss": 1.6145, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.10366562360303978, |
| "grad_norm": 1.080650360146408, |
| "learning_rate": 5.858049142166517e-07, |
| "loss": 1.6628, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.1037997317836388, |
| "grad_norm": 1.1586931721545415, |
| "learning_rate": 5.857684895054069e-07, |
| "loss": 1.6491, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.10393383996423781, |
| "grad_norm": 1.1442490123077105, |
| "learning_rate": 5.857320193858896e-07, |
| "loss": 1.701, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.10406794814483683, |
| "grad_norm": 1.1690889705843661, |
| "learning_rate": 5.856955038645748e-07, |
| "loss": 1.6635, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.10420205632543585, |
| "grad_norm": 1.0789106990522987, |
| "learning_rate": 5.856589429479454e-07, |
| "loss": 1.7244, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.10433616450603486, |
| "grad_norm": 1.1621702061459454, |
| "learning_rate": 5.856223366424918e-07, |
| "loss": 1.6577, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.10447027268663389, |
| "grad_norm": 1.234518365304015, |
| "learning_rate": 5.855856849547135e-07, |
| "loss": 1.628, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.10460438086723291, |
| "grad_norm": 1.0985603622430586, |
| "learning_rate": 5.855489878911173e-07, |
| "loss": 1.5708, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.10473848904783192, |
| "grad_norm": 1.2290143697832727, |
| "learning_rate": 5.855122454582182e-07, |
| "loss": 1.6148, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.10487259722843094, |
| "grad_norm": 1.0968718099792736, |
| "learning_rate": 5.854754576625395e-07, |
| "loss": 1.6741, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.10500670540902995, |
| "grad_norm": 1.1287867540808152, |
| "learning_rate": 5.854386245106123e-07, |
| "loss": 1.6414, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.10514081358962897, |
| "grad_norm": 1.23300063689037, |
| "learning_rate": 5.854017460089758e-07, |
| "loss": 1.6692, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.10527492177022799, |
| "grad_norm": 1.057896247934459, |
| "learning_rate": 5.853648221641774e-07, |
| "loss": 1.5768, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.105409029950827, |
| "grad_norm": 1.1246918122007368, |
| "learning_rate": 5.853278529827722e-07, |
| "loss": 1.7188, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.10554313813142602, |
| "grad_norm": 1.1394479386508116, |
| "learning_rate": 5.852908384713238e-07, |
| "loss": 1.6904, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.10567724631202503, |
| "grad_norm": 1.111982268532425, |
| "learning_rate": 5.852537786364036e-07, |
| "loss": 1.6384, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.10581135449262405, |
| "grad_norm": 1.1240815270464448, |
| "learning_rate": 5.85216673484591e-07, |
| "loss": 1.7382, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.10594546267322306, |
| "grad_norm": 1.103447231107936, |
| "learning_rate": 5.851795230224736e-07, |
| "loss": 1.7285, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.10607957085382208, |
| "grad_norm": 1.124305841718373, |
| "learning_rate": 5.851423272566469e-07, |
| "loss": 1.5874, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.1062136790344211, |
| "grad_norm": 1.1424352731892036, |
| "learning_rate": 5.851050861937145e-07, |
| "loss": 1.7097, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.10634778721502011, |
| "grad_norm": 1.1724771511120693, |
| "learning_rate": 5.850677998402881e-07, |
| "loss": 1.6847, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.10648189539561913, |
| "grad_norm": 1.1246235851433404, |
| "learning_rate": 5.850304682029874e-07, |
| "loss": 1.6735, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.10661600357621814, |
| "grad_norm": 1.1044843136711693, |
| "learning_rate": 5.849930912884402e-07, |
| "loss": 1.6758, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.10675011175681716, |
| "grad_norm": 1.086861760986685, |
| "learning_rate": 5.849556691032821e-07, |
| "loss": 1.6564, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.10688421993741619, |
| "grad_norm": 1.1156492790718477, |
| "learning_rate": 5.84918201654157e-07, |
| "loss": 1.7699, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.1070183281180152, |
| "grad_norm": 1.105919104931648, |
| "learning_rate": 5.848806889477168e-07, |
| "loss": 1.6673, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.10715243629861422, |
| "grad_norm": 1.1197711837565212, |
| "learning_rate": 5.848431309906213e-07, |
| "loss": 1.6681, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.10728654447921324, |
| "grad_norm": 1.0624511416416331, |
| "learning_rate": 5.848055277895385e-07, |
| "loss": 1.6102, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.10742065265981225, |
| "grad_norm": 1.2004229748929618, |
| "learning_rate": 5.847678793511441e-07, |
| "loss": 1.5863, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.10755476084041127, |
| "grad_norm": 1.0858125624618846, |
| "learning_rate": 5.847301856821225e-07, |
| "loss": 1.5247, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.10768886902101028, |
| "grad_norm": 1.1461866619519925, |
| "learning_rate": 5.846924467891654e-07, |
| "loss": 1.6982, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.1078229772016093, |
| "grad_norm": 1.072949621974548, |
| "learning_rate": 5.846546626789727e-07, |
| "loss": 1.6836, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.10795708538220831, |
| "grad_norm": 1.2070245013041887, |
| "learning_rate": 5.846168333582527e-07, |
| "loss": 1.6951, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.10809119356280733, |
| "grad_norm": 1.1065226823941745, |
| "learning_rate": 5.845789588337217e-07, |
| "loss": 1.6581, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.10822530174340635, |
| "grad_norm": 1.1493594907559954, |
| "learning_rate": 5.845410391121034e-07, |
| "loss": 1.5682, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.10835940992400536, |
| "grad_norm": 1.060419028705976, |
| "learning_rate": 5.845030742001301e-07, |
| "loss": 1.6098, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.10849351810460438, |
| "grad_norm": 1.0986472798667166, |
| "learning_rate": 5.84465064104542e-07, |
| "loss": 1.6998, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.1086276262852034, |
| "grad_norm": 1.0780015294363108, |
| "learning_rate": 5.844270088320872e-07, |
| "loss": 1.6396, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.10876173446580241, |
| "grad_norm": 1.1471597573517582, |
| "learning_rate": 5.843889083895219e-07, |
| "loss": 1.7247, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.10889584264640143, |
| "grad_norm": 1.1383862809473648, |
| "learning_rate": 5.843507627836106e-07, |
| "loss": 1.6618, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.10902995082700044, |
| "grad_norm": 1.1192741205184784, |
| "learning_rate": 5.843125720211251e-07, |
| "loss": 1.6551, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.10916405900759947, |
| "grad_norm": 1.137804969239655, |
| "learning_rate": 5.84274336108846e-07, |
| "loss": 1.7777, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.10929816718819849, |
| "grad_norm": 1.153664414743612, |
| "learning_rate": 5.842360550535614e-07, |
| "loss": 1.693, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.1094322753687975, |
| "grad_norm": 1.2362947655431056, |
| "learning_rate": 5.841977288620676e-07, |
| "loss": 1.7216, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.10956638354939652, |
| "grad_norm": 1.0845642638897275, |
| "learning_rate": 5.84159357541169e-07, |
| "loss": 1.704, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.10970049172999553, |
| "grad_norm": 1.1373055917212407, |
| "learning_rate": 5.841209410976779e-07, |
| "loss": 1.7146, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.10983459991059455, |
| "grad_norm": 1.071610572427508, |
| "learning_rate": 5.840824795384146e-07, |
| "loss": 1.6785, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.10996870809119356, |
| "grad_norm": 1.1237115070149213, |
| "learning_rate": 5.840439728702073e-07, |
| "loss": 1.7022, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.11010281627179258, |
| "grad_norm": 1.1135499435889078, |
| "learning_rate": 5.840054210998925e-07, |
| "loss": 1.6762, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.1102369244523916, |
| "grad_norm": 1.1412142978650357, |
| "learning_rate": 5.839668242343147e-07, |
| "loss": 1.7325, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.11037103263299061, |
| "grad_norm": 1.066696944750096, |
| "learning_rate": 5.839281822803259e-07, |
| "loss": 1.7209, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.11050514081358963, |
| "grad_norm": 1.109425591853705, |
| "learning_rate": 5.838894952447866e-07, |
| "loss": 1.6248, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.11063924899418864, |
| "grad_norm": 1.0738541935378725, |
| "learning_rate": 5.838507631345652e-07, |
| "loss": 1.6582, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.11077335717478766, |
| "grad_norm": 1.4358787492291483, |
| "learning_rate": 5.838119859565381e-07, |
| "loss": 1.807, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.11090746535538668, |
| "grad_norm": 1.1425108913039257, |
| "learning_rate": 5.837731637175898e-07, |
| "loss": 1.6146, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.11104157353598569, |
| "grad_norm": 1.0637227390318094, |
| "learning_rate": 5.837342964246123e-07, |
| "loss": 1.6954, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.1111756817165847, |
| "grad_norm": 1.1694795366123236, |
| "learning_rate": 5.836953840845062e-07, |
| "loss": 1.6337, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.11130978989718372, |
| "grad_norm": 1.1776659131207758, |
| "learning_rate": 5.836564267041799e-07, |
| "loss": 1.7132, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.11144389807778274, |
| "grad_norm": 1.0835328202264551, |
| "learning_rate": 5.836174242905497e-07, |
| "loss": 1.7406, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.11157800625838177, |
| "grad_norm": 1.0933003960120042, |
| "learning_rate": 5.835783768505399e-07, |
| "loss": 1.6104, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.11171211443898078, |
| "grad_norm": 1.075129502416788, |
| "learning_rate": 5.835392843910829e-07, |
| "loss": 1.6599, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.1118462226195798, |
| "grad_norm": 1.1891418452392997, |
| "learning_rate": 5.835001469191191e-07, |
| "loss": 1.6589, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.11198033080017882, |
| "grad_norm": 1.7726602578762463, |
| "learning_rate": 5.834609644415967e-07, |
| "loss": 1.8068, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.11211443898077783, |
| "grad_norm": 1.1160187069875398, |
| "learning_rate": 5.834217369654723e-07, |
| "loss": 1.7302, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.11224854716137685, |
| "grad_norm": 1.2586778829179404, |
| "learning_rate": 5.833824644977098e-07, |
| "loss": 1.5899, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.11238265534197586, |
| "grad_norm": 1.1096559717797458, |
| "learning_rate": 5.833431470452818e-07, |
| "loss": 1.7175, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.11251676352257488, |
| "grad_norm": 1.1754882099239772, |
| "learning_rate": 5.833037846151686e-07, |
| "loss": 1.6674, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.1126508717031739, |
| "grad_norm": 1.030872040717494, |
| "learning_rate": 5.832643772143582e-07, |
| "loss": 1.6117, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.11278497988377291, |
| "grad_norm": 1.1260356355011998, |
| "learning_rate": 5.832249248498472e-07, |
| "loss": 1.6813, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.11291908806437193, |
| "grad_norm": 1.0550888868426265, |
| "learning_rate": 5.831854275286396e-07, |
| "loss": 1.6859, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.11305319624497094, |
| "grad_norm": 1.165191007399385, |
| "learning_rate": 5.831458852577477e-07, |
| "loss": 1.6982, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.11318730442556996, |
| "grad_norm": 1.178851685175072, |
| "learning_rate": 5.831062980441918e-07, |
| "loss": 1.6891, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.11332141260616897, |
| "grad_norm": 1.173173669662085, |
| "learning_rate": 5.830666658949999e-07, |
| "loss": 1.7388, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.11345552078676799, |
| "grad_norm": 1.1552209879477302, |
| "learning_rate": 5.830269888172083e-07, |
| "loss": 1.7383, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.113589628967367, |
| "grad_norm": 1.0974766482142095, |
| "learning_rate": 5.82987266817861e-07, |
| "loss": 1.7139, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.11372373714796602, |
| "grad_norm": 1.1314238053001549, |
| "learning_rate": 5.829474999040102e-07, |
| "loss": 1.6041, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.11385784532856505, |
| "grad_norm": 1.100933720786019, |
| "learning_rate": 5.829076880827159e-07, |
| "loss": 1.7101, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.11399195350916407, |
| "grad_norm": 1.1461722995944397, |
| "learning_rate": 5.828678313610463e-07, |
| "loss": 1.7009, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.11412606168976308, |
| "grad_norm": 1.2722684302580665, |
| "learning_rate": 5.828279297460774e-07, |
| "loss": 1.6484, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.1142601698703621, |
| "grad_norm": 1.1151947943169025, |
| "learning_rate": 5.82787983244893e-07, |
| "loss": 1.655, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.11439427805096111, |
| "grad_norm": 1.1184598730723336, |
| "learning_rate": 5.827479918645852e-07, |
| "loss": 1.6165, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.11452838623156013, |
| "grad_norm": 1.023276016208069, |
| "learning_rate": 5.827079556122542e-07, |
| "loss": 1.4802, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.11466249441215914, |
| "grad_norm": 1.1363089821207286, |
| "learning_rate": 5.826678744950074e-07, |
| "loss": 1.7255, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.11479660259275816, |
| "grad_norm": 1.1011868598006873, |
| "learning_rate": 5.826277485199609e-07, |
| "loss": 1.6958, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.11493071077335718, |
| "grad_norm": 1.1338150939022813, |
| "learning_rate": 5.825875776942388e-07, |
| "loss": 1.7061, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.11506481895395619, |
| "grad_norm": 1.130051416794989, |
| "learning_rate": 5.825473620249724e-07, |
| "loss": 1.7138, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.1151989271345552, |
| "grad_norm": 1.0842663625693372, |
| "learning_rate": 5.825071015193018e-07, |
| "loss": 1.6059, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.11533303531515422, |
| "grad_norm": 1.126331708345394, |
| "learning_rate": 5.824667961843746e-07, |
| "loss": 1.6874, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.11546714349575324, |
| "grad_norm": 1.067788867144983, |
| "learning_rate": 5.824264460273465e-07, |
| "loss": 1.7211, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.11560125167635225, |
| "grad_norm": 1.0567680329056464, |
| "learning_rate": 5.823860510553811e-07, |
| "loss": 1.5729, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.11573535985695127, |
| "grad_norm": 1.088021498471896, |
| "learning_rate": 5.823456112756498e-07, |
| "loss": 1.6884, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.11586946803755029, |
| "grad_norm": 1.1157283518569765, |
| "learning_rate": 5.823051266953325e-07, |
| "loss": 1.6806, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.1160035762181493, |
| "grad_norm": 1.0681883774872867, |
| "learning_rate": 5.822645973216165e-07, |
| "loss": 1.6397, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.11613768439874832, |
| "grad_norm": 1.0861783292304394, |
| "learning_rate": 5.822240231616973e-07, |
| "loss": 1.575, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.11627179257934735, |
| "grad_norm": 1.068546853668492, |
| "learning_rate": 5.821834042227783e-07, |
| "loss": 1.6436, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.11640590075994636, |
| "grad_norm": 1.1370891534192904, |
| "learning_rate": 5.821427405120708e-07, |
| "loss": 1.7133, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.11654000894054538, |
| "grad_norm": 1.0975985479163, |
| "learning_rate": 5.821020320367942e-07, |
| "loss": 1.7395, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.1166741171211444, |
| "grad_norm": 1.0979310675749658, |
| "learning_rate": 5.820612788041756e-07, |
| "loss": 1.733, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.11680822530174341, |
| "grad_norm": 1.1290790783874916, |
| "learning_rate": 5.820204808214503e-07, |
| "loss": 1.5963, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.11694233348234243, |
| "grad_norm": 1.0767125460282738, |
| "learning_rate": 5.819796380958613e-07, |
| "loss": 1.7139, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.11707644166294144, |
| "grad_norm": 1.242641974109421, |
| "learning_rate": 5.819387506346598e-07, |
| "loss": 1.7068, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.11721054984354046, |
| "grad_norm": 1.0978061234757794, |
| "learning_rate": 5.818978184451048e-07, |
| "loss": 1.625, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.11734465802413947, |
| "grad_norm": 1.0887952709463755, |
| "learning_rate": 5.818568415344633e-07, |
| "loss": 1.6017, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.11747876620473849, |
| "grad_norm": 1.0584442299701264, |
| "learning_rate": 5.818158199100101e-07, |
| "loss": 1.7367, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.1176128743853375, |
| "grad_norm": 1.0996935525118328, |
| "learning_rate": 5.817747535790283e-07, |
| "loss": 1.6186, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.11774698256593652, |
| "grad_norm": 1.1314747020843203, |
| "learning_rate": 5.817336425488082e-07, |
| "loss": 1.6249, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.11788109074653554, |
| "grad_norm": 1.1919795844521832, |
| "learning_rate": 5.81692486826649e-07, |
| "loss": 1.6532, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.11801519892713455, |
| "grad_norm": 1.305262723197089, |
| "learning_rate": 5.816512864198571e-07, |
| "loss": 1.5978, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.11814930710773357, |
| "grad_norm": 1.1155976857853542, |
| "learning_rate": 5.816100413357471e-07, |
| "loss": 1.6797, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.11828341528833258, |
| "grad_norm": 1.123108419027786, |
| "learning_rate": 5.815687515816415e-07, |
| "loss": 1.5944, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.1184175234689316, |
| "grad_norm": 1.1318300431723485, |
| "learning_rate": 5.815274171648709e-07, |
| "loss": 1.6328, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.11855163164953063, |
| "grad_norm": 1.1498251619378483, |
| "learning_rate": 5.814860380927734e-07, |
| "loss": 1.6131, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.11868573983012964, |
| "grad_norm": 1.0940645690658886, |
| "learning_rate": 5.814446143726956e-07, |
| "loss": 1.6142, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.11881984801072866, |
| "grad_norm": 1.0820516072736348, |
| "learning_rate": 5.814031460119914e-07, |
| "loss": 1.6148, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.11895395619132768, |
| "grad_norm": 1.1247339726082044, |
| "learning_rate": 5.813616330180233e-07, |
| "loss": 1.7608, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.11908806437192669, |
| "grad_norm": 1.3664008359044402, |
| "learning_rate": 5.813200753981611e-07, |
| "loss": 1.6969, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.11922217255252571, |
| "grad_norm": 1.1603697359280436, |
| "learning_rate": 5.812784731597829e-07, |
| "loss": 1.7402, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.11935628073312472, |
| "grad_norm": 1.1010475016983683, |
| "learning_rate": 5.812368263102746e-07, |
| "loss": 1.759, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.11949038891372374, |
| "grad_norm": 1.1085219941083455, |
| "learning_rate": 5.811951348570302e-07, |
| "loss": 1.667, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.11962449709432275, |
| "grad_norm": 1.1139382749577305, |
| "learning_rate": 5.811533988074512e-07, |
| "loss": 1.6677, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.11975860527492177, |
| "grad_norm": 1.1325956159096344, |
| "learning_rate": 5.811116181689475e-07, |
| "loss": 1.7068, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.11989271345552079, |
| "grad_norm": 1.0408410504808954, |
| "learning_rate": 5.810697929489365e-07, |
| "loss": 1.6708, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.1200268216361198, |
| "grad_norm": 1.0658514906014669, |
| "learning_rate": 5.810279231548439e-07, |
| "loss": 1.6833, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.12016092981671882, |
| "grad_norm": 1.0840346983956348, |
| "learning_rate": 5.80986008794103e-07, |
| "loss": 1.6973, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.12029503799731783, |
| "grad_norm": 1.1508325943207491, |
| "learning_rate": 5.809440498741552e-07, |
| "loss": 1.7326, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.12042914617791685, |
| "grad_norm": 1.0629236207923716, |
| "learning_rate": 5.809020464024496e-07, |
| "loss": 1.5428, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.12056325435851586, |
| "grad_norm": 1.112200747649366, |
| "learning_rate": 5.808599983864435e-07, |
| "loss": 1.6729, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.12069736253911488, |
| "grad_norm": 1.2078470991285137, |
| "learning_rate": 5.80817905833602e-07, |
| "loss": 1.738, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.1208314707197139, |
| "grad_norm": 1.1190068417460075, |
| "learning_rate": 5.807757687513979e-07, |
| "loss": 1.6607, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.12096557890031293, |
| "grad_norm": 1.0450615497760403, |
| "learning_rate": 5.807335871473122e-07, |
| "loss": 1.6588, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.12109968708091194, |
| "grad_norm": 1.121198054415324, |
| "learning_rate": 5.806913610288336e-07, |
| "loss": 1.662, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.12123379526151096, |
| "grad_norm": 1.1054682653267978, |
| "learning_rate": 5.806490904034589e-07, |
| "loss": 1.6706, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.12136790344210997, |
| "grad_norm": 1.113997411395293, |
| "learning_rate": 5.806067752786926e-07, |
| "loss": 1.7632, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.12150201162270899, |
| "grad_norm": 1.1613864633248003, |
| "learning_rate": 5.805644156620472e-07, |
| "loss": 1.7098, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.121636119803308, |
| "grad_norm": 1.1055893873511211, |
| "learning_rate": 5.805220115610431e-07, |
| "loss": 1.7946, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.12177022798390702, |
| "grad_norm": 1.059537639783976, |
| "learning_rate": 5.804795629832085e-07, |
| "loss": 1.6377, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.12190433616450604, |
| "grad_norm": 1.075756870276535, |
| "learning_rate": 5.804370699360796e-07, |
| "loss": 1.6709, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.12203844434510505, |
| "grad_norm": 1.0951662603881447, |
| "learning_rate": 5.803945324272006e-07, |
| "loss": 1.6114, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.12217255252570407, |
| "grad_norm": 1.0835170338297386, |
| "learning_rate": 5.803519504641234e-07, |
| "loss": 1.6945, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.12230666070630308, |
| "grad_norm": 1.188508933084379, |
| "learning_rate": 5.803093240544077e-07, |
| "loss": 1.7176, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.1224407688869021, |
| "grad_norm": 1.0574940351976068, |
| "learning_rate": 5.802666532056215e-07, |
| "loss": 1.6449, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.12257487706750111, |
| "grad_norm": 1.1011954691706793, |
| "learning_rate": 5.802239379253403e-07, |
| "loss": 1.7403, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.12270898524810013, |
| "grad_norm": 1.05289982245001, |
| "learning_rate": 5.801811782211476e-07, |
| "loss": 1.7121, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.12284309342869915, |
| "grad_norm": 1.1247742251938873, |
| "learning_rate": 5.801383741006349e-07, |
| "loss": 1.6904, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.12297720160929816, |
| "grad_norm": 1.1060690034689273, |
| "learning_rate": 5.800955255714014e-07, |
| "loss": 1.5423, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.12311130978989718, |
| "grad_norm": 1.17690980567079, |
| "learning_rate": 5.800526326410544e-07, |
| "loss": 1.6638, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.12324541797049621, |
| "grad_norm": 1.0758724475892376, |
| "learning_rate": 5.800096953172088e-07, |
| "loss": 1.7136, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.12337952615109522, |
| "grad_norm": 1.0847412248840858, |
| "learning_rate": 5.799667136074878e-07, |
| "loss": 1.7712, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.12351363433169424, |
| "grad_norm": 1.1331387033738405, |
| "learning_rate": 5.799236875195219e-07, |
| "loss": 1.664, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.12364774251229325, |
| "grad_norm": 1.3262309930515026, |
| "learning_rate": 5.798806170609502e-07, |
| "loss": 1.6546, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.12378185069289227, |
| "grad_norm": 1.1280111604345993, |
| "learning_rate": 5.79837502239419e-07, |
| "loss": 1.6623, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.12391595887349129, |
| "grad_norm": 1.1001484560762704, |
| "learning_rate": 5.797943430625828e-07, |
| "loss": 1.6743, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.1240500670540903, |
| "grad_norm": 1.1051963249243846, |
| "learning_rate": 5.79751139538104e-07, |
| "loss": 1.6542, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.12418417523468932, |
| "grad_norm": 2.096743814606382, |
| "learning_rate": 5.797078916736527e-07, |
| "loss": 1.7618, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.12431828341528833, |
| "grad_norm": 1.1918807746678728, |
| "learning_rate": 5.79664599476907e-07, |
| "loss": 1.7489, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.12445239159588735, |
| "grad_norm": 1.2255902304289672, |
| "learning_rate": 5.79621262955553e-07, |
| "loss": 1.805, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.12458649977648636, |
| "grad_norm": 1.1112711388204457, |
| "learning_rate": 5.795778821172845e-07, |
| "loss": 1.6535, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.12472060795708538, |
| "grad_norm": 1.15632851861526, |
| "learning_rate": 5.79534456969803e-07, |
| "loss": 1.7674, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.1248547161376844, |
| "grad_norm": 1.1364857063021152, |
| "learning_rate": 5.794909875208182e-07, |
| "loss": 1.6668, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.12498882431828341, |
| "grad_norm": 1.1554164021245972, |
| "learning_rate": 5.794474737780474e-07, |
| "loss": 1.6862, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.12512293249888243, |
| "grad_norm": 1.1360253650713825, |
| "learning_rate": 5.79403915749216e-07, |
| "loss": 1.6811, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.12525704067948146, |
| "grad_norm": 1.066412847829235, |
| "learning_rate": 5.793603134420571e-07, |
| "loss": 1.6562, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.12539114886008046, |
| "grad_norm": 1.081900817528408, |
| "learning_rate": 5.793166668643118e-07, |
| "loss": 1.6319, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.1255252570406795, |
| "grad_norm": 1.12430422704736, |
| "learning_rate": 5.792729760237288e-07, |
| "loss": 1.6679, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.1256593652212785, |
| "grad_norm": 1.1555451362888864, |
| "learning_rate": 5.79229240928065e-07, |
| "loss": 1.6272, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.12579347340187752, |
| "grad_norm": 1.1120423598959, |
| "learning_rate": 5.791854615850848e-07, |
| "loss": 1.7271, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.12592758158247652, |
| "grad_norm": 1.099822375040922, |
| "learning_rate": 5.791416380025607e-07, |
| "loss": 1.6762, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.12606168976307555, |
| "grad_norm": 1.1055384980174303, |
| "learning_rate": 5.79097770188273e-07, |
| "loss": 1.6526, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.12619579794367455, |
| "grad_norm": 1.1135160613742192, |
| "learning_rate": 5.7905385815001e-07, |
| "loss": 1.7112, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.12632990612427358, |
| "grad_norm": 1.172524893436665, |
| "learning_rate": 5.790099018955674e-07, |
| "loss": 1.6629, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.12646401430487259, |
| "grad_norm": 1.143908651612981, |
| "learning_rate": 5.789659014327492e-07, |
| "loss": 1.6004, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.12659812248547161, |
| "grad_norm": 1.0950798365706262, |
| "learning_rate": 5.789218567693672e-07, |
| "loss": 1.6794, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.12673223066607062, |
| "grad_norm": 1.0865150988933485, |
| "learning_rate": 5.788777679132408e-07, |
| "loss": 1.7733, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.12686633884666965, |
| "grad_norm": 1.081699940619205, |
| "learning_rate": 5.788336348721972e-07, |
| "loss": 1.6587, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.12700044702726868, |
| "grad_norm": 1.0733926398236942, |
| "learning_rate": 5.787894576540721e-07, |
| "loss": 1.6461, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.12713455520786768, |
| "grad_norm": 1.126195585933314, |
| "learning_rate": 5.787452362667083e-07, |
| "loss": 1.6838, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.1272686633884667, |
| "grad_norm": 1.1329864382691732, |
| "learning_rate": 5.787009707179567e-07, |
| "loss": 1.6329, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.1274027715690657, |
| "grad_norm": 1.1004395022968605, |
| "learning_rate": 5.786566610156759e-07, |
| "loss": 1.7147, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.12753687974966474, |
| "grad_norm": 1.0391080576189866, |
| "learning_rate": 5.78612307167733e-07, |
| "loss": 1.6315, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.12767098793026374, |
| "grad_norm": 1.0855474578853979, |
| "learning_rate": 5.78567909182002e-07, |
| "loss": 1.7127, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.12780509611086277, |
| "grad_norm": 1.1433214364150983, |
| "learning_rate": 5.785234670663652e-07, |
| "loss": 1.7042, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.12793920429146177, |
| "grad_norm": 1.0903898099360794, |
| "learning_rate": 5.784789808287129e-07, |
| "loss": 1.749, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.1280733124720608, |
| "grad_norm": 1.1462757739762268, |
| "learning_rate": 5.784344504769428e-07, |
| "loss": 1.7118, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.1282074206526598, |
| "grad_norm": 1.0944948131751315, |
| "learning_rate": 5.783898760189609e-07, |
| "loss": 1.7308, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.12834152883325883, |
| "grad_norm": 1.0898739853739683, |
| "learning_rate": 5.783452574626806e-07, |
| "loss": 1.5947, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.12847563701385784, |
| "grad_norm": 1.1070871512716438, |
| "learning_rate": 5.783005948160236e-07, |
| "loss": 1.7032, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.12860974519445686, |
| "grad_norm": 1.1173517977218599, |
| "learning_rate": 5.782558880869187e-07, |
| "loss": 1.76, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.12874385337505587, |
| "grad_norm": 1.0784753543720036, |
| "learning_rate": 5.782111372833035e-07, |
| "loss": 1.6817, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.1288779615556549, |
| "grad_norm": 1.099729300157914, |
| "learning_rate": 5.781663424131225e-07, |
| "loss": 1.5885, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.1290120697362539, |
| "grad_norm": 1.1053155402387764, |
| "learning_rate": 5.781215034843288e-07, |
| "loss": 1.649, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.12914617791685293, |
| "grad_norm": 1.0498243431495933, |
| "learning_rate": 5.780766205048826e-07, |
| "loss": 1.6, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.12928028609745196, |
| "grad_norm": 1.0650679197683777, |
| "learning_rate": 5.780316934827524e-07, |
| "loss": 1.7031, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.12941439427805096, |
| "grad_norm": 1.2041255427364985, |
| "learning_rate": 5.779867224259144e-07, |
| "loss": 1.7187, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.12954850245865, |
| "grad_norm": 1.0678692273869028, |
| "learning_rate": 5.779417073423526e-07, |
| "loss": 1.6825, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.129682610639249, |
| "grad_norm": 1.1199711834538628, |
| "learning_rate": 5.778966482400589e-07, |
| "loss": 1.6826, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.12981671881984802, |
| "grad_norm": 1.3086828320370905, |
| "learning_rate": 5.778515451270329e-07, |
| "loss": 1.6527, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.12995082700044702, |
| "grad_norm": 1.1283872527591725, |
| "learning_rate": 5.77806398011282e-07, |
| "loss": 1.6979, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.13008493518104605, |
| "grad_norm": 1.6891339086777561, |
| "learning_rate": 5.777612069008215e-07, |
| "loss": 1.6052, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.13021904336164505, |
| "grad_norm": 1.0995419197341152, |
| "learning_rate": 5.777159718036745e-07, |
| "loss": 1.6741, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.13035315154224408, |
| "grad_norm": 1.0826527648905109, |
| "learning_rate": 5.776706927278718e-07, |
| "loss": 1.7414, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.13048725972284309, |
| "grad_norm": 1.1749450180853513, |
| "learning_rate": 5.776253696814523e-07, |
| "loss": 1.7253, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.13062136790344211, |
| "grad_norm": 1.1522644681889058, |
| "learning_rate": 5.775800026724622e-07, |
| "loss": 1.7109, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.13075547608404112, |
| "grad_norm": 1.1287433508002416, |
| "learning_rate": 5.775345917089561e-07, |
| "loss": 1.7602, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.13088958426464015, |
| "grad_norm": 1.1367208391544785, |
| "learning_rate": 5.77489136798996e-07, |
| "loss": 1.7096, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.13102369244523915, |
| "grad_norm": 1.093651839491161, |
| "learning_rate": 5.774436379506516e-07, |
| "loss": 1.7313, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.13115780062583818, |
| "grad_norm": 1.1158114646345074, |
| "learning_rate": 5.773980951720009e-07, |
| "loss": 1.7152, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.13129190880643718, |
| "grad_norm": 1.1405133501951592, |
| "learning_rate": 5.773525084711293e-07, |
| "loss": 1.6721, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.1314260169870362, |
| "grad_norm": 1.0757304379815442, |
| "learning_rate": 5.773068778561302e-07, |
| "loss": 1.64, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.13156012516763524, |
| "grad_norm": 1.0607235063703084, |
| "learning_rate": 5.772612033351045e-07, |
| "loss": 1.7254, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.13169423334823424, |
| "grad_norm": 1.0583251896426324, |
| "learning_rate": 5.772154849161613e-07, |
| "loss": 1.687, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.13182834152883327, |
| "grad_norm": 1.098628320814992, |
| "learning_rate": 5.771697226074171e-07, |
| "loss": 1.635, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.13196244970943227, |
| "grad_norm": 1.1805474022437217, |
| "learning_rate": 5.771239164169966e-07, |
| "loss": 1.6698, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.1320965578900313, |
| "grad_norm": 1.0875587476789947, |
| "learning_rate": 5.77078066353032e-07, |
| "loss": 1.6354, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.1322306660706303, |
| "grad_norm": 1.2112176511625345, |
| "learning_rate": 5.770321724236633e-07, |
| "loss": 1.7872, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.13236477425122933, |
| "grad_norm": 1.2350020465740164, |
| "learning_rate": 5.769862346370384e-07, |
| "loss": 1.7646, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.13249888243182834, |
| "grad_norm": 1.1782226253464931, |
| "learning_rate": 5.769402530013128e-07, |
| "loss": 1.7215, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.13263299061242736, |
| "grad_norm": 1.0995226058236465, |
| "learning_rate": 5.768942275246503e-07, |
| "loss": 1.6472, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.13276709879302637, |
| "grad_norm": 1.1354276853120844, |
| "learning_rate": 5.768481582152218e-07, |
| "loss": 1.7206, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.1329012069736254, |
| "grad_norm": 1.1299465711204602, |
| "learning_rate": 5.768020450812064e-07, |
| "loss": 1.6917, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.1330353151542244, |
| "grad_norm": 1.0767689418910376, |
| "learning_rate": 5.767558881307906e-07, |
| "loss": 1.6643, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.13316942333482343, |
| "grad_norm": 1.1138902596082148, |
| "learning_rate": 5.767096873721693e-07, |
| "loss": 1.7642, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.13330353151542243, |
| "grad_norm": 1.1056642001660029, |
| "learning_rate": 5.766634428135447e-07, |
| "loss": 1.689, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.13343763969602146, |
| "grad_norm": 1.0482595089911335, |
| "learning_rate": 5.76617154463127e-07, |
| "loss": 1.635, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.13357174787662046, |
| "grad_norm": 1.0936790475077613, |
| "learning_rate": 5.765708223291338e-07, |
| "loss": 1.6614, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.1337058560572195, |
| "grad_norm": 1.1904352264236198, |
| "learning_rate": 5.765244464197911e-07, |
| "loss": 1.6631, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.13383996423781852, |
| "grad_norm": 1.1399324270789883, |
| "learning_rate": 5.76478026743332e-07, |
| "loss": 1.6956, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.13397407241841752, |
| "grad_norm": 1.0631541550252919, |
| "learning_rate": 5.76431563307998e-07, |
| "loss": 1.6357, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.13410818059901655, |
| "grad_norm": 2.7939617071812304, |
| "learning_rate": 5.763850561220378e-07, |
| "loss": 1.7513, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.13424228877961555, |
| "grad_norm": 1.1023053650764323, |
| "learning_rate": 5.763385051937082e-07, |
| "loss": 1.6986, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.13437639696021458, |
| "grad_norm": 1.1134127723095217, |
| "learning_rate": 5.762919105312739e-07, |
| "loss": 1.6972, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.13451050514081359, |
| "grad_norm": 1.3206325684664686, |
| "learning_rate": 5.762452721430068e-07, |
| "loss": 1.6561, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.13464461332141262, |
| "grad_norm": 1.1017815335316827, |
| "learning_rate": 5.761985900371871e-07, |
| "loss": 1.6294, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.13477872150201162, |
| "grad_norm": 1.091998126330244, |
| "learning_rate": 5.761518642221027e-07, |
| "loss": 1.6645, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.13491282968261065, |
| "grad_norm": 1.1390065790034687, |
| "learning_rate": 5.76105094706049e-07, |
| "loss": 1.6634, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.13504693786320965, |
| "grad_norm": 1.1165938666136697, |
| "learning_rate": 5.760582814973294e-07, |
| "loss": 1.6884, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.13518104604380868, |
| "grad_norm": 1.1265961333800854, |
| "learning_rate": 5.760114246042548e-07, |
| "loss": 1.581, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.13531515422440768, |
| "grad_norm": 1.1108402335230954, |
| "learning_rate": 5.759645240351442e-07, |
| "loss": 1.6948, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.1354492624050067, |
| "grad_norm": 1.1540406201851725, |
| "learning_rate": 5.75917579798324e-07, |
| "loss": 1.6816, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.1355833705856057, |
| "grad_norm": 1.0776760932575635, |
| "learning_rate": 5.758705919021285e-07, |
| "loss": 1.6455, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.13571747876620474, |
| "grad_norm": 1.1626622938941558, |
| "learning_rate": 5.758235603549001e-07, |
| "loss": 1.7679, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.13585158694680374, |
| "grad_norm": 1.187443307470314, |
| "learning_rate": 5.757764851649882e-07, |
| "loss": 1.6258, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.13598569512740277, |
| "grad_norm": 1.1483737298574974, |
| "learning_rate": 5.757293663407507e-07, |
| "loss": 1.7531, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.13611980330800177, |
| "grad_norm": 1.108423451892347, |
| "learning_rate": 5.756822038905527e-07, |
| "loss": 1.5847, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.1362539114886008, |
| "grad_norm": 1.056521665647446, |
| "learning_rate": 5.756349978227674e-07, |
| "loss": 1.6545, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.13638801966919983, |
| "grad_norm": 1.122523040636454, |
| "learning_rate": 5.755877481457756e-07, |
| "loss": 1.6762, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.13652212784979884, |
| "grad_norm": 1.1104212906292141, |
| "learning_rate": 5.755404548679657e-07, |
| "loss": 1.6761, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.13665623603039787, |
| "grad_norm": 1.0971062205375117, |
| "learning_rate": 5.75493117997734e-07, |
| "loss": 1.6676, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.13679034421099687, |
| "grad_norm": 1.1923600261259284, |
| "learning_rate": 5.754457375434848e-07, |
| "loss": 1.6966, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.1369244523915959, |
| "grad_norm": 1.1577052085464195, |
| "learning_rate": 5.753983135136295e-07, |
| "loss": 1.7123, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.1370585605721949, |
| "grad_norm": 1.1404232349413184, |
| "learning_rate": 5.753508459165879e-07, |
| "loss": 1.703, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.13719266875279393, |
| "grad_norm": 1.392333260935911, |
| "learning_rate": 5.75303334760787e-07, |
| "loss": 1.7096, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.13732677693339293, |
| "grad_norm": 1.113423870991827, |
| "learning_rate": 5.75255780054662e-07, |
| "loss": 1.7556, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.13746088511399196, |
| "grad_norm": 1.0653465618827531, |
| "learning_rate": 5.752081818066555e-07, |
| "loss": 1.7324, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.13759499329459096, |
| "grad_norm": 1.0145309694174296, |
| "learning_rate": 5.751605400252179e-07, |
| "loss": 1.684, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.13772910147519, |
| "grad_norm": 1.1507242589279925, |
| "learning_rate": 5.751128547188073e-07, |
| "loss": 1.7363, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.137863209655789, |
| "grad_norm": 1.1602441710831857, |
| "learning_rate": 5.750651258958897e-07, |
| "loss": 1.6452, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.13799731783638802, |
| "grad_norm": 1.0450164574336993, |
| "learning_rate": 5.750173535649387e-07, |
| "loss": 1.6581, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.13813142601698702, |
| "grad_norm": 1.1152601638616617, |
| "learning_rate": 5.749695377344356e-07, |
| "loss": 1.7178, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.13826553419758605, |
| "grad_norm": 1.1109479531814108, |
| "learning_rate": 5.749216784128695e-07, |
| "loss": 1.6318, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.13839964237818506, |
| "grad_norm": 1.1171173194344595, |
| "learning_rate": 5.748737756087372e-07, |
| "loss": 1.7563, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.13853375055878409, |
| "grad_norm": 1.1229721774030046, |
| "learning_rate": 5.74825829330543e-07, |
| "loss": 1.6557, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.13866785873938312, |
| "grad_norm": 1.0610467262170575, |
| "learning_rate": 5.747778395867995e-07, |
| "loss": 1.5954, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.13880196691998212, |
| "grad_norm": 1.057400993985582, |
| "learning_rate": 5.747298063860264e-07, |
| "loss": 1.6836, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.13893607510058115, |
| "grad_norm": 1.2946727429654457, |
| "learning_rate": 5.746817297367512e-07, |
| "loss": 1.7718, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.13907018328118015, |
| "grad_norm": 1.0793836410907007, |
| "learning_rate": 5.746336096475097e-07, |
| "loss": 1.6192, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.13920429146177918, |
| "grad_norm": 1.0456487983417475, |
| "learning_rate": 5.745854461268445e-07, |
| "loss": 1.6997, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.13933839964237818, |
| "grad_norm": 1.0783776132275518, |
| "learning_rate": 5.745372391833066e-07, |
| "loss": 1.5643, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.1394725078229772, |
| "grad_norm": 1.1073544797133057, |
| "learning_rate": 5.744889888254545e-07, |
| "loss": 1.7453, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.1396066160035762, |
| "grad_norm": 1.0897237578625294, |
| "learning_rate": 5.744406950618546e-07, |
| "loss": 1.7507, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.13974072418417524, |
| "grad_norm": 1.1334242880215313, |
| "learning_rate": 5.743923579010804e-07, |
| "loss": 1.5952, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.13987483236477424, |
| "grad_norm": 1.0794611740077888, |
| "learning_rate": 5.743439773517138e-07, |
| "loss": 1.6699, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.14000894054537327, |
| "grad_norm": 1.2221425859227393, |
| "learning_rate": 5.742955534223441e-07, |
| "loss": 1.6667, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.14014304872597227, |
| "grad_norm": 1.0734586645398891, |
| "learning_rate": 5.742470861215682e-07, |
| "loss": 1.7595, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.1402771569065713, |
| "grad_norm": 1.1044082425274806, |
| "learning_rate": 5.74198575457991e-07, |
| "loss": 1.6741, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.1404112650871703, |
| "grad_norm": 1.114278005814131, |
| "learning_rate": 5.741500214402247e-07, |
| "loss": 1.6869, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.14054537326776934, |
| "grad_norm": 1.1185672447220645, |
| "learning_rate": 5.741014240768896e-07, |
| "loss": 1.7676, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.14067948144836834, |
| "grad_norm": 1.1307460519899954, |
| "learning_rate": 5.740527833766135e-07, |
| "loss": 1.7232, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.14081358962896737, |
| "grad_norm": 1.1013230366573936, |
| "learning_rate": 5.740040993480318e-07, |
| "loss": 1.7287, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1409476978095664, |
| "grad_norm": 1.2887563539916567, |
| "learning_rate": 5.739553719997877e-07, |
| "loss": 1.6725, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.1410818059901654, |
| "grad_norm": 1.128200473385445, |
| "learning_rate": 5.739066013405322e-07, |
| "loss": 1.7193, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.14121591417076443, |
| "grad_norm": 1.0948929309224316, |
| "learning_rate": 5.738577873789237e-07, |
| "loss": 1.6993, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.14135002235136343, |
| "grad_norm": 1.0842896614577642, |
| "learning_rate": 5.738089301236286e-07, |
| "loss": 1.7045, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.14148413053196246, |
| "grad_norm": 1.0699301937780477, |
| "learning_rate": 5.73760029583321e-07, |
| "loss": 1.7216, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.14161823871256146, |
| "grad_norm": 1.0958889223597748, |
| "learning_rate": 5.737110857666822e-07, |
| "loss": 1.6649, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.1417523468931605, |
| "grad_norm": 1.0656247406409773, |
| "learning_rate": 5.736620986824017e-07, |
| "loss": 1.683, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.1418864550737595, |
| "grad_norm": 1.2444649158517036, |
| "learning_rate": 5.736130683391765e-07, |
| "loss": 1.6188, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.14202056325435852, |
| "grad_norm": 1.0989443966595032, |
| "learning_rate": 5.735639947457113e-07, |
| "loss": 1.7038, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.14215467143495752, |
| "grad_norm": 1.142667824771637, |
| "learning_rate": 5.735148779107184e-07, |
| "loss": 1.6156, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.14228877961555655, |
| "grad_norm": 1.1299828935757683, |
| "learning_rate": 5.734657178429179e-07, |
| "loss": 1.6754, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.14242288779615556, |
| "grad_norm": 1.0986771884553144, |
| "learning_rate": 5.734165145510375e-07, |
| "loss": 1.6201, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.14255699597675459, |
| "grad_norm": 1.0853274840023213, |
| "learning_rate": 5.733672680438124e-07, |
| "loss": 1.6885, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.1426911041573536, |
| "grad_norm": 1.0820811488797877, |
| "learning_rate": 5.73317978329986e-07, |
| "loss": 1.7995, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.14282521233795262, |
| "grad_norm": 1.1295149364952306, |
| "learning_rate": 5.732686454183087e-07, |
| "loss": 1.6925, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.14295932051855162, |
| "grad_norm": 1.057888764325057, |
| "learning_rate": 5.732192693175391e-07, |
| "loss": 1.6412, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.14309342869915065, |
| "grad_norm": 1.098616962497695, |
| "learning_rate": 5.731698500364434e-07, |
| "loss": 1.6271, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.14322753687974968, |
| "grad_norm": 1.2745609637830848, |
| "learning_rate": 5.731203875837949e-07, |
| "loss": 1.671, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.14336164506034868, |
| "grad_norm": 1.120730846705753, |
| "learning_rate": 5.730708819683753e-07, |
| "loss": 1.7433, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.1434957532409477, |
| "grad_norm": 1.1177693123454027, |
| "learning_rate": 5.730213331989736e-07, |
| "loss": 1.7291, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.1436298614215467, |
| "grad_norm": 1.0910765331643333, |
| "learning_rate": 5.729717412843866e-07, |
| "loss": 1.6739, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.14376396960214574, |
| "grad_norm": 1.1741168573690484, |
| "learning_rate": 5.729221062334186e-07, |
| "loss": 1.7401, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.14389807778274474, |
| "grad_norm": 1.2230565196681809, |
| "learning_rate": 5.728724280548815e-07, |
| "loss": 1.6466, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.14403218596334377, |
| "grad_norm": 1.075125807457348, |
| "learning_rate": 5.728227067575953e-07, |
| "loss": 1.6632, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.14416629414394277, |
| "grad_norm": 1.0629310683077087, |
| "learning_rate": 5.727729423503871e-07, |
| "loss": 1.6456, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.1443004023245418, |
| "grad_norm": 1.131277162697691, |
| "learning_rate": 5.72723134842092e-07, |
| "loss": 1.7069, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.1444345105051408, |
| "grad_norm": 1.4319225703993534, |
| "learning_rate": 5.726732842415527e-07, |
| "loss": 1.7104, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.14456861868573984, |
| "grad_norm": 1.1218543441609072, |
| "learning_rate": 5.726233905576194e-07, |
| "loss": 1.8235, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.14470272686633884, |
| "grad_norm": 1.0682688173779038, |
| "learning_rate": 5.725734537991502e-07, |
| "loss": 1.7334, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.14483683504693787, |
| "grad_norm": 1.0513899411618064, |
| "learning_rate": 5.725234739750106e-07, |
| "loss": 1.564, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.14497094322753687, |
| "grad_norm": 1.073556864405118, |
| "learning_rate": 5.724734510940738e-07, |
| "loss": 1.6191, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.1451050514081359, |
| "grad_norm": 1.1272658425201874, |
| "learning_rate": 5.724233851652208e-07, |
| "loss": 1.5812, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.1452391595887349, |
| "grad_norm": 1.1649864304286308, |
| "learning_rate": 5.723732761973399e-07, |
| "loss": 1.7974, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.14537326776933393, |
| "grad_norm": 1.1842565824330795, |
| "learning_rate": 5.723231241993277e-07, |
| "loss": 1.642, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.14550737594993293, |
| "grad_norm": 1.1226873500626315, |
| "learning_rate": 5.722729291800877e-07, |
| "loss": 1.648, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.14564148413053196, |
| "grad_norm": 1.074175742058312, |
| "learning_rate": 5.722226911485315e-07, |
| "loss": 1.6477, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.145775592311131, |
| "grad_norm": 1.6414796585857712, |
| "learning_rate": 5.721724101135781e-07, |
| "loss": 1.6099, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.14590970049173, |
| "grad_norm": 1.1490676419596029, |
| "learning_rate": 5.721220860841543e-07, |
| "loss": 1.5671, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.14604380867232902, |
| "grad_norm": 1.0434774110585503, |
| "learning_rate": 5.720717190691943e-07, |
| "loss": 1.6001, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.14617791685292802, |
| "grad_norm": 1.0806260779363936, |
| "learning_rate": 5.720213090776403e-07, |
| "loss": 1.7541, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.14631202503352705, |
| "grad_norm": 1.1814630509058974, |
| "learning_rate": 5.719708561184417e-07, |
| "loss": 1.6864, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.14644613321412606, |
| "grad_norm": 1.0965207690798646, |
| "learning_rate": 5.719203602005559e-07, |
| "loss": 1.7179, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.14658024139472509, |
| "grad_norm": 1.187634257937833, |
| "learning_rate": 5.718698213329479e-07, |
| "loss": 1.5889, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.1467143495753241, |
| "grad_norm": 1.151719981823989, |
| "learning_rate": 5.718192395245899e-07, |
| "loss": 1.6503, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.14684845775592312, |
| "grad_norm": 1.0407283688373252, |
| "learning_rate": 5.717686147844622e-07, |
| "loss": 1.5976, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.14698256593652212, |
| "grad_norm": 1.0743575974553181, |
| "learning_rate": 5.717179471215527e-07, |
| "loss": 1.7028, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.14711667411712115, |
| "grad_norm": 1.080606301144591, |
| "learning_rate": 5.716672365448564e-07, |
| "loss": 1.6827, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.14725078229772015, |
| "grad_norm": 1.0807596555370267, |
| "learning_rate": 5.716164830633764e-07, |
| "loss": 1.6778, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.14738489047831918, |
| "grad_norm": 1.1284745845133346, |
| "learning_rate": 5.715656866861234e-07, |
| "loss": 1.6209, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.14751899865891818, |
| "grad_norm": 0.989581549531516, |
| "learning_rate": 5.715148474221156e-07, |
| "loss": 1.5879, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.1476531068395172, |
| "grad_norm": 1.1254043833078187, |
| "learning_rate": 5.714639652803788e-07, |
| "loss": 1.6834, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.1477872150201162, |
| "grad_norm": 1.0789006249002853, |
| "learning_rate": 5.714130402699465e-07, |
| "loss": 1.6314, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.14792132320071524, |
| "grad_norm": 1.0792687942782158, |
| "learning_rate": 5.713620723998597e-07, |
| "loss": 1.7229, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.14805543138131427, |
| "grad_norm": 1.1190452519207015, |
| "learning_rate": 5.71311061679167e-07, |
| "loss": 1.6851, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.14818953956191327, |
| "grad_norm": 1.1240598043365235, |
| "learning_rate": 5.712600081169248e-07, |
| "loss": 1.6486, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.1483236477425123, |
| "grad_norm": 1.110168533453958, |
| "learning_rate": 5.71208911722197e-07, |
| "loss": 1.651, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.1484577559231113, |
| "grad_norm": 1.0688369448448625, |
| "learning_rate": 5.71157772504055e-07, |
| "loss": 1.709, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.14859186410371034, |
| "grad_norm": 1.1187107525701387, |
| "learning_rate": 5.711065904715777e-07, |
| "loss": 1.7167, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.14872597228430934, |
| "grad_norm": 1.1397259364080825, |
| "learning_rate": 5.710553656338521e-07, |
| "loss": 1.6975, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.14886008046490837, |
| "grad_norm": 1.1590128512082682, |
| "learning_rate": 5.710040979999723e-07, |
| "loss": 1.7414, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.14899418864550737, |
| "grad_norm": 1.167811852838392, |
| "learning_rate": 5.709527875790403e-07, |
| "loss": 1.6626, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.1491282968261064, |
| "grad_norm": 1.0973271552840278, |
| "learning_rate": 5.709014343801655e-07, |
| "loss": 1.6324, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.1492624050067054, |
| "grad_norm": 1.3487898998822019, |
| "learning_rate": 5.708500384124648e-07, |
| "loss": 1.6641, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.14939651318730443, |
| "grad_norm": 1.173261054584497, |
| "learning_rate": 5.707985996850633e-07, |
| "loss": 1.6297, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.14953062136790343, |
| "grad_norm": 1.056190301936881, |
| "learning_rate": 5.707471182070929e-07, |
| "loss": 1.7222, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.14966472954850246, |
| "grad_norm": 1.0543304581404804, |
| "learning_rate": 5.706955939876936e-07, |
| "loss": 1.6486, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.14979883772910146, |
| "grad_norm": 1.0951287089797115, |
| "learning_rate": 5.706440270360128e-07, |
| "loss": 1.6158, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.1499329459097005, |
| "grad_norm": 1.1191851976325244, |
| "learning_rate": 5.705924173612055e-07, |
| "loss": 1.7315, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.1500670540902995, |
| "grad_norm": 1.0577825904689977, |
| "learning_rate": 5.705407649724343e-07, |
| "loss": 1.6935, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.15020116227089853, |
| "grad_norm": 1.056299942663864, |
| "learning_rate": 5.704890698788693e-07, |
| "loss": 1.628, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.15033527045149755, |
| "grad_norm": 1.1590721147664085, |
| "learning_rate": 5.704373320896886e-07, |
| "loss": 1.6249, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.15046937863209656, |
| "grad_norm": 1.1117527447235374, |
| "learning_rate": 5.703855516140773e-07, |
| "loss": 1.7004, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.1506034868126956, |
| "grad_norm": 1.1049104937281078, |
| "learning_rate": 5.703337284612283e-07, |
| "loss": 1.6377, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.1507375949932946, |
| "grad_norm": 1.59710670500923, |
| "learning_rate": 5.702818626403422e-07, |
| "loss": 1.6834, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.15087170317389362, |
| "grad_norm": 1.0967048039417424, |
| "learning_rate": 5.702299541606271e-07, |
| "loss": 1.7351, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.15100581135449262, |
| "grad_norm": 1.0979605765370022, |
| "learning_rate": 5.701780030312985e-07, |
| "loss": 1.6961, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.15113991953509165, |
| "grad_norm": 1.0799636277645253, |
| "learning_rate": 5.701260092615798e-07, |
| "loss": 1.6698, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.15127402771569065, |
| "grad_norm": 1.0680391383117414, |
| "learning_rate": 5.700739728607018e-07, |
| "loss": 1.6337, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.15140813589628968, |
| "grad_norm": 1.1265492196116744, |
| "learning_rate": 5.700218938379027e-07, |
| "loss": 1.758, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.15154224407688868, |
| "grad_norm": 1.1871181924509882, |
| "learning_rate": 5.699697722024286e-07, |
| "loss": 1.7564, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.1516763522574877, |
| "grad_norm": 1.0181987331367963, |
| "learning_rate": 5.69917607963533e-07, |
| "loss": 1.5776, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.15181046043808671, |
| "grad_norm": 1.1284590442586029, |
| "learning_rate": 5.698654011304768e-07, |
| "loss": 1.6984, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.15194456861868574, |
| "grad_norm": 1.2930521652564555, |
| "learning_rate": 5.698131517125288e-07, |
| "loss": 1.6334, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.15207867679928475, |
| "grad_norm": 1.117570312123897, |
| "learning_rate": 5.697608597189651e-07, |
| "loss": 1.6531, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.15221278497988378, |
| "grad_norm": 1.4856967946676458, |
| "learning_rate": 5.697085251590694e-07, |
| "loss": 1.6406, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.15234689316048278, |
| "grad_norm": 1.1601905755705224, |
| "learning_rate": 5.696561480421331e-07, |
| "loss": 1.6839, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.1524810013410818, |
| "grad_norm": 1.1233822318963709, |
| "learning_rate": 5.696037283774549e-07, |
| "loss": 1.6607, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.15261510952168084, |
| "grad_norm": 1.1742187355064484, |
| "learning_rate": 5.695512661743415e-07, |
| "loss": 1.6646, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.15274921770227984, |
| "grad_norm": 1.086363990541314, |
| "learning_rate": 5.694987614421066e-07, |
| "loss": 1.6739, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.15288332588287887, |
| "grad_norm": 1.194737878034564, |
| "learning_rate": 5.694462141900719e-07, |
| "loss": 1.6835, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.15301743406347787, |
| "grad_norm": 1.1598758612040898, |
| "learning_rate": 5.693936244275662e-07, |
| "loss": 1.6587, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.1531515422440769, |
| "grad_norm": 1.1381348609460207, |
| "learning_rate": 5.693409921639263e-07, |
| "loss": 1.7111, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.1532856504246759, |
| "grad_norm": 1.0954642701505761, |
| "learning_rate": 5.692883174084963e-07, |
| "loss": 1.6453, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.15341975860527493, |
| "grad_norm": 1.181240368838665, |
| "learning_rate": 5.69235600170628e-07, |
| "loss": 1.7074, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.15355386678587393, |
| "grad_norm": 1.0848362523541808, |
| "learning_rate": 5.691828404596804e-07, |
| "loss": 1.7188, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.15368797496647296, |
| "grad_norm": 1.0976088776241693, |
| "learning_rate": 5.691300382850205e-07, |
| "loss": 1.6133, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.15382208314707196, |
| "grad_norm": 1.1535833554516768, |
| "learning_rate": 5.690771936560228e-07, |
| "loss": 1.6823, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.153956191327671, |
| "grad_norm": 1.1763699702630221, |
| "learning_rate": 5.690243065820687e-07, |
| "loss": 1.692, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.15409029950827, |
| "grad_norm": 1.0627345607622845, |
| "learning_rate": 5.689713770725477e-07, |
| "loss": 1.5961, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.15422440768886903, |
| "grad_norm": 1.0792270716448427, |
| "learning_rate": 5.689184051368572e-07, |
| "loss": 1.64, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.15435851586946803, |
| "grad_norm": 1.0247043986886288, |
| "learning_rate": 5.688653907844009e-07, |
| "loss": 1.5285, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.15449262405006706, |
| "grad_norm": 1.07857428312717, |
| "learning_rate": 5.688123340245914e-07, |
| "loss": 1.6444, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.15462673223066606, |
| "grad_norm": 1.0930284133542458, |
| "learning_rate": 5.687592348668479e-07, |
| "loss": 1.6882, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.1547608404112651, |
| "grad_norm": 1.0484076712069612, |
| "learning_rate": 5.687060933205976e-07, |
| "loss": 1.5796, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.1548949485918641, |
| "grad_norm": 1.1209018475352952, |
| "learning_rate": 5.686529093952749e-07, |
| "loss": 1.702, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.15502905677246312, |
| "grad_norm": 1.084792074670866, |
| "learning_rate": 5.685996831003221e-07, |
| "loss": 1.6856, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.15516316495306215, |
| "grad_norm": 1.081652083067762, |
| "learning_rate": 5.685464144451888e-07, |
| "loss": 1.6781, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.15529727313366115, |
| "grad_norm": 1.2019370572090728, |
| "learning_rate": 5.684931034393319e-07, |
| "loss": 1.6854, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.15543138131426018, |
| "grad_norm": 1.1546384235930545, |
| "learning_rate": 5.684397500922163e-07, |
| "loss": 1.5995, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.15556548949485918, |
| "grad_norm": 1.0806139711906346, |
| "learning_rate": 5.68386354413314e-07, |
| "loss": 1.6043, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.1556995976754582, |
| "grad_norm": 1.1695139264738694, |
| "learning_rate": 5.683329164121049e-07, |
| "loss": 1.6565, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.15583370585605721, |
| "grad_norm": 1.1082458941671236, |
| "learning_rate": 5.682794360980761e-07, |
| "loss": 1.6997, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.15596781403665624, |
| "grad_norm": 1.171803562739694, |
| "learning_rate": 5.682259134807222e-07, |
| "loss": 1.5452, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.15610192221725525, |
| "grad_norm": 1.0813601117636722, |
| "learning_rate": 5.681723485695456e-07, |
| "loss": 1.6468, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.15623603039785428, |
| "grad_norm": 1.0850091737441245, |
| "learning_rate": 5.681187413740558e-07, |
| "loss": 1.6521, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.15637013857845328, |
| "grad_norm": 1.0888617126493352, |
| "learning_rate": 5.680650919037703e-07, |
| "loss": 1.6318, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.1565042467590523, |
| "grad_norm": 1.0832051131221956, |
| "learning_rate": 5.680114001682137e-07, |
| "loss": 1.6244, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.1566383549396513, |
| "grad_norm": 1.1345011329722676, |
| "learning_rate": 5.679576661769184e-07, |
| "loss": 1.6903, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.15677246312025034, |
| "grad_norm": 1.0989237696533585, |
| "learning_rate": 5.679038899394239e-07, |
| "loss": 1.748, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.15690657130084934, |
| "grad_norm": 1.0586060818560636, |
| "learning_rate": 5.678500714652776e-07, |
| "loss": 1.7243, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.15704067948144837, |
| "grad_norm": 1.1184535612835667, |
| "learning_rate": 5.677962107640342e-07, |
| "loss": 1.6538, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.15717478766204737, |
| "grad_norm": 1.0607792312898765, |
| "learning_rate": 5.677423078452561e-07, |
| "loss": 1.6324, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.1573088958426464, |
| "grad_norm": 1.0442851907949064, |
| "learning_rate": 5.676883627185129e-07, |
| "loss": 1.6818, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.15744300402324543, |
| "grad_norm": 1.0805916545031482, |
| "learning_rate": 5.676343753933818e-07, |
| "loss": 1.6477, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.15757711220384443, |
| "grad_norm": 1.055305047370012, |
| "learning_rate": 5.675803458794477e-07, |
| "loss": 1.675, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.15771122038444346, |
| "grad_norm": 1.1317344965112557, |
| "learning_rate": 5.675262741863026e-07, |
| "loss": 1.6195, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.15784532856504246, |
| "grad_norm": 1.0677408822746999, |
| "learning_rate": 5.674721603235462e-07, |
| "loss": 1.673, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.1579794367456415, |
| "grad_norm": 1.1173608676015656, |
| "learning_rate": 5.67418004300786e-07, |
| "loss": 1.704, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.1581135449262405, |
| "grad_norm": 1.056889330893961, |
| "learning_rate": 5.673638061276364e-07, |
| "loss": 1.6232, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.15824765310683953, |
| "grad_norm": 1.1175288057488566, |
| "learning_rate": 5.673095658137197e-07, |
| "loss": 1.7439, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.15838176128743853, |
| "grad_norm": 1.1363903828654547, |
| "learning_rate": 5.672552833686654e-07, |
| "loss": 1.6943, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.15851586946803756, |
| "grad_norm": 1.0761526122635945, |
| "learning_rate": 5.672009588021108e-07, |
| "loss": 1.6178, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.15864997764863656, |
| "grad_norm": 1.0868039624863182, |
| "learning_rate": 5.671465921237003e-07, |
| "loss": 1.7295, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.1587840858292356, |
| "grad_norm": 1.5375983527794888, |
| "learning_rate": 5.670921833430861e-07, |
| "loss": 1.5868, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.1589181940098346, |
| "grad_norm": 1.1761526374271758, |
| "learning_rate": 5.670377324699277e-07, |
| "loss": 1.6585, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.15905230219043362, |
| "grad_norm": 1.0911545993652647, |
| "learning_rate": 5.669832395138923e-07, |
| "loss": 1.6849, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.15918641037103262, |
| "grad_norm": 1.0517360680747312, |
| "learning_rate": 5.669287044846542e-07, |
| "loss": 1.7081, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.15932051855163165, |
| "grad_norm": 1.0460736006845528, |
| "learning_rate": 5.668741273918952e-07, |
| "loss": 1.6946, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.15945462673223065, |
| "grad_norm": 1.1057544457050006, |
| "learning_rate": 5.668195082453052e-07, |
| "loss": 1.6648, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.15958873491282968, |
| "grad_norm": 1.290894867238456, |
| "learning_rate": 5.667648470545808e-07, |
| "loss": 1.6921, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.1597228430934287, |
| "grad_norm": 1.2497492674256703, |
| "learning_rate": 5.667101438294264e-07, |
| "loss": 1.7095, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.15985695127402771, |
| "grad_norm": 1.1080523067750003, |
| "learning_rate": 5.666553985795538e-07, |
| "loss": 1.6313, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.15999105945462674, |
| "grad_norm": 1.0983444417697228, |
| "learning_rate": 5.666006113146823e-07, |
| "loss": 1.6836, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.16012516763522575, |
| "grad_norm": 1.1242609644362185, |
| "learning_rate": 5.665457820445387e-07, |
| "loss": 1.6522, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.16025927581582478, |
| "grad_norm": 1.1033082182518592, |
| "learning_rate": 5.664909107788571e-07, |
| "loss": 1.6958, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.16039338399642378, |
| "grad_norm": 1.1353654965954614, |
| "learning_rate": 5.664359975273792e-07, |
| "loss": 1.6604, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.1605274921770228, |
| "grad_norm": 1.1259316457840236, |
| "learning_rate": 5.663810422998543e-07, |
| "loss": 1.7241, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.1606616003576218, |
| "grad_norm": 1.0922411903046598, |
| "learning_rate": 5.663260451060388e-07, |
| "loss": 1.6432, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.16079570853822084, |
| "grad_norm": 1.0707962447880088, |
| "learning_rate": 5.662710059556966e-07, |
| "loss": 1.6666, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.16092981671881984, |
| "grad_norm": 1.0837296784325723, |
| "learning_rate": 5.662159248585993e-07, |
| "loss": 1.6965, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.16106392489941887, |
| "grad_norm": 1.0703824186490674, |
| "learning_rate": 5.66160801824526e-07, |
| "loss": 1.7293, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.16119803308001787, |
| "grad_norm": 1.095076268284643, |
| "learning_rate": 5.661056368632625e-07, |
| "loss": 1.6433, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.1613321412606169, |
| "grad_norm": 1.0622058510882262, |
| "learning_rate": 5.660504299846032e-07, |
| "loss": 1.6237, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.1614662494412159, |
| "grad_norm": 1.0981636682859879, |
| "learning_rate": 5.65995181198349e-07, |
| "loss": 1.8076, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.16160035762181493, |
| "grad_norm": 1.1393139443072446, |
| "learning_rate": 5.659398905143088e-07, |
| "loss": 1.7572, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.16173446580241393, |
| "grad_norm": 1.0960864805053374, |
| "learning_rate": 5.658845579422985e-07, |
| "loss": 1.6836, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.16186857398301296, |
| "grad_norm": 1.0536699550048987, |
| "learning_rate": 5.658291834921417e-07, |
| "loss": 1.6933, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.162002682163612, |
| "grad_norm": 1.1996669047917732, |
| "learning_rate": 5.657737671736696e-07, |
| "loss": 1.6405, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.162136790344211, |
| "grad_norm": 1.10569454454835, |
| "learning_rate": 5.657183089967204e-07, |
| "loss": 1.5797, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.16227089852481003, |
| "grad_norm": 1.2803251145710948, |
| "learning_rate": 5.6566280897114e-07, |
| "loss": 1.6207, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.16240500670540903, |
| "grad_norm": 1.048684333970024, |
| "learning_rate": 5.656072671067818e-07, |
| "loss": 1.5924, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.16253911488600806, |
| "grad_norm": 1.0612522875516415, |
| "learning_rate": 5.655516834135063e-07, |
| "loss": 1.5299, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.16267322306660706, |
| "grad_norm": 1.0932249588392913, |
| "learning_rate": 5.65496057901182e-07, |
| "loss": 1.6653, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.1628073312472061, |
| "grad_norm": 1.0734042304698213, |
| "learning_rate": 5.65440390579684e-07, |
| "loss": 1.5442, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.1629414394278051, |
| "grad_norm": 1.1189271058187575, |
| "learning_rate": 5.653846814588957e-07, |
| "loss": 1.6881, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.16307554760840412, |
| "grad_norm": 1.1589238023336688, |
| "learning_rate": 5.653289305487072e-07, |
| "loss": 1.7461, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.16320965578900312, |
| "grad_norm": 1.02665461506197, |
| "learning_rate": 5.652731378590166e-07, |
| "loss": 1.6576, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.16334376396960215, |
| "grad_norm": 1.1444702149064363, |
| "learning_rate": 5.65217303399729e-07, |
| "loss": 1.6162, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.16347787215020115, |
| "grad_norm": 1.1311619335366723, |
| "learning_rate": 5.65161427180757e-07, |
| "loss": 1.6957, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.16361198033080018, |
| "grad_norm": 1.0555386995041562, |
| "learning_rate": 5.651055092120208e-07, |
| "loss": 1.7145, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.16374608851139918, |
| "grad_norm": 1.189321876945114, |
| "learning_rate": 5.650495495034477e-07, |
| "loss": 1.698, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.16388019669199821, |
| "grad_norm": 1.084782331393969, |
| "learning_rate": 5.649935480649729e-07, |
| "loss": 1.6739, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.16401430487259722, |
| "grad_norm": 1.1283603135723947, |
| "learning_rate": 5.649375049065386e-07, |
| "loss": 1.752, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.16414841305319625, |
| "grad_norm": 1.11896193815645, |
| "learning_rate": 5.648814200380943e-07, |
| "loss": 1.6303, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.16428252123379525, |
| "grad_norm": 1.067115391566694, |
| "learning_rate": 5.648252934695973e-07, |
| "loss": 1.6735, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.16441662941439428, |
| "grad_norm": 1.0804557718519556, |
| "learning_rate": 5.64769125211012e-07, |
| "loss": 1.6247, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.1645507375949933, |
| "grad_norm": 1.0059736180266399, |
| "learning_rate": 5.647129152723106e-07, |
| "loss": 1.5354, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.1646848457755923, |
| "grad_norm": 1.0770670756683223, |
| "learning_rate": 5.646566636634721e-07, |
| "loss": 1.6768, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.16481895395619134, |
| "grad_norm": 1.0638623481159848, |
| "learning_rate": 5.646003703944834e-07, |
| "loss": 1.6413, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.16495306213679034, |
| "grad_norm": 1.0839631787802386, |
| "learning_rate": 5.645440354753386e-07, |
| "loss": 1.6411, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.16508717031738937, |
| "grad_norm": 1.1589896172936287, |
| "learning_rate": 5.644876589160391e-07, |
| "loss": 1.6042, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.16522127849798837, |
| "grad_norm": 1.1160410996742565, |
| "learning_rate": 5.644312407265939e-07, |
| "loss": 1.6573, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.1653553866785874, |
| "grad_norm": 1.4171454379604909, |
| "learning_rate": 5.643747809170193e-07, |
| "loss": 1.6332, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.1654894948591864, |
| "grad_norm": 1.0531642470485152, |
| "learning_rate": 5.643182794973391e-07, |
| "loss": 1.6602, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.16562360303978543, |
| "grad_norm": 1.1086706049405617, |
| "learning_rate": 5.64261736477584e-07, |
| "loss": 1.7038, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.16575771122038443, |
| "grad_norm": 1.0944161073367153, |
| "learning_rate": 5.642051518677929e-07, |
| "loss": 1.6386, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.16589181940098346, |
| "grad_norm": 1.0383994077860026, |
| "learning_rate": 5.641485256780112e-07, |
| "loss": 1.6683, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.16602592758158247, |
| "grad_norm": 1.110409441026267, |
| "learning_rate": 5.640918579182926e-07, |
| "loss": 1.7666, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.1661600357621815, |
| "grad_norm": 1.062864948914823, |
| "learning_rate": 5.640351485986973e-07, |
| "loss": 1.6995, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.1662941439427805, |
| "grad_norm": 1.1144719375181737, |
| "learning_rate": 5.639783977292936e-07, |
| "loss": 1.6904, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.16642825212337953, |
| "grad_norm": 1.090081045271864, |
| "learning_rate": 5.639216053201565e-07, |
| "loss": 1.696, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.16656236030397853, |
| "grad_norm": 1.0630959169468894, |
| "learning_rate": 5.638647713813691e-07, |
| "loss": 1.6521, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.16669646848457756, |
| "grad_norm": 2.998931919925447, |
| "learning_rate": 5.638078959230211e-07, |
| "loss": 1.706, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.1668305766651766, |
| "grad_norm": 1.2341388992185853, |
| "learning_rate": 5.637509789552104e-07, |
| "loss": 1.5942, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.1669646848457756, |
| "grad_norm": 1.1027382262588608, |
| "learning_rate": 5.636940204880415e-07, |
| "loss": 1.6176, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.16709879302637462, |
| "grad_norm": 1.1453532005308322, |
| "learning_rate": 5.636370205316269e-07, |
| "loss": 1.7051, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.16723290120697362, |
| "grad_norm": 1.1774692080993565, |
| "learning_rate": 5.63579979096086e-07, |
| "loss": 1.7089, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.16736700938757265, |
| "grad_norm": 1.05810539274269, |
| "learning_rate": 5.635228961915458e-07, |
| "loss": 1.6353, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.16750111756817165, |
| "grad_norm": 1.1450836955803443, |
| "learning_rate": 5.634657718281407e-07, |
| "loss": 1.7418, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.16763522574877068, |
| "grad_norm": 1.125948952992154, |
| "learning_rate": 5.634086060160121e-07, |
| "loss": 1.7343, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.16776933392936969, |
| "grad_norm": 1.069728820008434, |
| "learning_rate": 5.633513987653094e-07, |
| "loss": 1.4826, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.16790344210996871, |
| "grad_norm": 1.0401896130830024, |
| "learning_rate": 5.632941500861885e-07, |
| "loss": 1.7211, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.16803755029056772, |
| "grad_norm": 1.09563187676157, |
| "learning_rate": 5.632368599888135e-07, |
| "loss": 1.7378, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.16817165847116675, |
| "grad_norm": 1.0701481214906692, |
| "learning_rate": 5.631795284833555e-07, |
| "loss": 1.7191, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.16830576665176575, |
| "grad_norm": 1.2554327805183711, |
| "learning_rate": 5.631221555799927e-07, |
| "loss": 1.6476, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.16843987483236478, |
| "grad_norm": 1.0867457009428256, |
| "learning_rate": 5.63064741288911e-07, |
| "loss": 1.6594, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.16857398301296378, |
| "grad_norm": 1.0587419661389497, |
| "learning_rate": 5.630072856203037e-07, |
| "loss": 1.7365, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.1687080911935628, |
| "grad_norm": 1.0437016123668459, |
| "learning_rate": 5.629497885843712e-07, |
| "loss": 1.6223, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.1688421993741618, |
| "grad_norm": 1.093304989043814, |
| "learning_rate": 5.628922501913211e-07, |
| "loss": 1.7281, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.16897630755476084, |
| "grad_norm": 1.0787876584693192, |
| "learning_rate": 5.628346704513689e-07, |
| "loss": 1.7033, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.16911041573535987, |
| "grad_norm": 1.119310868984826, |
| "learning_rate": 5.627770493747369e-07, |
| "loss": 1.6785, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.16924452391595887, |
| "grad_norm": 1.0543862123255383, |
| "learning_rate": 5.62719386971655e-07, |
| "loss": 1.6329, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.1693786320965579, |
| "grad_norm": 1.1801974734059986, |
| "learning_rate": 5.626616832523605e-07, |
| "loss": 1.6647, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.1695127402771569, |
| "grad_norm": 1.0966012840078587, |
| "learning_rate": 5.626039382270977e-07, |
| "loss": 1.7489, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.16964684845775593, |
| "grad_norm": 1.0464685107772078, |
| "learning_rate": 5.625461519061187e-07, |
| "loss": 1.613, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.16978095663835494, |
| "grad_norm": 1.1162999981242707, |
| "learning_rate": 5.624883242996825e-07, |
| "loss": 1.6777, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.16991506481895396, |
| "grad_norm": 1.0848332959906992, |
| "learning_rate": 5.624304554180556e-07, |
| "loss": 1.6708, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.17004917299955297, |
| "grad_norm": 1.0397576875295036, |
| "learning_rate": 5.623725452715121e-07, |
| "loss": 1.6809, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.170183281180152, |
| "grad_norm": 1.0775743863836376, |
| "learning_rate": 5.62314593870333e-07, |
| "loss": 1.7068, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.170317389360751, |
| "grad_norm": 1.1030270698791587, |
| "learning_rate": 5.622566012248068e-07, |
| "loss": 1.7731, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.17045149754135003, |
| "grad_norm": 1.0632600433435002, |
| "learning_rate": 5.621985673452292e-07, |
| "loss": 1.6944, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.17058560572194903, |
| "grad_norm": 2.354964154428233, |
| "learning_rate": 5.621404922419036e-07, |
| "loss": 1.5583, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.17071971390254806, |
| "grad_norm": 1.0841684512277456, |
| "learning_rate": 5.620823759251403e-07, |
| "loss": 1.6523, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.17085382208314706, |
| "grad_norm": 1.1343004749820542, |
| "learning_rate": 5.62024218405257e-07, |
| "loss": 1.6026, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.1709879302637461, |
| "grad_norm": 1.3571816054618184, |
| "learning_rate": 5.619660196925789e-07, |
| "loss": 1.6434, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.1711220384443451, |
| "grad_norm": 1.058572028264877, |
| "learning_rate": 5.619077797974385e-07, |
| "loss": 1.6225, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.17125614662494412, |
| "grad_norm": 1.068136194752418, |
| "learning_rate": 5.618494987301753e-07, |
| "loss": 1.6629, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.17139025480554315, |
| "grad_norm": 1.2779625791938292, |
| "learning_rate": 5.617911765011364e-07, |
| "loss": 1.6295, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.17152436298614215, |
| "grad_norm": 1.09073380795014, |
| "learning_rate": 5.617328131206761e-07, |
| "loss": 1.6544, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.17165847116674118, |
| "grad_norm": 1.0808553452465872, |
| "learning_rate": 5.616744085991562e-07, |
| "loss": 1.6671, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.17179257934734019, |
| "grad_norm": 1.1043939527890692, |
| "learning_rate": 5.616159629469456e-07, |
| "loss": 1.6977, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.17192668752793921, |
| "grad_norm": 1.0969178723829076, |
| "learning_rate": 5.615574761744202e-07, |
| "loss": 1.7814, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.17206079570853822, |
| "grad_norm": 1.0619478458391556, |
| "learning_rate": 5.614989482919641e-07, |
| "loss": 1.6899, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.17219490388913725, |
| "grad_norm": 1.1116637641823053, |
| "learning_rate": 5.614403793099678e-07, |
| "loss": 1.6795, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.17232901206973625, |
| "grad_norm": 1.1188139751673378, |
| "learning_rate": 5.613817692388295e-07, |
| "loss": 1.6586, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.17246312025033528, |
| "grad_norm": 1.1092151541540025, |
| "learning_rate": 5.613231180889545e-07, |
| "loss": 1.731, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.17259722843093428, |
| "grad_norm": 1.0776307968053882, |
| "learning_rate": 5.612644258707557e-07, |
| "loss": 1.639, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.1727313366115333, |
| "grad_norm": 1.1568418405932983, |
| "learning_rate": 5.612056925946532e-07, |
| "loss": 1.6265, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.1728654447921323, |
| "grad_norm": 1.1686914549112786, |
| "learning_rate": 5.611469182710741e-07, |
| "loss": 1.5635, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.17299955297273134, |
| "grad_norm": 1.0798126174498692, |
| "learning_rate": 5.61088102910453e-07, |
| "loss": 1.6009, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.17313366115333034, |
| "grad_norm": 1.0565094574884266, |
| "learning_rate": 5.61029246523232e-07, |
| "loss": 1.6236, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.17326776933392937, |
| "grad_norm": 1.1580137951907012, |
| "learning_rate": 5.609703491198601e-07, |
| "loss": 1.6664, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.17340187751452837, |
| "grad_norm": 1.0812242416939941, |
| "learning_rate": 5.609114107107936e-07, |
| "loss": 1.5541, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.1735359856951274, |
| "grad_norm": 1.0926652109752668, |
| "learning_rate": 5.608524313064966e-07, |
| "loss": 1.6495, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.1736700938757264, |
| "grad_norm": 1.116001777343314, |
| "learning_rate": 5.607934109174398e-07, |
| "loss": 1.568, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.17380420205632544, |
| "grad_norm": 1.0742848470460207, |
| "learning_rate": 5.607343495541017e-07, |
| "loss": 1.6815, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.17393831023692446, |
| "grad_norm": 1.1104040571093063, |
| "learning_rate": 5.606752472269675e-07, |
| "loss": 1.7855, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.17407241841752347, |
| "grad_norm": 1.1082815736136737, |
| "learning_rate": 5.606161039465304e-07, |
| "loss": 1.5563, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.1742065265981225, |
| "grad_norm": 1.3426693471935263, |
| "learning_rate": 5.605569197232904e-07, |
| "loss": 1.6382, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.1743406347787215, |
| "grad_norm": 1.1018630739261308, |
| "learning_rate": 5.604976945677547e-07, |
| "loss": 1.5862, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.17447474295932053, |
| "grad_norm": 1.08258660371521, |
| "learning_rate": 5.604384284904382e-07, |
| "loss": 1.7377, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.17460885113991953, |
| "grad_norm": 1.0416433850048736, |
| "learning_rate": 5.603791215018626e-07, |
| "loss": 1.6654, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.17474295932051856, |
| "grad_norm": 1.0585227638311847, |
| "learning_rate": 5.603197736125572e-07, |
| "loss": 1.6259, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.17487706750111756, |
| "grad_norm": 1.800828493151873, |
| "learning_rate": 5.602603848330582e-07, |
| "loss": 1.6681, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.1750111756817166, |
| "grad_norm": 1.2442322404337642, |
| "learning_rate": 5.602009551739095e-07, |
| "loss": 1.7388, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.1751452838623156, |
| "grad_norm": 1.0650536278693077, |
| "learning_rate": 5.60141484645662e-07, |
| "loss": 1.6913, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.17527939204291462, |
| "grad_norm": 1.0715066374394453, |
| "learning_rate": 5.600819732588738e-07, |
| "loss": 1.7508, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.17541350022351362, |
| "grad_norm": 1.2154515219706747, |
| "learning_rate": 5.600224210241104e-07, |
| "loss": 1.6431, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.17554760840411265, |
| "grad_norm": 1.0580023010334576, |
| "learning_rate": 5.599628279519445e-07, |
| "loss": 1.7028, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.17568171658471166, |
| "grad_norm": 1.0649573978054163, |
| "learning_rate": 5.599031940529562e-07, |
| "loss": 1.7045, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.17581582476531069, |
| "grad_norm": 1.066600801218827, |
| "learning_rate": 5.598435193377324e-07, |
| "loss": 1.6888, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.1759499329459097, |
| "grad_norm": 1.2123022138020687, |
| "learning_rate": 5.597838038168678e-07, |
| "loss": 1.7297, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.17608404112650872, |
| "grad_norm": 1.0436067677488805, |
| "learning_rate": 5.59724047500964e-07, |
| "loss": 1.652, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.17621814930710775, |
| "grad_norm": 1.0487601395222634, |
| "learning_rate": 5.5966425040063e-07, |
| "loss": 1.7444, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.17635225748770675, |
| "grad_norm": 1.117082389094809, |
| "learning_rate": 5.596044125264818e-07, |
| "loss": 1.64, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.17648636566830578, |
| "grad_norm": 1.0558238043899169, |
| "learning_rate": 5.595445338891431e-07, |
| "loss": 1.6659, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.17662047384890478, |
| "grad_norm": 1.0478981037852866, |
| "learning_rate": 5.594846144992443e-07, |
| "loss": 1.52, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.1767545820295038, |
| "grad_norm": 1.257918943849832, |
| "learning_rate": 5.594246543674234e-07, |
| "loss": 1.7601, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.1768886902101028, |
| "grad_norm": 1.4225322949034613, |
| "learning_rate": 5.593646535043253e-07, |
| "loss": 1.7307, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.17702279839070184, |
| "grad_norm": 1.1490395041861463, |
| "learning_rate": 5.593046119206027e-07, |
| "loss": 1.7181, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.17715690657130084, |
| "grad_norm": 1.0611730445421508, |
| "learning_rate": 5.59244529626915e-07, |
| "loss": 1.6528, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.17729101475189987, |
| "grad_norm": 1.204549135410644, |
| "learning_rate": 5.591844066339289e-07, |
| "loss": 1.7908, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.17742512293249887, |
| "grad_norm": 1.1001829655239295, |
| "learning_rate": 5.591242429523187e-07, |
| "loss": 1.6403, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.1775592311130979, |
| "grad_norm": 1.1251080236723472, |
| "learning_rate": 5.590640385927655e-07, |
| "loss": 1.6476, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.1776933392936969, |
| "grad_norm": 1.0879047909659794, |
| "learning_rate": 5.590037935659577e-07, |
| "loss": 1.7197, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.17782744747429594, |
| "grad_norm": 1.0406989517811054, |
| "learning_rate": 5.589435078825912e-07, |
| "loss": 1.5898, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.17796155565489494, |
| "grad_norm": 1.055284942749228, |
| "learning_rate": 5.588831815533688e-07, |
| "loss": 1.6537, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.17809566383549397, |
| "grad_norm": 1.1132782384590842, |
| "learning_rate": 5.588228145890006e-07, |
| "loss": 1.6304, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.17822977201609297, |
| "grad_norm": 1.1856096238614278, |
| "learning_rate": 5.587624070002039e-07, |
| "loss": 1.6901, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.178363880196692, |
| "grad_norm": 1.0716839423819353, |
| "learning_rate": 5.587019587977035e-07, |
| "loss": 1.6256, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.17849798837729103, |
| "grad_norm": 1.0832321039520167, |
| "learning_rate": 5.586414699922309e-07, |
| "loss": 1.6811, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.17863209655789003, |
| "grad_norm": 1.0997046830321784, |
| "learning_rate": 5.585809405945252e-07, |
| "loss": 1.5625, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.17876620473848906, |
| "grad_norm": 1.0713255103444261, |
| "learning_rate": 5.585203706153326e-07, |
| "loss": 1.6532, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.17890031291908806, |
| "grad_norm": 1.097655546141729, |
| "learning_rate": 5.584597600654066e-07, |
| "loss": 1.561, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.1790344210996871, |
| "grad_norm": 1.118524842313588, |
| "learning_rate": 5.583991089555074e-07, |
| "loss": 1.6562, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.1791685292802861, |
| "grad_norm": 1.143484492621255, |
| "learning_rate": 5.583384172964032e-07, |
| "loss": 1.6106, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.17930263746088512, |
| "grad_norm": 1.1214046342101587, |
| "learning_rate": 5.582776850988688e-07, |
| "loss": 1.6307, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.17943674564148412, |
| "grad_norm": 1.1213846092161437, |
| "learning_rate": 5.582169123736864e-07, |
| "loss": 1.7581, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.17957085382208315, |
| "grad_norm": 1.1045643310044297, |
| "learning_rate": 5.581560991316455e-07, |
| "loss": 1.7356, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.17970496200268216, |
| "grad_norm": 1.1684585589911254, |
| "learning_rate": 5.580952453835426e-07, |
| "loss": 1.7319, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.17983907018328119, |
| "grad_norm": 1.3021764184252913, |
| "learning_rate": 5.580343511401813e-07, |
| "loss": 1.7263, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.1799731783638802, |
| "grad_norm": 1.113861073703856, |
| "learning_rate": 5.579734164123729e-07, |
| "loss": 1.6896, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.18010728654447922, |
| "grad_norm": 1.081482477946928, |
| "learning_rate": 5.579124412109352e-07, |
| "loss": 1.7272, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.18024139472507822, |
| "grad_norm": 1.2066355523363086, |
| "learning_rate": 5.578514255466939e-07, |
| "loss": 1.7111, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.18037550290567725, |
| "grad_norm": 1.0985468030112344, |
| "learning_rate": 5.577903694304811e-07, |
| "loss": 1.6341, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.18050961108627625, |
| "grad_norm": 1.171300719246094, |
| "learning_rate": 5.577292728731368e-07, |
| "loss": 1.7271, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.18064371926687528, |
| "grad_norm": 1.0938624509126613, |
| "learning_rate": 5.576681358855078e-07, |
| "loss": 1.6505, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.1807778274474743, |
| "grad_norm": 1.1376662655489747, |
| "learning_rate": 5.57606958478448e-07, |
| "loss": 1.6729, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.1809119356280733, |
| "grad_norm": 1.1248050141842243, |
| "learning_rate": 5.575457406628189e-07, |
| "loss": 1.6139, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.18104604380867234, |
| "grad_norm": 1.0939373053874768, |
| "learning_rate": 5.574844824494888e-07, |
| "loss": 1.6295, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.18118015198927134, |
| "grad_norm": 1.0842961883880395, |
| "learning_rate": 5.574231838493333e-07, |
| "loss": 1.5905, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.18131426016987037, |
| "grad_norm": 1.1099129964326464, |
| "learning_rate": 5.573618448732349e-07, |
| "loss": 1.5986, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.18144836835046937, |
| "grad_norm": 1.1232448273106495, |
| "learning_rate": 5.573004655320838e-07, |
| "loss": 1.7579, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.1815824765310684, |
| "grad_norm": 1.1666528664724998, |
| "learning_rate": 5.57239045836777e-07, |
| "loss": 1.6152, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.1817165847116674, |
| "grad_norm": 1.1370227967293582, |
| "learning_rate": 5.571775857982186e-07, |
| "loss": 1.7261, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.18185069289226644, |
| "grad_norm": 1.1281838118145104, |
| "learning_rate": 5.571160854273203e-07, |
| "loss": 1.7791, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.18198480107286544, |
| "grad_norm": 1.1128745175377743, |
| "learning_rate": 5.570545447350004e-07, |
| "loss": 1.6613, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.18211890925346447, |
| "grad_norm": 1.0867439824153309, |
| "learning_rate": 5.569929637321848e-07, |
| "loss": 1.7577, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.18225301743406347, |
| "grad_norm": 1.1168304669263995, |
| "learning_rate": 5.569313424298063e-07, |
| "loss": 1.6313, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.1823871256146625, |
| "grad_norm": 1.0783686555511454, |
| "learning_rate": 5.56869680838805e-07, |
| "loss": 1.6155, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.1825212337952615, |
| "grad_norm": 1.1849330577729977, |
| "learning_rate": 5.568079789701281e-07, |
| "loss": 1.7919, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.18265534197586053, |
| "grad_norm": 1.0642283339220127, |
| "learning_rate": 5.567462368347296e-07, |
| "loss": 1.6483, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.18278945015645953, |
| "grad_norm": 1.0762888034859384, |
| "learning_rate": 5.566844544435715e-07, |
| "loss": 1.6447, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.18292355833705856, |
| "grad_norm": 1.1102699057236556, |
| "learning_rate": 5.566226318076221e-07, |
| "loss": 1.6753, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.18305766651765756, |
| "grad_norm": 1.0900024036456375, |
| "learning_rate": 5.565607689378574e-07, |
| "loss": 1.6932, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.1831917746982566, |
| "grad_norm": 1.170525713074084, |
| "learning_rate": 5.564988658452601e-07, |
| "loss": 1.6378, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.18332588287885562, |
| "grad_norm": 1.1252580693238932, |
| "learning_rate": 5.564369225408206e-07, |
| "loss": 1.7611, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.18345999105945462, |
| "grad_norm": 1.0779299976202001, |
| "learning_rate": 5.563749390355356e-07, |
| "loss": 1.6517, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.18359409924005365, |
| "grad_norm": 1.0810638342875853, |
| "learning_rate": 5.563129153404099e-07, |
| "loss": 1.5525, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.18372820742065266, |
| "grad_norm": 1.061240323219775, |
| "learning_rate": 5.562508514664548e-07, |
| "loss": 1.7482, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.18386231560125169, |
| "grad_norm": 1.1362519090350038, |
| "learning_rate": 5.561887474246889e-07, |
| "loss": 1.5771, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.1839964237818507, |
| "grad_norm": 1.7306083620793078, |
| "learning_rate": 5.561266032261379e-07, |
| "loss": 1.6738, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.18413053196244972, |
| "grad_norm": 1.1266147426655102, |
| "learning_rate": 5.560644188818348e-07, |
| "loss": 1.6809, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.18426464014304872, |
| "grad_norm": 1.4560506903910069, |
| "learning_rate": 5.560021944028195e-07, |
| "loss": 1.7862, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.18439874832364775, |
| "grad_norm": 1.1339717685703572, |
| "learning_rate": 5.559399298001391e-07, |
| "loss": 1.7362, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.18453285650424675, |
| "grad_norm": 1.0605805880234964, |
| "learning_rate": 5.55877625084848e-07, |
| "loss": 1.6264, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.18466696468484578, |
| "grad_norm": 1.1589703072777433, |
| "learning_rate": 5.558152802680075e-07, |
| "loss": 1.6524, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.18480107286544478, |
| "grad_norm": 1.0842230894260985, |
| "learning_rate": 5.557528953606858e-07, |
| "loss": 1.8047, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.1849351810460438, |
| "grad_norm": 1.1794280210617787, |
| "learning_rate": 5.55690470373959e-07, |
| "loss": 1.6757, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.1850692892266428, |
| "grad_norm": 1.097631119847551, |
| "learning_rate": 5.556280053189095e-07, |
| "loss": 1.6108, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.18520339740724184, |
| "grad_norm": 1.1017129023282082, |
| "learning_rate": 5.555655002066273e-07, |
| "loss": 1.7577, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.18533750558784085, |
| "grad_norm": 1.1361790282178577, |
| "learning_rate": 5.555029550482091e-07, |
| "loss": 1.7294, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.18547161376843987, |
| "grad_norm": 1.055142090473337, |
| "learning_rate": 5.554403698547593e-07, |
| "loss": 1.6388, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.1856057219490389, |
| "grad_norm": 7.061910083877572, |
| "learning_rate": 5.553777446373886e-07, |
| "loss": 1.6087, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.1857398301296379, |
| "grad_norm": 1.1547867916367462, |
| "learning_rate": 5.553150794072159e-07, |
| "loss": 1.6509, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.18587393831023694, |
| "grad_norm": 1.193219273609135, |
| "learning_rate": 5.552523741753659e-07, |
| "loss": 1.8231, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.18600804649083594, |
| "grad_norm": 1.0693060290055107, |
| "learning_rate": 5.551896289529716e-07, |
| "loss": 1.656, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.18614215467143497, |
| "grad_norm": 1.1745807906563366, |
| "learning_rate": 5.551268437511724e-07, |
| "loss": 1.6985, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.18627626285203397, |
| "grad_norm": 1.099307648055397, |
| "learning_rate": 5.550640185811148e-07, |
| "loss": 1.6393, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.186410371032633, |
| "grad_norm": 1.1139438125947954, |
| "learning_rate": 5.550011534539527e-07, |
| "loss": 1.6638, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.186544479213232, |
| "grad_norm": 1.0670126218487324, |
| "learning_rate": 5.549382483808472e-07, |
| "loss": 1.6649, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.18667858739383103, |
| "grad_norm": 1.1017328082824618, |
| "learning_rate": 5.548753033729658e-07, |
| "loss": 1.6979, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.18681269557443003, |
| "grad_norm": 1.1113229457677472, |
| "learning_rate": 5.548123184414838e-07, |
| "loss": 1.6629, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.18694680375502906, |
| "grad_norm": 1.061154042048288, |
| "learning_rate": 5.547492935975834e-07, |
| "loss": 1.6141, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.18708091193562806, |
| "grad_norm": 1.1037785149371337, |
| "learning_rate": 5.546862288524536e-07, |
| "loss": 1.619, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.1872150201162271, |
| "grad_norm": 1.042211773070437, |
| "learning_rate": 5.546231242172909e-07, |
| "loss": 1.6314, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.1873491282968261, |
| "grad_norm": 1.0991209850271397, |
| "learning_rate": 5.545599797032986e-07, |
| "loss": 1.6851, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.18748323647742512, |
| "grad_norm": 1.0820523032730966, |
| "learning_rate": 5.544967953216872e-07, |
| "loss": 1.614, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.18761734465802413, |
| "grad_norm": 1.0852415180954882, |
| "learning_rate": 5.544335710836741e-07, |
| "loss": 1.7069, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.18775145283862316, |
| "grad_norm": 1.1386169714316008, |
| "learning_rate": 5.543703070004842e-07, |
| "loss": 1.7039, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.18788556101922219, |
| "grad_norm": 1.1108755081549002, |
| "learning_rate": 5.543070030833488e-07, |
| "loss": 1.5328, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.1880196691998212, |
| "grad_norm": 1.1300665980334217, |
| "learning_rate": 5.542436593435071e-07, |
| "loss": 1.5492, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.18815377738042022, |
| "grad_norm": 1.1648000097455284, |
| "learning_rate": 5.541802757922047e-07, |
| "loss": 1.7602, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.18828788556101922, |
| "grad_norm": 1.2562290482462215, |
| "learning_rate": 5.541168524406944e-07, |
| "loss": 1.7935, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.18842199374161825, |
| "grad_norm": 1.0533823510103384, |
| "learning_rate": 5.540533893002363e-07, |
| "loss": 1.6259, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.18855610192221725, |
| "grad_norm": 1.1167925185725207, |
| "learning_rate": 5.539898863820975e-07, |
| "loss": 1.6887, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.18869021010281628, |
| "grad_norm": 1.1001250134186094, |
| "learning_rate": 5.539263436975518e-07, |
| "loss": 1.6111, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.18882431828341528, |
| "grad_norm": 1.0625193817660576, |
| "learning_rate": 5.538627612578808e-07, |
| "loss": 1.6671, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.1889584264640143, |
| "grad_norm": 1.080439840869442, |
| "learning_rate": 5.537991390743723e-07, |
| "loss": 1.6131, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.1890925346446133, |
| "grad_norm": 1.0665498302900025, |
| "learning_rate": 5.537354771583218e-07, |
| "loss": 1.6202, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.18922664282521234, |
| "grad_norm": 1.062235350568671, |
| "learning_rate": 5.536717755210317e-07, |
| "loss": 1.7539, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.18936075100581135, |
| "grad_norm": 1.1086535902273902, |
| "learning_rate": 5.536080341738112e-07, |
| "loss": 1.6395, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.18949485918641037, |
| "grad_norm": 1.0667252815429409, |
| "learning_rate": 5.535442531279765e-07, |
| "loss": 1.6353, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.18962896736700938, |
| "grad_norm": 1.046416301897227, |
| "learning_rate": 5.534804323948516e-07, |
| "loss": 1.6511, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.1897630755476084, |
| "grad_norm": 1.075372549798005, |
| "learning_rate": 5.534165719857666e-07, |
| "loss": 1.7723, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.1898971837282074, |
| "grad_norm": 1.104299289930867, |
| "learning_rate": 5.533526719120594e-07, |
| "loss": 1.6641, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.19003129190880644, |
| "grad_norm": 1.0927891670744394, |
| "learning_rate": 5.532887321850742e-07, |
| "loss": 1.5863, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.19016540008940547, |
| "grad_norm": 1.1166521049854372, |
| "learning_rate": 5.532247528161629e-07, |
| "loss": 1.6574, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.19029950827000447, |
| "grad_norm": 1.1320778263461202, |
| "learning_rate": 5.531607338166842e-07, |
| "loss": 1.6688, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.1904336164506035, |
| "grad_norm": 1.1471242711580207, |
| "learning_rate": 5.530966751980036e-07, |
| "loss": 1.6654, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.1905677246312025, |
| "grad_norm": 1.0867888184745689, |
| "learning_rate": 5.530325769714941e-07, |
| "loss": 1.5906, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.19070183281180153, |
| "grad_norm": 1.4483826712692085, |
| "learning_rate": 5.529684391485354e-07, |
| "loss": 1.5822, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.19083594099240053, |
| "grad_norm": 1.255407468844781, |
| "learning_rate": 5.529042617405144e-07, |
| "loss": 1.7131, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.19097004917299956, |
| "grad_norm": 1.158464569939825, |
| "learning_rate": 5.528400447588247e-07, |
| "loss": 1.7756, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.19110415735359856, |
| "grad_norm": 1.0950885308074678, |
| "learning_rate": 5.527757882148672e-07, |
| "loss": 1.5582, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.1912382655341976, |
| "grad_norm": 1.1070256742947473, |
| "learning_rate": 5.527114921200501e-07, |
| "loss": 1.6467, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.1913723737147966, |
| "grad_norm": 1.0928498062976033, |
| "learning_rate": 5.52647156485788e-07, |
| "loss": 1.7125, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.19150648189539562, |
| "grad_norm": 1.1327469366060336, |
| "learning_rate": 5.525827813235029e-07, |
| "loss": 1.6743, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.19164059007599463, |
| "grad_norm": 1.0882012662709442, |
| "learning_rate": 5.525183666446239e-07, |
| "loss": 1.6799, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.19177469825659366, |
| "grad_norm": 1.1709943857735898, |
| "learning_rate": 5.524539124605868e-07, |
| "loss": 1.766, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.19190880643719266, |
| "grad_norm": 1.0839291014706198, |
| "learning_rate": 5.523894187828345e-07, |
| "loss": 1.6322, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.1920429146177917, |
| "grad_norm": 1.0975188778434444, |
| "learning_rate": 5.523248856228172e-07, |
| "loss": 1.7589, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.1921770227983907, |
| "grad_norm": 1.1022611138397802, |
| "learning_rate": 5.522603129919919e-07, |
| "loss": 1.6493, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.19231113097898972, |
| "grad_norm": 1.0944356638645014, |
| "learning_rate": 5.521957009018224e-07, |
| "loss": 1.6845, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.19244523915958872, |
| "grad_norm": 1.1206597827966063, |
| "learning_rate": 5.521310493637798e-07, |
| "loss": 1.6926, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.19257934734018775, |
| "grad_norm": 1.0956992634305383, |
| "learning_rate": 5.520663583893422e-07, |
| "loss": 1.6463, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.19271345552078678, |
| "grad_norm": 1.0831083719944854, |
| "learning_rate": 5.520016279899947e-07, |
| "loss": 1.599, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.19284756370138578, |
| "grad_norm": 1.391549260981187, |
| "learning_rate": 5.51936858177229e-07, |
| "loss": 1.6344, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.1929816718819848, |
| "grad_norm": 1.1524973265055787, |
| "learning_rate": 5.518720489625443e-07, |
| "loss": 1.7242, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.19311578006258381, |
| "grad_norm": 1.1802426876707486, |
| "learning_rate": 5.518072003574467e-07, |
| "loss": 1.6515, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.19324988824318284, |
| "grad_norm": 1.1402824833918361, |
| "learning_rate": 5.51742312373449e-07, |
| "loss": 1.8068, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.19338399642378185, |
| "grad_norm": 1.3034827789380141, |
| "learning_rate": 5.516773850220713e-07, |
| "loss": 1.5961, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.19351810460438088, |
| "grad_norm": 1.0690564805797904, |
| "learning_rate": 5.516124183148406e-07, |
| "loss": 1.6845, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.19365221278497988, |
| "grad_norm": 1.0643025118264189, |
| "learning_rate": 5.515474122632908e-07, |
| "loss": 1.6856, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.1937863209655789, |
| "grad_norm": 1.1264779418191524, |
| "learning_rate": 5.51482366878963e-07, |
| "loss": 1.6055, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.1939204291461779, |
| "grad_norm": 1.024225937952105, |
| "learning_rate": 5.51417282173405e-07, |
| "loss": 1.6615, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.19405453732677694, |
| "grad_norm": 1.161971897328525, |
| "learning_rate": 5.513521581581719e-07, |
| "loss": 1.6043, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.19418864550737594, |
| "grad_norm": 1.0885797045193277, |
| "learning_rate": 5.512869948448252e-07, |
| "loss": 1.701, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.19432275368797497, |
| "grad_norm": 1.1421314031719336, |
| "learning_rate": 5.512217922449342e-07, |
| "loss": 1.6471, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.19445686186857397, |
| "grad_norm": 1.077561352558914, |
| "learning_rate": 5.511565503700745e-07, |
| "loss": 1.7467, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.194590970049173, |
| "grad_norm": 1.1713587803273386, |
| "learning_rate": 5.51091269231829e-07, |
| "loss": 1.833, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.194725078229772, |
| "grad_norm": 1.1325441610620945, |
| "learning_rate": 5.510259488417875e-07, |
| "loss": 1.6516, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.19485918641037103, |
| "grad_norm": 1.105302401232543, |
| "learning_rate": 5.509605892115468e-07, |
| "loss": 1.6555, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.19499329459097006, |
| "grad_norm": 1.1082502770943088, |
| "learning_rate": 5.508951903527105e-07, |
| "loss": 1.6901, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.19512740277156906, |
| "grad_norm": 1.2100417158092283, |
| "learning_rate": 5.508297522768895e-07, |
| "loss": 1.7645, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.1952615109521681, |
| "grad_norm": 1.054087647701517, |
| "learning_rate": 5.507642749957011e-07, |
| "loss": 1.714, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.1953956191327671, |
| "grad_norm": 1.0560637240698765, |
| "learning_rate": 5.506987585207703e-07, |
| "loss": 1.6332, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.19552972731336613, |
| "grad_norm": 1.110689185152269, |
| "learning_rate": 5.506332028637285e-07, |
| "loss": 1.6175, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.19566383549396513, |
| "grad_norm": 1.0676099046686827, |
| "learning_rate": 5.505676080362142e-07, |
| "loss": 1.753, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.19579794367456416, |
| "grad_norm": 1.0306885085920625, |
| "learning_rate": 5.505019740498731e-07, |
| "loss": 1.5685, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.19593205185516316, |
| "grad_norm": 1.0775372740576943, |
| "learning_rate": 5.504363009163573e-07, |
| "loss": 1.6199, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.1960661600357622, |
| "grad_norm": 1.0643274573728114, |
| "learning_rate": 5.503705886473264e-07, |
| "loss": 1.6547, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.1962002682163612, |
| "grad_norm": 1.0711004226035805, |
| "learning_rate": 5.503048372544466e-07, |
| "loss": 1.7047, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.19633437639696022, |
| "grad_norm": 1.123667947934815, |
| "learning_rate": 5.502390467493915e-07, |
| "loss": 1.7008, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.19646848457755922, |
| "grad_norm": 1.0844329149084733, |
| "learning_rate": 5.501732171438408e-07, |
| "loss": 1.6279, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.19660259275815825, |
| "grad_norm": 1.436970815874584, |
| "learning_rate": 5.501073484494822e-07, |
| "loss": 1.6543, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.19673670093875725, |
| "grad_norm": 1.1579140829195231, |
| "learning_rate": 5.500414406780093e-07, |
| "loss": 1.6149, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.19687080911935628, |
| "grad_norm": 1.1219759034001007, |
| "learning_rate": 5.499754938411235e-07, |
| "loss": 1.6853, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.19700491729995528, |
| "grad_norm": 1.1456958318708046, |
| "learning_rate": 5.499095079505327e-07, |
| "loss": 1.6056, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.19713902548055431, |
| "grad_norm": 1.135367963109951, |
| "learning_rate": 5.498434830179519e-07, |
| "loss": 1.6775, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.19727313366115334, |
| "grad_norm": 1.0665068451667536, |
| "learning_rate": 5.497774190551028e-07, |
| "loss": 1.6953, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.19740724184175235, |
| "grad_norm": 1.0531212330423794, |
| "learning_rate": 5.497113160737142e-07, |
| "loss": 1.6531, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.19754135002235138, |
| "grad_norm": 1.1454744923401645, |
| "learning_rate": 5.496451740855217e-07, |
| "loss": 1.7061, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.19767545820295038, |
| "grad_norm": 1.1044037302229577, |
| "learning_rate": 5.49578993102268e-07, |
| "loss": 1.6111, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.1978095663835494, |
| "grad_norm": 1.0685087974547518, |
| "learning_rate": 5.495127731357029e-07, |
| "loss": 1.572, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.1979436745641484, |
| "grad_norm": 1.0974414948618096, |
| "learning_rate": 5.494465141975826e-07, |
| "loss": 1.6854, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.19807778274474744, |
| "grad_norm": 1.0834578832501205, |
| "learning_rate": 5.493802162996703e-07, |
| "loss": 1.6889, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.19821189092534644, |
| "grad_norm": 1.070274290599906, |
| "learning_rate": 5.493138794537367e-07, |
| "loss": 1.6939, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.19834599910594547, |
| "grad_norm": 1.115057911105637, |
| "learning_rate": 5.49247503671559e-07, |
| "loss": 1.6584, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.19848010728654447, |
| "grad_norm": 1.1561061527897827, |
| "learning_rate": 5.491810889649211e-07, |
| "loss": 1.7095, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.1986142154671435, |
| "grad_norm": 1.1456838684818837, |
| "learning_rate": 5.491146353456139e-07, |
| "loss": 1.5911, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.1987483236477425, |
| "grad_norm": 1.0828440940723576, |
| "learning_rate": 5.490481428254358e-07, |
| "loss": 1.6674, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.19888243182834153, |
| "grad_norm": 1.1636923332921367, |
| "learning_rate": 5.489816114161914e-07, |
| "loss": 1.7205, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.19901654000894053, |
| "grad_norm": 1.197166180009061, |
| "learning_rate": 5.489150411296926e-07, |
| "loss": 1.5965, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.19915064818953956, |
| "grad_norm": 1.9827106666547534, |
| "learning_rate": 5.488484319777578e-07, |
| "loss": 1.7469, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.19928475637013857, |
| "grad_norm": 1.140838885188788, |
| "learning_rate": 5.487817839722128e-07, |
| "loss": 1.7168, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.1994188645507376, |
| "grad_norm": 1.0817633006855307, |
| "learning_rate": 5.487150971248901e-07, |
| "loss": 1.5428, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.19955297273133663, |
| "grad_norm": 1.076792423128002, |
| "learning_rate": 5.486483714476288e-07, |
| "loss": 1.788, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.19968708091193563, |
| "grad_norm": 1.1267981548935038, |
| "learning_rate": 5.485816069522754e-07, |
| "loss": 1.692, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.19982118909253466, |
| "grad_norm": 1.0735390096180335, |
| "learning_rate": 5.485148036506829e-07, |
| "loss": 1.6896, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.19995529727313366, |
| "grad_norm": 1.067799342487284, |
| "learning_rate": 5.484479615547114e-07, |
| "loss": 1.5558, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.2000894054537327, |
| "grad_norm": 1.134188777380917, |
| "learning_rate": 5.483810806762278e-07, |
| "loss": 1.6667, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.2002235136343317, |
| "grad_norm": 1.0312169428441251, |
| "learning_rate": 5.483141610271059e-07, |
| "loss": 1.5311, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.20035762181493072, |
| "grad_norm": 1.113434318828811, |
| "learning_rate": 5.482472026192263e-07, |
| "loss": 1.662, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.20049172999552972, |
| "grad_norm": 1.0830554984993648, |
| "learning_rate": 5.481802054644767e-07, |
| "loss": 1.6549, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.20062583817612875, |
| "grad_norm": 1.1263172768039542, |
| "learning_rate": 5.481131695747516e-07, |
| "loss": 1.7273, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.20075994635672775, |
| "grad_norm": 1.0175973585933547, |
| "learning_rate": 5.480460949619521e-07, |
| "loss": 1.6573, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.20089405453732678, |
| "grad_norm": 1.0684638665771677, |
| "learning_rate": 5.479789816379866e-07, |
| "loss": 1.5783, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.20102816271792578, |
| "grad_norm": 1.100911731230959, |
| "learning_rate": 5.479118296147701e-07, |
| "loss": 1.7139, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.20116227089852481, |
| "grad_norm": 1.0645364314712737, |
| "learning_rate": 5.478446389042245e-07, |
| "loss": 1.6684, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.20129637907912382, |
| "grad_norm": 1.0556389823591241, |
| "learning_rate": 5.477774095182787e-07, |
| "loss": 1.5132, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.20143048725972285, |
| "grad_norm": 1.2334210157237786, |
| "learning_rate": 5.477101414688683e-07, |
| "loss": 1.6951, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.20156459544032185, |
| "grad_norm": 1.058485353217571, |
| "learning_rate": 5.47642834767936e-07, |
| "loss": 1.6295, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.20169870362092088, |
| "grad_norm": 1.0445219837933504, |
| "learning_rate": 5.475754894274309e-07, |
| "loss": 1.6173, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.20183281180151988, |
| "grad_norm": 1.1004187774444296, |
| "learning_rate": 5.475081054593096e-07, |
| "loss": 1.739, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.2019669199821189, |
| "grad_norm": 1.1602467124536924, |
| "learning_rate": 5.47440682875535e-07, |
| "loss": 1.6625, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.20210102816271794, |
| "grad_norm": 1.0567141838600442, |
| "learning_rate": 5.47373221688077e-07, |
| "loss": 1.7637, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.20223513634331694, |
| "grad_norm": 1.1231422155189525, |
| "learning_rate": 5.473057219089128e-07, |
| "loss": 1.6322, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.20236924452391597, |
| "grad_norm": 1.090099414447627, |
| "learning_rate": 5.472381835500258e-07, |
| "loss": 1.7463, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.20250335270451497, |
| "grad_norm": 1.036240114395212, |
| "learning_rate": 5.471706066234064e-07, |
| "loss": 1.5938, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.202637460885114, |
| "grad_norm": 1.0971271814274632, |
| "learning_rate": 5.471029911410524e-07, |
| "loss": 1.729, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.202771569065713, |
| "grad_norm": 1.0884227452009132, |
| "learning_rate": 5.470353371149678e-07, |
| "loss": 1.6752, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.20290567724631203, |
| "grad_norm": 1.0387697751366196, |
| "learning_rate": 5.469676445571636e-07, |
| "loss": 1.6329, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.20303978542691103, |
| "grad_norm": 1.0513306520797294, |
| "learning_rate": 5.468999134796577e-07, |
| "loss": 1.7112, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.20317389360751006, |
| "grad_norm": 1.0894137924530085, |
| "learning_rate": 5.46832143894475e-07, |
| "loss": 1.6982, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.20330800178810907, |
| "grad_norm": 1.0770824339698073, |
| "learning_rate": 5.467643358136469e-07, |
| "loss": 1.7484, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.2034421099687081, |
| "grad_norm": 1.095801657453924, |
| "learning_rate": 5.466964892492119e-07, |
| "loss": 1.6417, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.2035762181493071, |
| "grad_norm": 1.0796491311299437, |
| "learning_rate": 5.466286042132154e-07, |
| "loss": 1.701, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.20371032632990613, |
| "grad_norm": 1.1233329399203666, |
| "learning_rate": 5.465606807177093e-07, |
| "loss": 1.7951, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.20384443451050513, |
| "grad_norm": 1.1327885765244115, |
| "learning_rate": 5.464927187747525e-07, |
| "loss": 1.7971, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.20397854269110416, |
| "grad_norm": 1.088717235432573, |
| "learning_rate": 5.464247183964108e-07, |
| "loss": 1.7474, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.20411265087170316, |
| "grad_norm": 1.1850510030757087, |
| "learning_rate": 5.463566795947566e-07, |
| "loss": 1.755, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.2042467590523022, |
| "grad_norm": 1.0812752508540497, |
| "learning_rate": 5.462886023818697e-07, |
| "loss": 1.7443, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.20438086723290122, |
| "grad_norm": 1.1119200217165637, |
| "learning_rate": 5.462204867698359e-07, |
| "loss": 1.7364, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.20451497541350022, |
| "grad_norm": 1.0637313799778825, |
| "learning_rate": 5.461523327707483e-07, |
| "loss": 1.6503, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.20464908359409925, |
| "grad_norm": 1.0673393108107518, |
| "learning_rate": 5.460841403967067e-07, |
| "loss": 1.7131, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.20478319177469825, |
| "grad_norm": 1.1295826465075736, |
| "learning_rate": 5.46015909659818e-07, |
| "loss": 1.6669, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.20491729995529728, |
| "grad_norm": 1.037795106149209, |
| "learning_rate": 5.459476405721954e-07, |
| "loss": 1.7402, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.20505140813589628, |
| "grad_norm": 1.0645070431850514, |
| "learning_rate": 5.458793331459591e-07, |
| "loss": 1.5445, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.20518551631649531, |
| "grad_norm": 1.128995508468257, |
| "learning_rate": 5.458109873932364e-07, |
| "loss": 1.648, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.20531962449709432, |
| "grad_norm": 1.1073104845376167, |
| "learning_rate": 5.45742603326161e-07, |
| "loss": 1.6629, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.20545373267769335, |
| "grad_norm": 1.0389720964404514, |
| "learning_rate": 5.456741809568737e-07, |
| "loss": 1.6007, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.20558784085829235, |
| "grad_norm": 1.0874355308974621, |
| "learning_rate": 5.456057202975218e-07, |
| "loss": 1.7692, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.20572194903889138, |
| "grad_norm": 1.1762066099415274, |
| "learning_rate": 5.455372213602598e-07, |
| "loss": 1.7199, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.20585605721949038, |
| "grad_norm": 1.1248545879023728, |
| "learning_rate": 5.454686841572487e-07, |
| "loss": 1.6949, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.2059901654000894, |
| "grad_norm": 1.1062297817819333, |
| "learning_rate": 5.454001087006563e-07, |
| "loss": 1.6879, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.2061242735806884, |
| "grad_norm": 1.5278212260735322, |
| "learning_rate": 5.453314950026572e-07, |
| "loss": 1.6452, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.20625838176128744, |
| "grad_norm": 1.1382568321141864, |
| "learning_rate": 5.452628430754329e-07, |
| "loss": 1.6296, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.20639248994188644, |
| "grad_norm": 1.0827447066590228, |
| "learning_rate": 5.451941529311719e-07, |
| "loss": 1.6213, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.20652659812248547, |
| "grad_norm": 1.090225177526994, |
| "learning_rate": 5.451254245820687e-07, |
| "loss": 1.7525, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.2066607063030845, |
| "grad_norm": 1.1632282700056857, |
| "learning_rate": 5.450566580403255e-07, |
| "loss": 1.7183, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.2067948144836835, |
| "grad_norm": 1.0773895407601781, |
| "learning_rate": 5.449878533181507e-07, |
| "loss": 1.5786, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.20692892266428253, |
| "grad_norm": 1.1177081269020515, |
| "learning_rate": 5.449190104277597e-07, |
| "loss": 1.6153, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.20706303084488153, |
| "grad_norm": 1.0715060717734257, |
| "learning_rate": 5.448501293813747e-07, |
| "loss": 1.6768, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.20719713902548056, |
| "grad_norm": 1.0810287574993174, |
| "learning_rate": 5.447812101912244e-07, |
| "loss": 1.6401, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.20733124720607957, |
| "grad_norm": 1.130608952204106, |
| "learning_rate": 5.447122528695449e-07, |
| "loss": 1.6824, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.2074653553866786, |
| "grad_norm": 1.0467682842596422, |
| "learning_rate": 5.446432574285782e-07, |
| "loss": 1.6087, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.2075994635672776, |
| "grad_norm": 1.139618228642282, |
| "learning_rate": 5.445742238805737e-07, |
| "loss": 1.7645, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.20773357174787663, |
| "grad_norm": 1.1216742451759847, |
| "learning_rate": 5.445051522377873e-07, |
| "loss": 1.7316, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.20786767992847563, |
| "grad_norm": 1.0899102977167905, |
| "learning_rate": 5.44436042512482e-07, |
| "loss": 1.6322, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.20800178810907466, |
| "grad_norm": 1.0497718485142342, |
| "learning_rate": 5.44366894716927e-07, |
| "loss": 1.6566, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.20813589628967366, |
| "grad_norm": 1.0712432967566454, |
| "learning_rate": 5.442977088633988e-07, |
| "loss": 1.6461, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.2082700044702727, |
| "grad_norm": 1.1933916778735016, |
| "learning_rate": 5.442284849641803e-07, |
| "loss": 1.7043, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.2084041126508717, |
| "grad_norm": 1.0126599257311222, |
| "learning_rate": 5.441592230315611e-07, |
| "loss": 1.6054, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.20853822083147072, |
| "grad_norm": 1.3982183722799013, |
| "learning_rate": 5.440899230778381e-07, |
| "loss": 1.6898, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.20867232901206972, |
| "grad_norm": 1.056858598949215, |
| "learning_rate": 5.440205851153145e-07, |
| "loss": 1.6916, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.20880643719266875, |
| "grad_norm": 1.176924033372761, |
| "learning_rate": 5.439512091563e-07, |
| "loss": 1.7511, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.20894054537326778, |
| "grad_norm": 1.057297882595847, |
| "learning_rate": 5.438817952131117e-07, |
| "loss": 1.6588, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.20907465355386678, |
| "grad_norm": 1.0967767040598801, |
| "learning_rate": 5.43812343298073e-07, |
| "loss": 1.6058, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.20920876173446581, |
| "grad_norm": 1.0935764349197725, |
| "learning_rate": 5.437428534235142e-07, |
| "loss": 1.7097, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.20934286991506482, |
| "grad_norm": 1.1271250900157348, |
| "learning_rate": 5.436733256017723e-07, |
| "loss": 1.6236, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.20947697809566385, |
| "grad_norm": 1.1745352200541934, |
| "learning_rate": 5.43603759845191e-07, |
| "loss": 1.6031, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.20961108627626285, |
| "grad_norm": 1.110585453023522, |
| "learning_rate": 5.435341561661208e-07, |
| "loss": 1.6934, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.20974519445686188, |
| "grad_norm": 1.1030892366405238, |
| "learning_rate": 5.434645145769189e-07, |
| "loss": 1.6745, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.20987930263746088, |
| "grad_norm": 1.0681781865728208, |
| "learning_rate": 5.433948350899491e-07, |
| "loss": 1.6327, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.2100134108180599, |
| "grad_norm": 1.1588290451716836, |
| "learning_rate": 5.433251177175822e-07, |
| "loss": 1.6737, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.2101475189986589, |
| "grad_norm": 1.055357765245883, |
| "learning_rate": 5.432553624721957e-07, |
| "loss": 1.6018, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.21028162717925794, |
| "grad_norm": 1.2241168862848832, |
| "learning_rate": 5.431855693661734e-07, |
| "loss": 1.6702, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.21041573535985694, |
| "grad_norm": 1.0592720322600389, |
| "learning_rate": 5.431157384119064e-07, |
| "loss": 1.6243, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.21054984354045597, |
| "grad_norm": 1.0780860574912356, |
| "learning_rate": 5.43045869621792e-07, |
| "loss": 1.5921, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.21068395172105497, |
| "grad_norm": 1.0964102584808006, |
| "learning_rate": 5.429759630082348e-07, |
| "loss": 1.6461, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.210818059901654, |
| "grad_norm": 1.135891674611892, |
| "learning_rate": 5.429060185836456e-07, |
| "loss": 1.6602, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.210952168082253, |
| "grad_norm": 1.104678715415077, |
| "learning_rate": 5.42836036360442e-07, |
| "loss": 1.5908, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.21108627626285204, |
| "grad_norm": 1.1405223716065391, |
| "learning_rate": 5.427660163510486e-07, |
| "loss": 1.6062, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.21122038444345104, |
| "grad_norm": 1.055115497261772, |
| "learning_rate": 5.426959585678964e-07, |
| "loss": 1.614, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.21135449262405007, |
| "grad_norm": 1.0866284593737212, |
| "learning_rate": 5.426258630234232e-07, |
| "loss": 1.623, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.2114886008046491, |
| "grad_norm": 1.1082738074471385, |
| "learning_rate": 5.425557297300736e-07, |
| "loss": 1.6905, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.2116227089852481, |
| "grad_norm": 1.0561977130172522, |
| "learning_rate": 5.424855587002988e-07, |
| "loss": 1.7265, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.21175681716584713, |
| "grad_norm": 1.111034072593952, |
| "learning_rate": 5.424153499465566e-07, |
| "loss": 1.5797, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.21189092534644613, |
| "grad_norm": 1.110485425151033, |
| "learning_rate": 5.42345103481312e-07, |
| "loss": 1.7321, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.21202503352704516, |
| "grad_norm": 1.057458554660141, |
| "learning_rate": 5.42274819317036e-07, |
| "loss": 1.6052, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.21215914170764416, |
| "grad_norm": 1.0759547522338926, |
| "learning_rate": 5.422044974662066e-07, |
| "loss": 1.5403, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.2122932498882432, |
| "grad_norm": 1.09889881778652, |
| "learning_rate": 5.421341379413087e-07, |
| "loss": 1.6477, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.2124273580688422, |
| "grad_norm": 1.0824182868909191, |
| "learning_rate": 5.420637407548336e-07, |
| "loss": 1.6666, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.21256146624944122, |
| "grad_norm": 1.1246790227619754, |
| "learning_rate": 5.419933059192792e-07, |
| "loss": 1.7284, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.21269557443004022, |
| "grad_norm": 1.1784965009347046, |
| "learning_rate": 5.419228334471505e-07, |
| "loss": 1.6751, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.21282968261063925, |
| "grad_norm": 1.0981401155317758, |
| "learning_rate": 5.418523233509588e-07, |
| "loss": 1.5569, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.21296379079123826, |
| "grad_norm": 1.059671249600233, |
| "learning_rate": 5.417817756432223e-07, |
| "loss": 1.6094, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.21309789897183729, |
| "grad_norm": 1.0850751309161322, |
| "learning_rate": 5.417111903364658e-07, |
| "loss": 1.6205, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.2132320071524363, |
| "grad_norm": 1.1513764671534936, |
| "learning_rate": 5.416405674432208e-07, |
| "loss": 1.6778, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.21336611533303532, |
| "grad_norm": 1.0380273585127677, |
| "learning_rate": 5.415699069760254e-07, |
| "loss": 1.6195, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.21350022351363432, |
| "grad_norm": 1.166702747823365, |
| "learning_rate": 5.414992089474245e-07, |
| "loss": 1.6814, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.21363433169423335, |
| "grad_norm": 1.1893324397979834, |
| "learning_rate": 5.414284733699695e-07, |
| "loss": 1.773, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.21376843987483238, |
| "grad_norm": 1.1127641897298384, |
| "learning_rate": 5.413577002562186e-07, |
| "loss": 1.7076, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.21390254805543138, |
| "grad_norm": 1.080383382708094, |
| "learning_rate": 5.412868896187365e-07, |
| "loss": 1.7324, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.2140366562360304, |
| "grad_norm": 1.0952540724207267, |
| "learning_rate": 5.412160414700948e-07, |
| "loss": 1.7437, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.2141707644166294, |
| "grad_norm": 1.153542257175551, |
| "learning_rate": 5.411451558228716e-07, |
| "loss": 1.7386, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.21430487259722844, |
| "grad_norm": 1.111562609679836, |
| "learning_rate": 5.410742326896519e-07, |
| "loss": 1.6339, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.21443898077782744, |
| "grad_norm": 1.0752256282606487, |
| "learning_rate": 5.410032720830268e-07, |
| "loss": 1.6502, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.21457308895842647, |
| "grad_norm": 1.1124138961511616, |
| "learning_rate": 5.409322740155947e-07, |
| "loss": 1.6977, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.21470719713902547, |
| "grad_norm": 1.1079557958778445, |
| "learning_rate": 5.408612384999601e-07, |
| "loss": 1.752, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.2148413053196245, |
| "grad_norm": 1.0753628455770323, |
| "learning_rate": 5.407901655487346e-07, |
| "loss": 1.6314, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.2149754135002235, |
| "grad_norm": 1.083459999091914, |
| "learning_rate": 5.407190551745362e-07, |
| "loss": 1.6034, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.21510952168082254, |
| "grad_norm": 1.0970151998487565, |
| "learning_rate": 5.406479073899896e-07, |
| "loss": 1.6246, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.21524362986142154, |
| "grad_norm": 1.0937201976032398, |
| "learning_rate": 5.405767222077262e-07, |
| "loss": 1.7172, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.21537773804202057, |
| "grad_norm": 1.0450933325728613, |
| "learning_rate": 5.405054996403838e-07, |
| "loss": 1.6418, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.21551184622261957, |
| "grad_norm": 1.1080460169200497, |
| "learning_rate": 5.40434239700607e-07, |
| "loss": 1.5472, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.2156459544032186, |
| "grad_norm": 1.1272243483080113, |
| "learning_rate": 5.403629424010473e-07, |
| "loss": 1.6365, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.2157800625838176, |
| "grad_norm": 1.0764797457941864, |
| "learning_rate": 5.402916077543625e-07, |
| "loss": 1.6407, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.21591417076441663, |
| "grad_norm": 1.113524889126991, |
| "learning_rate": 5.402202357732169e-07, |
| "loss": 1.6827, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.21604827894501566, |
| "grad_norm": 1.0108430825355625, |
| "learning_rate": 5.40148826470282e-07, |
| "loss": 1.6089, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.21618238712561466, |
| "grad_norm": 1.0591615486944377, |
| "learning_rate": 5.400773798582352e-07, |
| "loss": 1.6503, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.2163164953062137, |
| "grad_norm": 1.0340063487662052, |
| "learning_rate": 5.400058959497611e-07, |
| "loss": 1.6383, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.2164506034868127, |
| "grad_norm": 1.1516572358715267, |
| "learning_rate": 5.399343747575507e-07, |
| "loss": 1.6974, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.21658471166741172, |
| "grad_norm": 1.0592103543406746, |
| "learning_rate": 5.398628162943016e-07, |
| "loss": 1.6353, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.21671881984801072, |
| "grad_norm": 1.0385313908985447, |
| "learning_rate": 5.39791220572718e-07, |
| "loss": 1.6162, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.21685292802860975, |
| "grad_norm": 1.2744072569777416, |
| "learning_rate": 5.397195876055107e-07, |
| "loss": 1.6091, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.21698703620920876, |
| "grad_norm": 1.1238614219371639, |
| "learning_rate": 5.396479174053974e-07, |
| "loss": 1.6806, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.21712114438980779, |
| "grad_norm": 1.1243988511377025, |
| "learning_rate": 5.39576209985102e-07, |
| "loss": 1.6404, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.2172552525704068, |
| "grad_norm": 1.110274303539327, |
| "learning_rate": 5.395044653573553e-07, |
| "loss": 1.7572, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.21738936075100582, |
| "grad_norm": 1.485784445158895, |
| "learning_rate": 5.394326835348946e-07, |
| "loss": 1.6521, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.21752346893160482, |
| "grad_norm": 1.1075544133593012, |
| "learning_rate": 5.393608645304638e-07, |
| "loss": 1.6241, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.21765757711220385, |
| "grad_norm": 1.1036354518105045, |
| "learning_rate": 5.392890083568133e-07, |
| "loss": 1.7734, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.21779168529280285, |
| "grad_norm": 1.1528361438777202, |
| "learning_rate": 5.392171150267002e-07, |
| "loss": 1.6317, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.21792579347340188, |
| "grad_norm": 1.093945976907915, |
| "learning_rate": 5.391451845528883e-07, |
| "loss": 1.6645, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.21805990165400088, |
| "grad_norm": 1.0725853841774324, |
| "learning_rate": 5.390732169481478e-07, |
| "loss": 1.6491, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.2181940098345999, |
| "grad_norm": 1.1106862604843828, |
| "learning_rate": 5.390012122252557e-07, |
| "loss": 1.6931, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.21832811801519894, |
| "grad_norm": 1.2277327010437984, |
| "learning_rate": 5.389291703969954e-07, |
| "loss": 1.6584, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.21846222619579794, |
| "grad_norm": 1.1082783806832028, |
| "learning_rate": 5.388570914761571e-07, |
| "loss": 1.6083, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.21859633437639697, |
| "grad_norm": 1.0835070473943422, |
| "learning_rate": 5.387849754755371e-07, |
| "loss": 1.6693, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.21873044255699597, |
| "grad_norm": 1.0984810480873552, |
| "learning_rate": 5.38712822407939e-07, |
| "loss": 1.7465, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.218864550737595, |
| "grad_norm": 1.0824052521651053, |
| "learning_rate": 5.386406322861723e-07, |
| "loss": 1.6514, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.218998658918194, |
| "grad_norm": 1.1359714482507233, |
| "learning_rate": 5.385684051230537e-07, |
| "loss": 1.7069, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.21913276709879304, |
| "grad_norm": 1.1071556040519455, |
| "learning_rate": 5.384961409314061e-07, |
| "loss": 1.7147, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.21926687527939204, |
| "grad_norm": 1.2083127255075479, |
| "learning_rate": 5.384238397240588e-07, |
| "loss": 1.6825, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.21940098345999107, |
| "grad_norm": 1.090487031491975, |
| "learning_rate": 5.383515015138481e-07, |
| "loss": 1.6754, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.21953509164059007, |
| "grad_norm": 1.1766814612885304, |
| "learning_rate": 5.382791263136168e-07, |
| "loss": 1.6694, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.2196691998211891, |
| "grad_norm": 1.122843389486521, |
| "learning_rate": 5.382067141362139e-07, |
| "loss": 1.6044, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.2198033080017881, |
| "grad_norm": 1.223339411577744, |
| "learning_rate": 5.381342649944952e-07, |
| "loss": 1.6101, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.21993741618238713, |
| "grad_norm": 1.0694591790206647, |
| "learning_rate": 5.380617789013233e-07, |
| "loss": 1.6867, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.22007152436298613, |
| "grad_norm": 1.2184481374104812, |
| "learning_rate": 5.379892558695671e-07, |
| "loss": 1.8251, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.22020563254358516, |
| "grad_norm": 1.144903181431307, |
| "learning_rate": 5.37916695912102e-07, |
| "loss": 1.6531, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.22033974072418416, |
| "grad_norm": 1.0887276474568761, |
| "learning_rate": 5.378440990418099e-07, |
| "loss": 1.6042, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.2204738489047832, |
| "grad_norm": 1.0674234053275629, |
| "learning_rate": 5.377714652715797e-07, |
| "loss": 1.6711, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.2206079570853822, |
| "grad_norm": 1.0790696186946844, |
| "learning_rate": 5.376987946143065e-07, |
| "loss": 1.6381, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.22074206526598122, |
| "grad_norm": 1.1045544089627806, |
| "learning_rate": 5.376260870828918e-07, |
| "loss": 1.6532, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.22087617344658025, |
| "grad_norm": 1.1325732851922752, |
| "learning_rate": 5.375533426902441e-07, |
| "loss": 1.698, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.22101028162717926, |
| "grad_norm": 1.1364383071296065, |
| "learning_rate": 5.37480561449278e-07, |
| "loss": 1.6822, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.22114438980777829, |
| "grad_norm": 1.2662493806229793, |
| "learning_rate": 5.374077433729149e-07, |
| "loss": 1.6811, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.2212784979883773, |
| "grad_norm": 1.0631367908379292, |
| "learning_rate": 5.373348884740827e-07, |
| "loss": 1.6659, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.22141260616897632, |
| "grad_norm": 1.041940858543604, |
| "learning_rate": 5.372619967657157e-07, |
| "loss": 1.6331, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.22154671434957532, |
| "grad_norm": 1.1280546628953805, |
| "learning_rate": 5.37189068260755e-07, |
| "loss": 1.56, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.22168082253017435, |
| "grad_norm": 1.1849258060825412, |
| "learning_rate": 5.371161029721481e-07, |
| "loss": 1.7092, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.22181493071077335, |
| "grad_norm": 1.049528339776241, |
| "learning_rate": 5.370431009128489e-07, |
| "loss": 1.6428, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.22194903889137238, |
| "grad_norm": 1.0820046738092695, |
| "learning_rate": 5.36970062095818e-07, |
| "loss": 1.7025, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.22208314707197138, |
| "grad_norm": 1.154353230216256, |
| "learning_rate": 5.368969865340224e-07, |
| "loss": 1.6826, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.2222172552525704, |
| "grad_norm": 1.053650977152218, |
| "learning_rate": 5.368238742404357e-07, |
| "loss": 1.6172, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.2223513634331694, |
| "grad_norm": 1.1279575224119966, |
| "learning_rate": 5.367507252280381e-07, |
| "loss": 1.6856, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.22248547161376844, |
| "grad_norm": 1.084009451627439, |
| "learning_rate": 5.36677539509816e-07, |
| "loss": 1.7398, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.22261957979436744, |
| "grad_norm": 1.1545149862581074, |
| "learning_rate": 5.366043170987628e-07, |
| "loss": 1.7321, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.22275368797496647, |
| "grad_norm": 1.1304140083027916, |
| "learning_rate": 5.365310580078781e-07, |
| "loss": 1.773, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.22288779615556548, |
| "grad_norm": 1.0642630051886424, |
| "learning_rate": 5.364577622501681e-07, |
| "loss": 1.711, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.2230219043361645, |
| "grad_norm": 1.040347865228387, |
| "learning_rate": 5.363844298386453e-07, |
| "loss": 1.631, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.22315601251676354, |
| "grad_norm": 1.0625862966142028, |
| "learning_rate": 5.36311060786329e-07, |
| "loss": 1.7056, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.22329012069736254, |
| "grad_norm": 1.051398453698011, |
| "learning_rate": 5.36237655106245e-07, |
| "loss": 1.5779, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.22342422887796157, |
| "grad_norm": 1.0373708741511485, |
| "learning_rate": 5.361642128114253e-07, |
| "loss": 1.6937, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.22355833705856057, |
| "grad_norm": 1.0970775365230832, |
| "learning_rate": 5.360907339149088e-07, |
| "loss": 1.7652, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.2236924452391596, |
| "grad_norm": 1.0939499626158076, |
| "learning_rate": 5.360172184297405e-07, |
| "loss": 1.7164, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.2238265534197586, |
| "grad_norm": 1.2815989841015132, |
| "learning_rate": 5.359436663689721e-07, |
| "loss": 1.6641, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.22396066160035763, |
| "grad_norm": 1.143698149806719, |
| "learning_rate": 5.358700777456621e-07, |
| "loss": 1.6344, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.22409476978095663, |
| "grad_norm": 1.1716879090974532, |
| "learning_rate": 5.357964525728747e-07, |
| "loss": 1.6979, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.22422887796155566, |
| "grad_norm": 1.063819709741502, |
| "learning_rate": 5.357227908636814e-07, |
| "loss": 1.624, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.22436298614215466, |
| "grad_norm": 1.2013467122145707, |
| "learning_rate": 5.356490926311598e-07, |
| "loss": 1.6952, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.2244970943227537, |
| "grad_norm": 1.0555387980604758, |
| "learning_rate": 5.355753578883939e-07, |
| "loss": 1.6313, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.2246312025033527, |
| "grad_norm": 1.0893242689976388, |
| "learning_rate": 5.355015866484744e-07, |
| "loss": 1.6749, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.22476531068395172, |
| "grad_norm": 1.1013312078930966, |
| "learning_rate": 5.354277789244984e-07, |
| "loss": 1.6346, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.22489941886455073, |
| "grad_norm": 1.0396725082524636, |
| "learning_rate": 5.353539347295696e-07, |
| "loss": 1.6516, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.22503352704514976, |
| "grad_norm": 1.1068093515212976, |
| "learning_rate": 5.352800540767978e-07, |
| "loss": 1.6229, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.22516763522574876, |
| "grad_norm": 1.0984721962823492, |
| "learning_rate": 5.352061369792997e-07, |
| "loss": 1.6208, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.2253017434063478, |
| "grad_norm": 1.0826869933413177, |
| "learning_rate": 5.351321834501981e-07, |
| "loss": 1.677, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.22543585158694682, |
| "grad_norm": 1.084000373067938, |
| "learning_rate": 5.350581935026227e-07, |
| "loss": 1.7401, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.22556995976754582, |
| "grad_norm": 1.0851285225408938, |
| "learning_rate": 5.349841671497093e-07, |
| "loss": 1.7231, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.22570406794814485, |
| "grad_norm": 1.1364065037848023, |
| "learning_rate": 5.349101044046004e-07, |
| "loss": 1.6977, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.22583817612874385, |
| "grad_norm": 1.1009000528239055, |
| "learning_rate": 5.348360052804447e-07, |
| "loss": 1.7396, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.22597228430934288, |
| "grad_norm": 1.0627127199486133, |
| "learning_rate": 5.347618697903976e-07, |
| "loss": 1.6, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.22610639248994188, |
| "grad_norm": 1.0936508465446555, |
| "learning_rate": 5.346876979476206e-07, |
| "loss": 1.6898, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.2262405006705409, |
| "grad_norm": 1.158039404421018, |
| "learning_rate": 5.346134897652824e-07, |
| "loss": 1.6173, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.2263746088511399, |
| "grad_norm": 1.1476901068480616, |
| "learning_rate": 5.345392452565574e-07, |
| "loss": 1.6939, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.22650871703173894, |
| "grad_norm": 1.1331738396979525, |
| "learning_rate": 5.344649644346266e-07, |
| "loss": 1.7156, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.22664282521233794, |
| "grad_norm": 1.0799876163240634, |
| "learning_rate": 5.343906473126778e-07, |
| "loss": 1.716, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.22677693339293697, |
| "grad_norm": 1.082964627665107, |
| "learning_rate": 5.343162939039048e-07, |
| "loss": 1.7274, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.22691104157353598, |
| "grad_norm": 1.0606670008679837, |
| "learning_rate": 5.342419042215082e-07, |
| "loss": 1.6872, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.227045149754135, |
| "grad_norm": 1.2139606651511192, |
| "learning_rate": 5.341674782786949e-07, |
| "loss": 1.6144, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.227179257934734, |
| "grad_norm": 1.1259721685135795, |
| "learning_rate": 5.340930160886783e-07, |
| "loss": 1.682, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.22731336611533304, |
| "grad_norm": 1.1971458828681856, |
| "learning_rate": 5.340185176646779e-07, |
| "loss": 1.666, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.22744747429593204, |
| "grad_norm": 1.0623938370168757, |
| "learning_rate": 5.339439830199201e-07, |
| "loss": 1.6716, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.22758158247653107, |
| "grad_norm": 1.0291752731398527, |
| "learning_rate": 5.338694121676374e-07, |
| "loss": 1.5643, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.2277156906571301, |
| "grad_norm": 1.073415400659899, |
| "learning_rate": 5.33794805121069e-07, |
| "loss": 1.7113, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.2278497988377291, |
| "grad_norm": 1.0719841904118037, |
| "learning_rate": 5.337201618934604e-07, |
| "loss": 1.6904, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.22798390701832813, |
| "grad_norm": 1.0589482779303245, |
| "learning_rate": 5.336454824980633e-07, |
| "loss": 1.6258, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.22811801519892713, |
| "grad_norm": 1.1032497481356218, |
| "learning_rate": 5.335707669481362e-07, |
| "loss": 1.6656, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.22825212337952616, |
| "grad_norm": 1.0840451749643811, |
| "learning_rate": 5.334960152569437e-07, |
| "loss": 1.5383, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.22838623156012516, |
| "grad_norm": 1.2721911706046112, |
| "learning_rate": 5.334212274377572e-07, |
| "loss": 1.6877, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.2285203397407242, |
| "grad_norm": 1.113467373081235, |
| "learning_rate": 5.333464035038541e-07, |
| "loss": 1.7795, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.2286544479213232, |
| "grad_norm": 1.0985371740747398, |
| "learning_rate": 5.332715434685184e-07, |
| "loss": 1.646, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.22878855610192222, |
| "grad_norm": 1.0986088766126445, |
| "learning_rate": 5.331966473450405e-07, |
| "loss": 1.7123, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.22892266428252123, |
| "grad_norm": 1.0916765886457365, |
| "learning_rate": 5.331217151467172e-07, |
| "loss": 1.6558, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.22905677246312026, |
| "grad_norm": 1.1105626967058537, |
| "learning_rate": 5.330467468868518e-07, |
| "loss": 1.6464, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.22919088064371926, |
| "grad_norm": 1.060186115294533, |
| "learning_rate": 5.329717425787539e-07, |
| "loss": 1.7554, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.2293249888243183, |
| "grad_norm": 1.1194774279858801, |
| "learning_rate": 5.328967022357393e-07, |
| "loss": 1.6726, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.2294590970049173, |
| "grad_norm": 1.04897630046238, |
| "learning_rate": 5.328216258711307e-07, |
| "loss": 1.658, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.22959320518551632, |
| "grad_norm": 1.0978402523327002, |
| "learning_rate": 5.327465134982568e-07, |
| "loss": 1.7228, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.22972731336611532, |
| "grad_norm": 1.0849254385283391, |
| "learning_rate": 5.326713651304527e-07, |
| "loss": 1.5941, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.22986142154671435, |
| "grad_norm": 1.1076316095810992, |
| "learning_rate": 5.3259618078106e-07, |
| "loss": 1.6087, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.22999552972731335, |
| "grad_norm": 1.173053113513891, |
| "learning_rate": 5.325209604634268e-07, |
| "loss": 1.6916, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.23012963790791238, |
| "grad_norm": 1.0524457049873044, |
| "learning_rate": 5.324457041909073e-07, |
| "loss": 1.7742, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.2302637460885114, |
| "grad_norm": 1.0634034874984304, |
| "learning_rate": 5.323704119768625e-07, |
| "loss": 1.676, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.2303978542691104, |
| "grad_norm": 1.1156008079132087, |
| "learning_rate": 5.322950838346592e-07, |
| "loss": 1.7271, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.23053196244970944, |
| "grad_norm": 1.1047727328230366, |
| "learning_rate": 5.322197197776711e-07, |
| "loss": 1.7865, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.23066607063030845, |
| "grad_norm": 1.027356701503526, |
| "learning_rate": 5.321443198192781e-07, |
| "loss": 1.709, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.23080017881090747, |
| "grad_norm": 1.136877539749875, |
| "learning_rate": 5.320688839728663e-07, |
| "loss": 1.6582, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.23093428699150648, |
| "grad_norm": 1.0127690499338695, |
| "learning_rate": 5.319934122518285e-07, |
| "loss": 1.7492, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.2310683951721055, |
| "grad_norm": 1.0939228317341436, |
| "learning_rate": 5.319179046695635e-07, |
| "loss": 1.5875, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.2312025033527045, |
| "grad_norm": 1.1310800565403134, |
| "learning_rate": 5.318423612394769e-07, |
| "loss": 1.6674, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.23133661153330354, |
| "grad_norm": 1.1687734972345458, |
| "learning_rate": 5.317667819749803e-07, |
| "loss": 1.6984, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.23147071971390254, |
| "grad_norm": 1.3079097416665406, |
| "learning_rate": 5.316911668894917e-07, |
| "loss": 1.7021, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.23160482789450157, |
| "grad_norm": 1.121551582881909, |
| "learning_rate": 5.316155159964357e-07, |
| "loss": 1.6389, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.23173893607510057, |
| "grad_norm": 1.110653445896344, |
| "learning_rate": 5.31539829309243e-07, |
| "loss": 1.6069, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.2318730442556996, |
| "grad_norm": 1.0532131317248028, |
| "learning_rate": 5.314641068413509e-07, |
| "loss": 1.6365, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.2320071524362986, |
| "grad_norm": 1.0606458320174244, |
| "learning_rate": 5.313883486062026e-07, |
| "loss": 1.7264, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.23214126061689763, |
| "grad_norm": 1.341898889664279, |
| "learning_rate": 5.313125546172484e-07, |
| "loss": 1.6649, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.23227536879749663, |
| "grad_norm": 1.1400544409976623, |
| "learning_rate": 5.312367248879441e-07, |
| "loss": 1.7331, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.23240947697809566, |
| "grad_norm": 1.0680650695769265, |
| "learning_rate": 5.311608594317525e-07, |
| "loss": 1.6919, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.2325435851586947, |
| "grad_norm": 1.1255461157368476, |
| "learning_rate": 5.310849582621425e-07, |
| "loss": 1.6049, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.2326776933392937, |
| "grad_norm": 1.1072444623083968, |
| "learning_rate": 5.310090213925891e-07, |
| "loss": 1.5269, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.23281180151989272, |
| "grad_norm": 1.0710603367422178, |
| "learning_rate": 5.309330488365741e-07, |
| "loss": 1.5994, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.23294590970049173, |
| "grad_norm": 1.0644784872053028, |
| "learning_rate": 5.308570406075853e-07, |
| "loss": 1.7374, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.23308001788109076, |
| "grad_norm": 1.1498695736382247, |
| "learning_rate": 5.307809967191172e-07, |
| "loss": 1.7718, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.23321412606168976, |
| "grad_norm": 1.1460626302338928, |
| "learning_rate": 5.307049171846698e-07, |
| "loss": 1.7527, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.2333482342422888, |
| "grad_norm": 1.0375010028149447, |
| "learning_rate": 5.306288020177507e-07, |
| "loss": 1.6096, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.2334823424228878, |
| "grad_norm": 1.0840298111802271, |
| "learning_rate": 5.305526512318727e-07, |
| "loss": 1.6765, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.23361645060348682, |
| "grad_norm": 1.175481103771977, |
| "learning_rate": 5.304764648405554e-07, |
| "loss": 1.6737, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.23375055878408582, |
| "grad_norm": 1.0760963915335215, |
| "learning_rate": 5.304002428573248e-07, |
| "loss": 1.6407, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.23388466696468485, |
| "grad_norm": 1.0391117459687709, |
| "learning_rate": 5.303239852957129e-07, |
| "loss": 1.7296, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.23401877514528385, |
| "grad_norm": 1.2433142693729942, |
| "learning_rate": 5.302476921692584e-07, |
| "loss": 1.6453, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.23415288332588288, |
| "grad_norm": 1.1097947586973798, |
| "learning_rate": 5.30171363491506e-07, |
| "loss": 1.6873, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.23428699150648188, |
| "grad_norm": 1.044700396070487, |
| "learning_rate": 5.30094999276007e-07, |
| "loss": 1.5877, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.2344210996870809, |
| "grad_norm": 1.1166075784138738, |
| "learning_rate": 5.300185995363186e-07, |
| "loss": 1.6547, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.23455520786767992, |
| "grad_norm": 1.1455525392590689, |
| "learning_rate": 5.299421642860049e-07, |
| "loss": 1.6328, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.23468931604827895, |
| "grad_norm": 1.0432073116091243, |
| "learning_rate": 5.298656935386355e-07, |
| "loss": 1.6934, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.23482342422887797, |
| "grad_norm": 1.301933185584584, |
| "learning_rate": 5.297891873077872e-07, |
| "loss": 1.6322, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.23495753240947698, |
| "grad_norm": 1.1184463227985266, |
| "learning_rate": 5.297126456070423e-07, |
| "loss": 1.5901, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.235091640590076, |
| "grad_norm": 1.0894760385328393, |
| "learning_rate": 5.296360684499899e-07, |
| "loss": 1.6307, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.235225748770675, |
| "grad_norm": 1.0810964826554634, |
| "learning_rate": 5.295594558502254e-07, |
| "loss": 1.671, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.23535985695127404, |
| "grad_norm": 1.0867830593910155, |
| "learning_rate": 5.2948280782135e-07, |
| "loss": 1.5898, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.23549396513187304, |
| "grad_norm": 1.0826732184990124, |
| "learning_rate": 5.29406124376972e-07, |
| "loss": 1.6753, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.23562807331247207, |
| "grad_norm": 1.1750857610640004, |
| "learning_rate": 5.29329405530705e-07, |
| "loss": 1.6238, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.23576218149307107, |
| "grad_norm": 1.145244574282678, |
| "learning_rate": 5.292526512961698e-07, |
| "loss": 1.7374, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.2358962896736701, |
| "grad_norm": 1.0998728885819122, |
| "learning_rate": 5.291758616869928e-07, |
| "loss": 1.7178, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.2360303978542691, |
| "grad_norm": 1.122069140362572, |
| "learning_rate": 5.290990367168073e-07, |
| "loss": 1.634, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.23616450603486813, |
| "grad_norm": 1.1231670039812451, |
| "learning_rate": 5.290221763992522e-07, |
| "loss": 1.6238, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.23629861421546713, |
| "grad_norm": 1.0647516707650018, |
| "learning_rate": 5.289452807479734e-07, |
| "loss": 1.6579, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.23643272239606616, |
| "grad_norm": 1.2107894163734518, |
| "learning_rate": 5.288683497766222e-07, |
| "loss": 1.7207, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.23656683057666517, |
| "grad_norm": 1.1025744988730661, |
| "learning_rate": 5.287913834988569e-07, |
| "loss": 1.7006, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.2367009387572642, |
| "grad_norm": 1.0797524236014637, |
| "learning_rate": 5.287143819283421e-07, |
| "loss": 1.7584, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.2368350469378632, |
| "grad_norm": 1.0751286199968113, |
| "learning_rate": 5.286373450787481e-07, |
| "loss": 1.5611, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.23696915511846223, |
| "grad_norm": 1.0636517626500344, |
| "learning_rate": 5.285602729637518e-07, |
| "loss": 1.6433, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.23710326329906126, |
| "grad_norm": 1.048651758235017, |
| "learning_rate": 5.284831655970363e-07, |
| "loss": 1.6267, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.23723737147966026, |
| "grad_norm": 1.0862538156700035, |
| "learning_rate": 5.28406022992291e-07, |
| "loss": 1.591, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.2373714796602593, |
| "grad_norm": 1.112560210549691, |
| "learning_rate": 5.283288451632116e-07, |
| "loss": 1.6387, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.2375055878408583, |
| "grad_norm": 1.163175696596488, |
| "learning_rate": 5.282516321235001e-07, |
| "loss": 1.8051, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.23763969602145732, |
| "grad_norm": 1.112481677106296, |
| "learning_rate": 5.281743838868644e-07, |
| "loss": 1.5411, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.23777380420205632, |
| "grad_norm": 1.1911416700291582, |
| "learning_rate": 5.28097100467019e-07, |
| "loss": 1.6194, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.23790791238265535, |
| "grad_norm": 1.0990682965946412, |
| "learning_rate": 5.280197818776845e-07, |
| "loss": 1.6605, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.23804202056325435, |
| "grad_norm": 1.0591136451690275, |
| "learning_rate": 5.279424281325878e-07, |
| "loss": 1.6389, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.23817612874385338, |
| "grad_norm": 1.0683888995182673, |
| "learning_rate": 5.278650392454621e-07, |
| "loss": 1.6092, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.23831023692445238, |
| "grad_norm": 1.1224739302408693, |
| "learning_rate": 5.277876152300467e-07, |
| "loss": 1.6494, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.23844434510505141, |
| "grad_norm": 1.0723497695462585, |
| "learning_rate": 5.27710156100087e-07, |
| "loss": 1.7937, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.23857845328565042, |
| "grad_norm": 1.1351190756385903, |
| "learning_rate": 5.276326618693352e-07, |
| "loss": 1.7266, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.23871256146624945, |
| "grad_norm": 1.0579576318516895, |
| "learning_rate": 5.275551325515491e-07, |
| "loss": 1.6662, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.23884666964684845, |
| "grad_norm": 1.1337655082128173, |
| "learning_rate": 5.27477568160493e-07, |
| "loss": 1.6656, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.23898077782744748, |
| "grad_norm": 1.3625169955042795, |
| "learning_rate": 5.273999687099377e-07, |
| "loss": 1.6154, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.23911488600804648, |
| "grad_norm": 1.0606076186008175, |
| "learning_rate": 5.273223342136596e-07, |
| "loss": 1.6295, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.2392489941886455, |
| "grad_norm": 3.7952746706102753, |
| "learning_rate": 5.27244664685442e-07, |
| "loss": 1.593, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.2393831023692445, |
| "grad_norm": 1.1015598004917457, |
| "learning_rate": 5.271669601390737e-07, |
| "loss": 1.659, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.23951721054984354, |
| "grad_norm": 1.1429465431928834, |
| "learning_rate": 5.270892205883503e-07, |
| "loss": 1.7055, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.23965131873044257, |
| "grad_norm": 1.1572569512743107, |
| "learning_rate": 5.270114460470735e-07, |
| "loss": 1.75, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.23978542691104157, |
| "grad_norm": 1.1342505841464177, |
| "learning_rate": 5.269336365290511e-07, |
| "loss": 1.692, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.2399195350916406, |
| "grad_norm": 1.1491667363729234, |
| "learning_rate": 5.268557920480969e-07, |
| "loss": 1.6956, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.2400536432722396, |
| "grad_norm": 1.1290663441601718, |
| "learning_rate": 5.267779126180313e-07, |
| "loss": 1.7194, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.24018775145283863, |
| "grad_norm": 1.1068721597891535, |
| "learning_rate": 5.26699998252681e-07, |
| "loss": 1.6775, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.24032185963343763, |
| "grad_norm": 1.0965127649518425, |
| "learning_rate": 5.266220489658783e-07, |
| "loss": 1.7381, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.24045596781403666, |
| "grad_norm": 1.0539192312552248, |
| "learning_rate": 5.265440647714622e-07, |
| "loss": 1.6916, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.24059007599463567, |
| "grad_norm": 1.3925405964228643, |
| "learning_rate": 5.264660456832777e-07, |
| "loss": 1.6934, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.2407241841752347, |
| "grad_norm": 1.0796598245896871, |
| "learning_rate": 5.263879917151761e-07, |
| "loss": 1.6891, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.2408582923558337, |
| "grad_norm": 1.0549168383726284, |
| "learning_rate": 5.263099028810148e-07, |
| "loss": 1.6417, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.24099240053643273, |
| "grad_norm": 1.0854208022859217, |
| "learning_rate": 5.262317791946574e-07, |
| "loss": 1.6132, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.24112650871703173, |
| "grad_norm": 1.1038896542176981, |
| "learning_rate": 5.261536206699738e-07, |
| "loss": 1.6074, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.24126061689763076, |
| "grad_norm": 1.0646960968846464, |
| "learning_rate": 5.2607542732084e-07, |
| "loss": 1.601, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.24139472507822976, |
| "grad_norm": 1.1557060399556212, |
| "learning_rate": 5.259971991611381e-07, |
| "loss": 1.7684, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.2415288332588288, |
| "grad_norm": 1.0313305926934546, |
| "learning_rate": 5.259189362047565e-07, |
| "loss": 1.6322, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.2416629414394278, |
| "grad_norm": 1.0974406411588324, |
| "learning_rate": 5.258406384655897e-07, |
| "loss": 1.6857, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.24179704962002682, |
| "grad_norm": 1.1146673930740303, |
| "learning_rate": 5.257623059575385e-07, |
| "loss": 1.6456, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.24193115780062585, |
| "grad_norm": 1.0970256705246042, |
| "learning_rate": 5.256839386945097e-07, |
| "loss": 1.7583, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.24206526598122485, |
| "grad_norm": 1.107274760930789, |
| "learning_rate": 5.256055366904164e-07, |
| "loss": 1.6586, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.24219937416182388, |
| "grad_norm": 1.1073843937392611, |
| "learning_rate": 5.255270999591779e-07, |
| "loss": 1.7062, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.24233348234242288, |
| "grad_norm": 1.0566525499472572, |
| "learning_rate": 5.254486285147196e-07, |
| "loss": 1.6526, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.24246759052302191, |
| "grad_norm": 1.1537228290096582, |
| "learning_rate": 5.253701223709729e-07, |
| "loss": 1.6933, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.24260169870362092, |
| "grad_norm": 1.0990727257935735, |
| "learning_rate": 5.252915815418755e-07, |
| "loss": 1.7125, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.24273580688421995, |
| "grad_norm": 1.244262115292612, |
| "learning_rate": 5.252130060413716e-07, |
| "loss": 1.6264, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.24286991506481895, |
| "grad_norm": 1.1688493530359219, |
| "learning_rate": 5.251343958834107e-07, |
| "loss": 1.6785, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.24300402324541798, |
| "grad_norm": 1.2285366933673156, |
| "learning_rate": 5.250557510819494e-07, |
| "loss": 1.572, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.24313813142601698, |
| "grad_norm": 1.1296607396854323, |
| "learning_rate": 5.249770716509499e-07, |
| "loss": 1.6761, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.243272239606616, |
| "grad_norm": 1.1537668172261726, |
| "learning_rate": 5.248983576043808e-07, |
| "loss": 1.6839, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.243406347787215, |
| "grad_norm": 1.2774536095786413, |
| "learning_rate": 5.248196089562165e-07, |
| "loss": 1.6752, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.24354045596781404, |
| "grad_norm": 1.0391234761075887, |
| "learning_rate": 5.247408257204379e-07, |
| "loss": 1.713, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.24367456414841304, |
| "grad_norm": 1.1351662284778345, |
| "learning_rate": 5.24662007911032e-07, |
| "loss": 1.741, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.24380867232901207, |
| "grad_norm": 1.101327635041692, |
| "learning_rate": 5.245831555419915e-07, |
| "loss": 1.6196, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.24394278050961107, |
| "grad_norm": 1.0713266982503056, |
| "learning_rate": 5.24504268627316e-07, |
| "loss": 1.6454, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.2440768886902101, |
| "grad_norm": 1.1530834766346107, |
| "learning_rate": 5.244253471810106e-07, |
| "loss": 1.7217, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.24421099687080913, |
| "grad_norm": 1.121128499361746, |
| "learning_rate": 5.243463912170868e-07, |
| "loss": 1.635, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.24434510505140813, |
| "grad_norm": 1.1890728819475802, |
| "learning_rate": 5.242674007495621e-07, |
| "loss": 1.6498, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.24447921323200716, |
| "grad_norm": 1.0869958269746995, |
| "learning_rate": 5.241883757924604e-07, |
| "loss": 1.6685, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.24461332141260617, |
| "grad_norm": 1.072161128457571, |
| "learning_rate": 5.241093163598111e-07, |
| "loss": 1.613, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.2447474295932052, |
| "grad_norm": 1.0697959147126053, |
| "learning_rate": 5.240302224656507e-07, |
| "loss": 1.7839, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.2448815377738042, |
| "grad_norm": 1.0447563021570512, |
| "learning_rate": 5.239510941240209e-07, |
| "loss": 1.553, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.24501564595440323, |
| "grad_norm": 1.1246283994835846, |
| "learning_rate": 5.2387193134897e-07, |
| "loss": 1.7167, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.24514975413500223, |
| "grad_norm": 1.0539923982868098, |
| "learning_rate": 5.237927341545521e-07, |
| "loss": 1.6228, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.24528386231560126, |
| "grad_norm": 1.1056807313462267, |
| "learning_rate": 5.23713502554828e-07, |
| "loss": 1.6631, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.24541797049620026, |
| "grad_norm": 1.1081084022345968, |
| "learning_rate": 5.236342365638638e-07, |
| "loss": 1.7182, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.2455520786767993, |
| "grad_norm": 1.1259734401016548, |
| "learning_rate": 5.235549361957323e-07, |
| "loss": 1.6281, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.2456861868573983, |
| "grad_norm": 1.073575909581403, |
| "learning_rate": 5.234756014645123e-07, |
| "loss": 1.7089, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.24582029503799732, |
| "grad_norm": 1.182395764700481, |
| "learning_rate": 5.233962323842885e-07, |
| "loss": 1.6138, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.24595440321859632, |
| "grad_norm": 1.067652195605279, |
| "learning_rate": 5.233168289691518e-07, |
| "loss": 1.6409, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.24608851139919535, |
| "grad_norm": 1.0539945315127641, |
| "learning_rate": 5.232373912331994e-07, |
| "loss": 1.6632, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.24622261957979436, |
| "grad_norm": 1.1353497557175543, |
| "learning_rate": 5.231579191905341e-07, |
| "loss": 1.6481, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.24635672776039338, |
| "grad_norm": 1.0518079931176558, |
| "learning_rate": 5.230784128552653e-07, |
| "loss": 1.641, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.24649083594099241, |
| "grad_norm": 1.068415705515305, |
| "learning_rate": 5.229988722415082e-07, |
| "loss": 1.706, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.24662494412159142, |
| "grad_norm": 1.128403860172621, |
| "learning_rate": 5.229192973633844e-07, |
| "loss": 1.6095, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.24675905230219045, |
| "grad_norm": 1.069414952826673, |
| "learning_rate": 5.22839688235021e-07, |
| "loss": 1.6543, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.24689316048278945, |
| "grad_norm": 1.0821194973907244, |
| "learning_rate": 5.227600448705517e-07, |
| "loss": 1.556, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.24702726866338848, |
| "grad_norm": 1.084344318240152, |
| "learning_rate": 5.226803672841162e-07, |
| "loss": 1.6034, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.24716137684398748, |
| "grad_norm": 1.1202391548493928, |
| "learning_rate": 5.226006554898601e-07, |
| "loss": 1.6966, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.2472954850245865, |
| "grad_norm": 1.0911354590278528, |
| "learning_rate": 5.225209095019351e-07, |
| "loss": 1.6948, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.2474295932051855, |
| "grad_norm": 1.1062195036954834, |
| "learning_rate": 5.224411293344992e-07, |
| "loss": 1.5054, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.24756370138578454, |
| "grad_norm": 1.0581940583028457, |
| "learning_rate": 5.223613150017162e-07, |
| "loss": 1.6027, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.24769780956638354, |
| "grad_norm": 1.0564622037081781, |
| "learning_rate": 5.22281466517756e-07, |
| "loss": 1.6139, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.24783191774698257, |
| "grad_norm": 1.0965905968449954, |
| "learning_rate": 5.222015838967948e-07, |
| "loss": 1.6531, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.24796602592758157, |
| "grad_norm": 1.1162216415234159, |
| "learning_rate": 5.221216671530146e-07, |
| "loss": 1.6434, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.2481001341081806, |
| "grad_norm": 1.0760593930698765, |
| "learning_rate": 5.220417163006035e-07, |
| "loss": 1.7068, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2482342422887796, |
| "grad_norm": 1.3461498868058117, |
| "learning_rate": 5.219617313537557e-07, |
| "loss": 1.6895, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.24836835046937863, |
| "grad_norm": 1.116707873551399, |
| "learning_rate": 5.218817123266716e-07, |
| "loss": 1.6986, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.24850245864997764, |
| "grad_norm": 1.0874229858859366, |
| "learning_rate": 5.218016592335574e-07, |
| "loss": 1.696, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.24863656683057667, |
| "grad_norm": 1.2149675834773461, |
| "learning_rate": 5.217215720886254e-07, |
| "loss": 1.6334, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.24877067501117567, |
| "grad_norm": 1.0673684982385807, |
| "learning_rate": 5.21641450906094e-07, |
| "loss": 1.6445, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.2489047831917747, |
| "grad_norm": 1.0639747826797143, |
| "learning_rate": 5.215612957001879e-07, |
| "loss": 1.7352, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.24903889137237373, |
| "grad_norm": 1.1955320747693832, |
| "learning_rate": 5.214811064851373e-07, |
| "loss": 1.6991, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.24917299955297273, |
| "grad_norm": 1.1925934103789766, |
| "learning_rate": 5.214008832751788e-07, |
| "loss": 1.6421, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.24930710773357176, |
| "grad_norm": 1.152167600482823, |
| "learning_rate": 5.21320626084555e-07, |
| "loss": 1.6614, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.24944121591417076, |
| "grad_norm": 1.115689117193753, |
| "learning_rate": 5.212403349275145e-07, |
| "loss": 1.67, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.2495753240947698, |
| "grad_norm": 1.0409261709804483, |
| "learning_rate": 5.211600098183119e-07, |
| "loss": 1.5712, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.2497094322753688, |
| "grad_norm": 1.1645359690711583, |
| "learning_rate": 5.210796507712078e-07, |
| "loss": 1.6747, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.24984354045596782, |
| "grad_norm": 1.1220835902669124, |
| "learning_rate": 5.209992578004688e-07, |
| "loss": 1.6994, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.24997764863656682, |
| "grad_norm": 1.0574464835321717, |
| "learning_rate": 5.209188309203678e-07, |
| "loss": 1.6434, |
| "step": 1864 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 7456, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1864, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 498613144780800.0, |
| "train_batch_size": 3, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|