Upload exp_phase8_bridge_full_20260503_014256/log.jsonl with huggingface_hub
Browse files
exp_phase8_bridge_full_20260503_014256/log.jsonl
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 0, "loss": 1.3350088596343994, "loss_mdlm": 1.3350088596343994, "lr": 0.0, "gnorm": 0.0008631343953311443, "bridge_gate_avg": 1.0, "bridge_out_proj_avg": 0.0, "elapsed_s": 3.226933717727661}
|
| 2 |
+
{"step": 25, "loss": 0.929928183555603, "loss_mdlm": 0.929928183555603, "lr": 6.25e-06, "gnorm": 0.017237037420272827, "bridge_gate_avg": 1.0000213384628296, "bridge_out_proj_avg": 2.3359855276794406e-05, "elapsed_s": 9.73179316520691}
|
| 3 |
+
{"step": 50, "loss": 1.4356095790863037, "loss_mdlm": 1.4356095790863037, "lr": 1.25e-05, "gnorm": 0.001643141731619835, "bridge_gate_avg": 1.0001156628131866, "bridge_out_proj_avg": 5.6675614359846804e-05, "elapsed_s": 16.02883291244507}
|
| 4 |
+
{"step": 75, "loss": 1.3385487794876099, "loss_mdlm": 1.3385487794876099, "lr": 1.8750000000000002e-05, "gnorm": 0.001847691717557609, "bridge_gate_avg": 1.00046044588089, "bridge_out_proj_avg": 0.00014194700634106994, "elapsed_s": 23.01852297782898}
|
| 5 |
+
{"step": 100, "loss": 1.349244236946106, "loss_mdlm": 1.349244236946106, "lr": 2.5e-05, "gnorm": 0.004260350484400988, "bridge_gate_avg": 1.0008547604084015, "bridge_out_proj_avg": 0.0002922099665738642, "elapsed_s": 29.830164909362793}
|
| 6 |
+
{"step": 125, "loss": 1.3196439743041992, "loss_mdlm": 1.3196439743041992, "lr": 3.125e-05, "gnorm": 0.0023719817399978638, "bridge_gate_avg": 1.0014528930187225, "bridge_out_proj_avg": 0.0005089049809612334, "elapsed_s": 36.31120443344116}
|
| 7 |
+
{"step": 150, "loss": 1.2959538698196411, "loss_mdlm": 1.2959538698196411, "lr": 3.7500000000000003e-05, "gnorm": 0.003303286386653781, "bridge_gate_avg": 1.002147376537323, "bridge_out_proj_avg": 0.0008533164655091241, "elapsed_s": 43.1836621761322}
|
| 8 |
+
{"step": 175, "loss": 1.3438066244125366, "loss_mdlm": 1.3438066244125366, "lr": 4.375e-05, "gnorm": 0.010145519860088825, "bridge_gate_avg": 1.002306967973709, "bridge_out_proj_avg": 0.0009551272814860567, "elapsed_s": 49.61898708343506}
|
| 9 |
+
{"step": 200, "loss": 1.3186841011047363, "loss_mdlm": 1.3186841011047363, "lr": 5e-05, "gnorm": 0.0017559075495228171, "bridge_gate_avg": 1.002426952123642, "bridge_out_proj_avg": 0.0010368003859184682, "elapsed_s": 56.71516227722168}
|
| 10 |
+
{"step": 225, "loss": 1.3411145210266113, "loss_mdlm": 1.3411145210266113, "lr": 4.999016565957633e-05, "gnorm": 0.01379581168293953, "bridge_gate_avg": 1.0025576949119568, "bridge_out_proj_avg": 0.0010985640401486307, "elapsed_s": 66.12661576271057}
|
| 11 |
+
{"step": 250, "loss": 1.3307899236679077, "loss_mdlm": 1.3307899236679077, "lr": 4.996067037544542e-05, "gnorm": 0.006714193616062403, "bridge_gate_avg": 1.0029683113098145, "bridge_out_proj_avg": 0.001207420020364225, "elapsed_s": 73.51633501052856}
|
| 12 |
+
{"step": 275, "loss": 1.3643791675567627, "loss_mdlm": 1.3643791675567627, "lr": 4.991153735294049e-05, "gnorm": 0.006569570861756802, "bridge_gate_avg": 1.0034425854682922, "bridge_out_proj_avg": 0.001301914278883487, "elapsed_s": 81.63057279586792}
|
| 13 |
+
{"step": 300, "loss": 1.3039356470108032, "loss_mdlm": 1.3039356470108032, "lr": 4.984280524733107e-05, "gnorm": 0.004216927103698254, "bridge_gate_avg": 1.0037167370319366, "bridge_out_proj_avg": 0.0013725196768064052, "elapsed_s": 89.55094051361084}
|
| 14 |
+
{"step": 325, "loss": 1.3282833099365234, "loss_mdlm": 1.3282833099365234, "lr": 4.975452813341114e-05, "gnorm": 0.007178005762398243, "bridge_gate_avg": 1.0037755966186523, "bridge_out_proj_avg": 0.0014118208491709083, "elapsed_s": 97.61864042282104}
|
| 15 |
+
{"step": 350, "loss": 1.3189504146575928, "loss_mdlm": 1.3189504146575928, "lr": 4.96467754629559e-05, "gnorm": 0.008438174612820148, "bridge_gate_avg": 1.00400048494339, "bridge_out_proj_avg": 0.0014430019946303219, "elapsed_s": 105.43219900131226}
|
| 16 |
+
{"step": 375, "loss": 1.285767674446106, "loss_mdlm": 1.285767674446106, "lr": 4.951963201008076e-05, "gnorm": 0.03572671860456467, "bridge_gate_avg": 1.0045197010040283, "bridge_out_proj_avg": 0.0014854204200673848, "elapsed_s": 113.48263096809387}
|
| 17 |
+
{"step": 400, "loss": 1.3450173139572144, "loss_mdlm": 1.3450173139572144, "lr": 4.937319780454559e-05, "gnorm": 0.012070699594914913, "bridge_gate_avg": 1.004637062549591, "bridge_out_proj_avg": 0.0015024780586827546, "elapsed_s": 121.28699278831482}
|
| 18 |
+
{"step": 425, "loss": 1.3963861465454102, "loss_mdlm": 1.3963861465454102, "lr": 4.9207588053056545e-05, "gnorm": 0.013354994356632233, "bridge_gate_avg": 1.0049593448638916, "bridge_out_proj_avg": 0.0015367259038612247, "elapsed_s": 129.39036345481873}
|
| 19 |
+
{"step": 450, "loss": 1.3203352689743042, "loss_mdlm": 1.3203352689743042, "lr": 4.9022933048627496e-05, "gnorm": 0.005471639335155487, "bridge_gate_avg": 1.0053081810474396, "bridge_out_proj_avg": 0.0015683616802562028, "elapsed_s": 137.49937558174133}
|
| 20 |
+
{"step": 475, "loss": 1.4292761087417603, "loss_mdlm": 1.4292761087417603, "lr": 4.881937806807241e-05, "gnorm": 0.015253941528499126, "bridge_gate_avg": 1.0056167244911194, "bridge_out_proj_avg": 0.0015903632738627493, "elapsed_s": 145.53031277656555}
|
| 21 |
+
{"step": 500, "loss": 1.3266990184783936, "loss_mdlm": 1.3266990184783936, "lr": 4.8597083257709194e-05, "gnorm": 0.01448027603328228, "bridge_gate_avg": 1.0057570934295654, "bridge_out_proj_avg": 0.0016160650120582432, "elapsed_s": 153.50053715705872}
|
| 22 |
+
{"step": 525, "loss": 1.3573657274246216, "loss_mdlm": 1.3573657274246216, "lr": 4.8356223507364996e-05, "gnorm": 0.009991348721086979, "bridge_gate_avg": 1.0062089264392853, "bridge_out_proj_avg": 0.0016517920012120157, "elapsed_s": 161.34812927246094}
|
| 23 |
+
{"step": 550, "loss": 1.3651301860809326, "loss_mdlm": 1.3651301860809326, "lr": 4.8096988312782174e-05, "gnorm": 0.05209238827228546, "bridge_gate_avg": 1.0065991282463074, "bridge_out_proj_avg": 0.0016935844614636153, "elapsed_s": 169.41809129714966}
|
| 24 |
+
{"step": 575, "loss": 1.3364684581756592, "loss_mdlm": 1.3364684581756592, "lr": 4.781958162653297e-05, "gnorm": 0.02638976275920868, "bridge_gate_avg": 1.0066078007221222, "bridge_out_proj_avg": 0.0017061419202946126, "elapsed_s": 177.1807987689972}
|
| 25 |
+
{"step": 600, "loss": 1.447853446006775, "loss_mdlm": 1.447853446006775, "lr": 4.752422169756048e-05, "gnorm": 0.02364787459373474, "bridge_gate_avg": 1.0067238807678223, "bridge_out_proj_avg": 0.00172378309071064, "elapsed_s": 185.2514624595642}
|
| 26 |
+
{"step": 625, "loss": 1.3136425018310547, "loss_mdlm": 1.3136425018310547, "lr": 4.721114089947181e-05, "gnorm": 0.0313662551343441, "bridge_gate_avg": 1.0069412887096405, "bridge_out_proj_avg": 0.0017510619363747537, "elapsed_s": 193.24636483192444}
|
| 27 |
+
{"step": 650, "loss": 1.3367892503738403, "loss_mdlm": 1.3367892503738403, "lr": 4.6880585547718845e-05, "gnorm": 0.03535309061408043, "bridge_gate_avg": 1.0070651471614838, "bridge_out_proj_avg": 0.001771737850503996, "elapsed_s": 201.31983184814453}
|
| 28 |
+
{"step": 675, "loss": 1.3426743745803833, "loss_mdlm": 1.3426743745803833, "lr": 4.653281570581023e-05, "gnorm": 0.008329181000590324, "bridge_gate_avg": 1.0070849657058716, "bridge_out_proj_avg": 0.0017827361007221043, "elapsed_s": 209.2542700767517}
|
| 29 |
+
{"step": 700, "loss": 1.3496735095977783, "loss_mdlm": 1.3496735095977783, "lr": 4.6168104980707107e-05, "gnorm": 0.007046983577311039, "bridge_gate_avg": 1.0070365369319916, "bridge_out_proj_avg": 0.0017873407632578164, "elapsed_s": 217.2925419807434}
|
| 30 |
+
{"step": 725, "loss": 1.3336832523345947, "loss_mdlm": 1.3336832523345947, "lr": 4.5786740307563636e-05, "gnorm": 0.024803245440125465, "bridge_gate_avg": 1.0072059631347656, "bridge_out_proj_avg": 0.001807853695936501, "elapsed_s": 225.362407207489}
|
| 31 |
+
{"step": 750, "loss": 1.330244779586792, "loss_mdlm": 1.330244779586792, "lr": 4.538902172398151e-05, "gnorm": 0.04239260032773018, "bridge_gate_avg": 1.0073598325252533, "bridge_out_proj_avg": 0.001821638026740402, "elapsed_s": 233.31029152870178}
|
| 32 |
+
{"step": 775, "loss": 1.2574050426483154, "loss_mdlm": 1.2574050426483154, "lr": 4.497526213395623e-05, "gnorm": 0.03416886553168297, "bridge_gate_avg": 1.007559061050415, "bridge_out_proj_avg": 0.001841113728005439, "elapsed_s": 241.26887798309326}
|
| 33 |
+
{"step": 800, "loss": 1.3081117868423462, "loss_mdlm": 1.3081117868423462, "lr": 4.454578706170075e-05, "gnorm": 0.008946528658270836, "bridge_gate_avg": 1.007555216550827, "bridge_out_proj_avg": 0.0018441084830556065, "elapsed_s": 249.18443274497986}
|
| 34 |
+
{"step": 825, "loss": 1.290758728981018, "loss_mdlm": 1.290758728981018, "lr": 4.410093439554019e-05, "gnorm": 0.03172270581126213, "bridge_gate_avg": 1.00770503282547, "bridge_out_proj_avg": 0.0018642670765984803, "elapsed_s": 257.5404088497162}
|
| 35 |
+
{"step": 850, "loss": 1.4003592729568481, "loss_mdlm": 1.4003592729568481, "lr": 4.364105412207914e-05, "gnorm": 0.009220565669238567, "bridge_gate_avg": 1.0078321695327759, "bridge_out_proj_avg": 0.0018810196779668331, "elapsed_s": 265.5530424118042}
|
| 36 |
+
{"step": 875, "loss": 1.3349496126174927, "loss_mdlm": 1.3349496126174927, "lr": 4.316650805085068e-05, "gnorm": 0.023226410150527954, "bridge_gate_avg": 1.0078971982002258, "bridge_out_proj_avg": 0.0018922605668194592, "elapsed_s": 273.7016909122467}
|
| 37 |
+
{"step": 900, "loss": 1.3511741161346436, "loss_mdlm": 1.3511741161346436, "lr": 4.267766952966369e-05, "gnorm": 0.015365976840257645, "bridge_gate_avg": 1.007929265499115, "bridge_out_proj_avg": 0.001901545183500275, "elapsed_s": 281.76511120796204}
|
| 38 |
+
{"step": 925, "loss": 1.3086625337600708, "loss_mdlm": 1.3086625337600708, "lr": 4.2174923150872544e-05, "gnorm": 0.02252776175737381, "bridge_gate_avg": 1.0080341398715973, "bridge_out_proj_avg": 0.0019094951858278364, "elapsed_s": 289.6709420681}
|
| 39 |
+
{"step": 950, "loss": 1.4201890230178833, "loss_mdlm": 1.4201890230178833, "lr": 4.16586644488001e-05, "gnorm": 0.018138449639081955, "bridge_gate_avg": 1.0081234574317932, "bridge_out_proj_avg": 0.001915277389343828, "elapsed_s": 297.6339020729065}
|
| 40 |
+
{"step": 975, "loss": 1.3405823707580566, "loss_mdlm": 1.3405823707580566, "lr": 4.1129299588552193e-05, "gnorm": 0.08290347456932068, "bridge_gate_avg": 1.0082698464393616, "bridge_out_proj_avg": 0.0019222536939196289, "elapsed_s": 305.6311445236206}
|
| 41 |
+
{"step": 1000, "loss": 1.326568603515625, "loss_mdlm": 1.326568603515625, "lr": 4.058724504646834e-05, "gnorm": 0.02022627182304859, "bridge_gate_avg": 1.0083131790161133, "bridge_out_proj_avg": 0.0019307156035210937, "elapsed_s": 313.7615716457367}
|
| 42 |
+
{"step": 1025, "loss": 1.3022381067276, "loss_mdlm": 1.3022381067276, "lr": 4.0032927282460146e-05, "gnorm": 0.01202322170138359, "bridge_gate_avg": 1.0082152783870697, "bridge_out_proj_avg": 0.0019274639489594847, "elapsed_s": 321.6638813018799}
|
| 43 |
+
{"step": 1050, "loss": 1.4113538265228271, "loss_mdlm": 1.4113538265228271, "lr": 3.946678240449515e-05, "gnorm": 0.11893358081579208, "bridge_gate_avg": 1.008417010307312, "bridge_out_proj_avg": 0.0019450518302619457, "elapsed_s": 329.662202835083}
|
| 44 |
+
{"step": 1075, "loss": 1.3235492706298828, "loss_mdlm": 1.3235492706298828, "lr": 3.888925582549006e-05, "gnorm": 0.017372669652104378, "bridge_gate_avg": 1.0084069669246674, "bridge_out_proj_avg": 0.0019462393538560718, "elapsed_s": 337.73804473876953}
|
| 45 |
+
{"step": 1100, "loss": 1.3197993040084839, "loss_mdlm": 1.3197993040084839, "lr": 3.830080191288342e-05, "gnorm": 0.03074599616229534, "bridge_gate_avg": 1.008537918329239, "bridge_out_proj_avg": 0.0019542801310308278, "elapsed_s": 345.81507539749146}
|
| 46 |
+
{"step": 1125, "loss": 1.3331305980682373, "loss_mdlm": 1.3331305980682373, "lr": 3.770188363116324e-05, "gnorm": 0.01479267980903387, "bridge_gate_avg": 1.008726805448532, "bridge_out_proj_avg": 0.001966973824892193, "elapsed_s": 354.07041668891907}
|
| 47 |
+
{"step": 1150, "loss": 1.3314369916915894, "loss_mdlm": 1.3314369916915894, "lr": 3.7092972177631e-05, "gnorm": 0.03399093821644783, "bridge_gate_avg": 1.0087586045265198, "bridge_out_proj_avg": 0.0019767061749007553, "elapsed_s": 361.7839365005493}
|
| 48 |
+
{"step": 1175, "loss": 1.329616904258728, "loss_mdlm": 1.329616904258728, "lr": 3.6474546611688445e-05, "gnorm": 0.012316204607486725, "bridge_gate_avg": 1.0089160799980164, "bridge_out_proj_avg": 0.001994571619434282, "elapsed_s": 369.9421286582947}
|
| 49 |
+
{"step": 1200, "loss": 1.3491356372833252, "loss_mdlm": 1.3491356372833252, "lr": 3.5847093477938956e-05, "gnorm": 0.028923826292157173, "bridge_gate_avg": 1.008913516998291, "bridge_out_proj_avg": 0.001993294368730858, "elapsed_s": 377.8282697200775}
|
| 50 |
+
{"step": 1225, "loss": 1.3353400230407715, "loss_mdlm": 1.3353400230407715, "lr": 3.521110642339991e-05, "gnorm": 0.01674208603799343, "bridge_gate_avg": 1.0089710354804993, "bridge_out_proj_avg": 0.001995297963730991, "elapsed_s": 385.7952802181244}
|
| 51 |
+
{"step": 1250, "loss": 1.3067643642425537, "loss_mdlm": 1.3067643642425537, "lr": 3.456708580912725e-05, "gnorm": 0.05245716869831085, "bridge_gate_avg": 1.0091062188148499, "bridge_out_proj_avg": 0.0020037244830746204, "elapsed_s": 393.7378911972046}
|
| 52 |
+
{"step": 1275, "loss": 1.3215919733047485, "loss_mdlm": 1.3215919733047485, "lr": 3.391553831655782e-05, "gnorm": 0.011571672745049, "bridge_gate_avg": 1.0091639161109924, "bridge_out_proj_avg": 0.002009290619753301, "elapsed_s": 401.61874508857727}
|
| 53 |
+
{"step": 1300, "loss": 1.3171278238296509, "loss_mdlm": 1.3171278238296509, "lr": 3.3256976548879184e-05, "gnorm": 0.023397604003548622, "bridge_gate_avg": 1.0090708136558533, "bridge_out_proj_avg": 0.002008602023124695, "elapsed_s": 409.70656752586365}
|
| 54 |
+
{"step": 1325, "loss": 1.2777515649795532, "loss_mdlm": 1.2777515649795532, "lr": 3.259191862774037e-05, "gnorm": 0.021396463736891747, "bridge_gate_avg": 1.0091650187969208, "bridge_out_proj_avg": 0.002016930462559685, "elapsed_s": 417.63711810112}
|
| 55 |
+
{"step": 1350, "loss": 1.3175032138824463, "loss_mdlm": 1.3175032138824463, "lr": 3.1920887785621235e-05, "gnorm": 0.010335907340049744, "bridge_gate_avg": 1.0092079639434814, "bridge_out_proj_avg": 0.002016954211285338, "elapsed_s": 425.64473390579224}
|
| 56 |
+
{"step": 1375, "loss": 1.37645423412323, "loss_mdlm": 1.37645423412323, "lr": 3.1244411954180676e-05, "gnorm": 0.017561519518494606, "bridge_gate_avg": 1.0092639029026031, "bridge_out_proj_avg": 0.002021310501731932, "elapsed_s": 433.36111760139465}
|
| 57 |
+
{"step": 1400, "loss": 1.2301217317581177, "loss_mdlm": 1.2301217317581177, "lr": 3.056302334890786e-05, "gnorm": 0.02544678933918476, "bridge_gate_avg": 1.0094216167926788, "bridge_out_proj_avg": 0.0020260042510926723, "elapsed_s": 441.08074712753296}
|
| 58 |
+
{"step": 1425, "loss": 1.3045871257781982, "loss_mdlm": 1.3045871257781982, "lr": 2.9877258050403212e-05, "gnorm": 0.06973150372505188, "bridge_gate_avg": 1.0094327628612518, "bridge_out_proj_avg": 0.0020252145477570593, "elapsed_s": 449.31917667388916}
|
| 59 |
+
{"step": 1450, "loss": 1.414459228515625, "loss_mdlm": 1.414459228515625, "lr": 2.918765558261841e-05, "gnorm": 0.018355047330260277, "bridge_gate_avg": 1.0094902217388153, "bridge_out_proj_avg": 0.002033653756370768, "elapsed_s": 457.35597944259644}
|
| 60 |
+
{"step": 1475, "loss": 1.3544243574142456, "loss_mdlm": 1.3544243574142456, "lr": 2.849475848838749e-05, "gnorm": 0.032203685492277145, "bridge_gate_avg": 1.0096079111099243, "bridge_out_proj_avg": 0.002043275919277221, "elapsed_s": 465.46030831336975}
|
| 61 |
+
{"step": 1500, "loss": 1.3272844552993774, "loss_mdlm": 1.3272844552993774, "lr": 2.7799111902582696e-05, "gnorm": 0.014333972707390785, "bridge_gate_avg": 1.0097345113754272, "bridge_out_proj_avg": 0.002051292307442054, "elapsed_s": 473.3915824890137}
|
| 62 |
+
{"step": 1525, "loss": 1.3993922472000122, "loss_mdlm": 1.3993922472000122, "lr": 2.710126312323119e-05, "gnorm": 0.01684023253619671, "bridge_gate_avg": 1.0098722577095032, "bridge_out_proj_avg": 0.0020610830397345126, "elapsed_s": 481.464595079422}
|
| 63 |
+
{"step": 1550, "loss": 1.2656347751617432, "loss_mdlm": 1.2656347751617432, "lr": 2.6401761180929797e-05, "gnorm": 0.036580007523298264, "bridge_gate_avg": 1.0099718570709229, "bridge_out_proj_avg": 0.002067008666926995, "elapsed_s": 489.66298604011536}
|
| 64 |
+
{"step": 1575, "loss": 1.4179701805114746, "loss_mdlm": 1.4179701805114746, "lr": 2.5701156406896725e-05, "gnorm": 0.016983667388558388, "bridge_gate_avg": 1.0099948644638062, "bridge_out_proj_avg": 0.0020692924153991044, "elapsed_s": 497.960458278656}
|
| 65 |
+
{"step": 1600, "loss": 1.326457142829895, "loss_mdlm": 1.326457142829895, "lr": 2.5e-05, "gnorm": 0.01637180894613266, "bridge_gate_avg": 1.0099992752075195, "bridge_out_proj_avg": 0.0020694831910077482, "elapsed_s": 506.068790435791}
|
| 66 |
+
{"step": 1625, "loss": 1.3278065919876099, "loss_mdlm": 1.3278065919876099, "lr": 2.429884359310328e-05, "gnorm": 0.015872539952397346, "bridge_gate_avg": 1.010011374950409, "bridge_out_proj_avg": 0.002072284201858565, "elapsed_s": 514.1484615802765}
|
| 67 |
+
{"step": 1650, "loss": 1.3066328763961792, "loss_mdlm": 1.3066328763961792, "lr": 2.3598238819070202e-05, "gnorm": 0.25800445675849915, "bridge_gate_avg": 1.0100992023944855, "bridge_out_proj_avg": 0.0020824930106755346, "elapsed_s": 522.3392903804779}
|
| 68 |
+
{"step": 1675, "loss": 1.329480767250061, "loss_mdlm": 1.329480767250061, "lr": 2.2898736876768815e-05, "gnorm": 0.07596909999847412, "bridge_gate_avg": 1.0100489258766174, "bridge_out_proj_avg": 0.0020788193214684725, "elapsed_s": 530.4772458076477}
|
| 69 |
+
{"step": 1700, "loss": 1.349837064743042, "loss_mdlm": 1.349837064743042, "lr": 2.2200888097417307e-05, "gnorm": 0.05957718938589096, "bridge_gate_avg": 1.0100629925727844, "bridge_out_proj_avg": 0.002079652389511466, "elapsed_s": 538.5018200874329}
|
| 70 |
+
{"step": 1725, "loss": 1.2975938320159912, "loss_mdlm": 1.2975938320159912, "lr": 2.1505241511612522e-05, "gnorm": 0.11773993819952011, "bridge_gate_avg": 1.0101116299629211, "bridge_out_proj_avg": 0.002083083469187841, "elapsed_s": 546.7128105163574}
|
| 71 |
+
{"step": 1750, "loss": 1.1023306846618652, "loss_mdlm": 1.1023306846618652, "lr": 2.0812344417381595e-05, "gnorm": 0.4406864643096924, "bridge_gate_avg": 1.010195940732956, "bridge_out_proj_avg": 0.00208892987575382, "elapsed_s": 554.8460667133331}
|
| 72 |
+
{"step": 1775, "loss": 1.3344700336456299, "loss_mdlm": 1.3344700336456299, "lr": 2.0122741949596797e-05, "gnorm": 0.06465676426887512, "bridge_gate_avg": 1.0102695226669312, "bridge_out_proj_avg": 0.0020951541664544493, "elapsed_s": 562.930554151535}
|
| 73 |
+
{"step": 1800, "loss": 1.327720046043396, "loss_mdlm": 1.327720046043396, "lr": 1.9436976651092144e-05, "gnorm": 0.038605522364377975, "bridge_gate_avg": 1.010201781988144, "bridge_out_proj_avg": 0.002090600348310545, "elapsed_s": 571.0186171531677}
|
| 74 |
+
{"step": 1825, "loss": 0.902443528175354, "loss_mdlm": 0.902443528175354, "lr": 1.8755588045819327e-05, "gnorm": 1.9574147462844849, "bridge_gate_avg": 1.0102148950099945, "bridge_out_proj_avg": 0.0020932540064677596, "elapsed_s": 579.1443431377411}
|
| 75 |
+
{"step": 1850, "loss": 1.3544490337371826, "loss_mdlm": 1.3544490337371826, "lr": 1.8079112214378768e-05, "gnorm": 0.18534229695796967, "bridge_gate_avg": 1.010213553905487, "bridge_out_proj_avg": 0.0020943334675394, "elapsed_s": 587.0288875102997}
|
| 76 |
+
{"step": 1875, "loss": 1.3636527061462402, "loss_mdlm": 1.3636527061462402, "lr": 1.7408081372259632e-05, "gnorm": 0.03621051087975502, "bridge_gate_avg": 1.0101966261863708, "bridge_out_proj_avg": 0.0020942514238413423, "elapsed_s": 594.9513039588928}
|
| 77 |
+
{"step": 1900, "loss": 1.3584319353103638, "loss_mdlm": 1.3584319353103638, "lr": 1.6743023451120832e-05, "gnorm": 0.07792751491069794, "bridge_gate_avg": 1.0102025866508484, "bridge_out_proj_avg": 0.0020943579729646444, "elapsed_s": 603.0894687175751}
|
| 78 |
+
{"step": 1925, "loss": 1.2792766094207764, "loss_mdlm": 1.2792766094207764, "lr": 1.6084461683442176e-05, "gnorm": 0.08034906536340714, "bridge_gate_avg": 1.0103090703487396, "bridge_out_proj_avg": 0.0021027041075285524, "elapsed_s": 611.2157959938049}
|
| 79 |
+
{"step": 1950, "loss": 1.3554704189300537, "loss_mdlm": 1.3554704189300537, "lr": 1.5432914190872757e-05, "gnorm": 0.026858368888497353, "bridge_gate_avg": 1.0102849304676056, "bridge_out_proj_avg": 0.0020986587915103883, "elapsed_s": 619.2618095874786}
|
| 80 |
+
{"step": 1975, "loss": 1.3238497972488403, "loss_mdlm": 1.3238497972488403, "lr": 1.4788893576600099e-05, "gnorm": 0.01429731585085392, "bridge_gate_avg": 1.0103401839733124, "bridge_out_proj_avg": 0.002102796919643879, "elapsed_s": 627.3732001781464}
|
| 81 |
+
{"step": 2000, "loss": 1.3346387147903442, "loss_mdlm": 1.3346387147903442, "lr": 1.4152906522061048e-05, "gnorm": 0.07194066047668457, "bridge_gate_avg": 1.0103724598884583, "bridge_out_proj_avg": 0.0021062501473352313, "elapsed_s": 635.3605828285217}
|
| 82 |
+
{"step": 2025, "loss": 1.337130069732666, "loss_mdlm": 1.337130069732666, "lr": 1.3525453388311554e-05, "gnorm": 0.05284663289785385, "bridge_gate_avg": 1.0103863775730133, "bridge_out_proj_avg": 0.0021069409558549523, "elapsed_s": 643.2069916725159}
|
| 83 |
+
{"step": 2050, "loss": 1.3509821891784668, "loss_mdlm": 1.3509821891784668, "lr": 1.2907027822369005e-05, "gnorm": 0.030630996450781822, "bridge_gate_avg": 1.0104325115680695, "bridge_out_proj_avg": 0.0021096720884088427, "elapsed_s": 651.2446117401123}
|
| 84 |
+
{"step": 2075, "loss": 1.3870130777359009, "loss_mdlm": 1.3870130777359009, "lr": 1.229811636883677e-05, "gnorm": 0.06803259253501892, "bridge_gate_avg": 1.0104916095733643, "bridge_out_proj_avg": 0.0021129383821971714, "elapsed_s": 659.2324018478394}
|
| 85 |
+
{"step": 2100, "loss": 1.3248709440231323, "loss_mdlm": 1.3248709440231323, "lr": 1.1699198087116589e-05, "gnorm": 0.03785916790366173, "bridge_gate_avg": 1.0105216801166534, "bridge_out_proj_avg": 0.002114879811415449, "elapsed_s": 667.1869938373566}
|
| 86 |
+
{"step": 2125, "loss": 1.29989492893219, "loss_mdlm": 1.29989492893219, "lr": 1.1110744174509952e-05, "gnorm": 0.025585897266864777, "bridge_gate_avg": 1.0105522572994232, "bridge_out_proj_avg": 0.002117121039191261, "elapsed_s": 675.2327582836151}
|
| 87 |
+
{"step": 2150, "loss": 1.441186785697937, "loss_mdlm": 1.441186785697937, "lr": 1.0533217595504858e-05, "gnorm": 0.14951063692569733, "bridge_gate_avg": 1.010562539100647, "bridge_out_proj_avg": 0.0021163585188332945, "elapsed_s": 683.2512891292572}
|
| 88 |
+
{"step": 2175, "loss": 1.332984209060669, "loss_mdlm": 1.332984209060669, "lr": 9.967072717539851e-06, "gnorm": 0.017735786736011505, "bridge_gate_avg": 1.0106114447116852, "bridge_out_proj_avg": 0.0021192034473642707, "elapsed_s": 691.2551686763763}
|
| 89 |
+
{"step": 2200, "loss": 1.3080377578735352, "loss_mdlm": 1.3080377578735352, "lr": 9.412754953531663e-06, "gnorm": 0.058305900543928146, "bridge_gate_avg": 1.010627269744873, "bridge_out_proj_avg": 0.0021204393124207854, "elapsed_s": 699.210141658783}
|
| 90 |
+
{"step": 2225, "loss": 1.2282413244247437, "loss_mdlm": 1.2282413244247437, "lr": 8.870700411447816e-06, "gnorm": 0.07571716606616974, "bridge_gate_avg": 1.0106176137924194, "bridge_out_proj_avg": 0.0021199712064117193, "elapsed_s": 707.2566974163055}
|
| 91 |
+
{"step": 2250, "loss": 0.8604096174240112, "loss_mdlm": 0.8604096174240112, "lr": 8.341335551199902e-06, "gnorm": 0.0668865218758583, "bridge_gate_avg": 1.0106317698955536, "bridge_out_proj_avg": 0.002120988501701504, "elapsed_s": 715.3690276145935}
|
| 92 |
+
{"step": 2275, "loss": 0.8858523964881897, "loss_mdlm": 0.8858523964881897, "lr": 7.825076849127458e-06, "gnorm": 0.025727413594722748, "bridge_gate_avg": 1.0106689631938934, "bridge_out_proj_avg": 0.0021239084599073976, "elapsed_s": 723.4580681324005}
|
| 93 |
+
{"step": 2300, "loss": 1.3212240934371948, "loss_mdlm": 1.3212240934371948, "lr": 7.3223304703363135e-06, "gnorm": 0.07376276701688766, "bridge_gate_avg": 1.0106940269470215, "bridge_out_proj_avg": 0.002126436564140022, "elapsed_s": 731.309942483902}
|
| 94 |
+
{"step": 2325, "loss": 1.4321484565734863, "loss_mdlm": 1.4321484565734863, "lr": 6.833491949149329e-06, "gnorm": 0.016173668205738068, "bridge_gate_avg": 1.0106967687606812, "bridge_out_proj_avg": 0.0021263293165247887, "elapsed_s": 739.3316965103149}
|
| 95 |
+
{"step": 2350, "loss": 1.3357278108596802, "loss_mdlm": 1.3357278108596802, "lr": 6.358945877920861e-06, "gnorm": 0.017323924228549004, "bridge_gate_avg": 1.0107187926769257, "bridge_out_proj_avg": 0.0021279707143548876, "elapsed_s": 747.4497122764587}
|
| 96 |
+
{"step": 2375, "loss": 1.225994348526001, "loss_mdlm": 1.225994348526001, "lr": 5.899065604459814e-06, "gnorm": 0.07549522072076797, "bridge_gate_avg": 1.0107397139072418, "bridge_out_proj_avg": 0.002129100204911083, "elapsed_s": 755.2791640758514}
|
| 97 |
+
{"step": 2400, "loss": 1.326917052268982, "loss_mdlm": 1.326917052268982, "lr": 5.454212938299255e-06, "gnorm": 0.01910386234521866, "bridge_gate_avg": 1.010728120803833, "bridge_out_proj_avg": 0.002127277635736391, "elapsed_s": 763.3910536766052}
|
| 98 |
+
{"step": 2425, "loss": 1.2741496562957764, "loss_mdlm": 1.2741496562957764, "lr": 5.02473786604378e-06, "gnorm": 0.1259249746799469, "bridge_gate_avg": 1.0107367932796478, "bridge_out_proj_avg": 0.0021282356756273657, "elapsed_s": 771.2241652011871}
|
| 99 |
+
{"step": 2450, "loss": 1.2159346342086792, "loss_mdlm": 1.2159346342086792, "lr": 4.610978276018496e-06, "gnorm": 0.04658730328083038, "bridge_gate_avg": 1.0107408463954926, "bridge_out_proj_avg": 0.0021283017413225025, "elapsed_s": 779.2933785915375}
|
| 100 |
+
{"step": 2475, "loss": 1.3654148578643799, "loss_mdlm": 1.3654148578643799, "lr": 4.213259692436367e-06, "gnorm": 0.016080966219305992, "bridge_gate_avg": 1.0107374489307404, "bridge_out_proj_avg": 0.0021283139649312943, "elapsed_s": 787.2330269813538}
|
| 101 |
+
{"step": 2500, "loss": 1.2795283794403076, "loss_mdlm": 1.2795283794403076, "lr": 3.831895019292897e-06, "gnorm": 0.05175573006272316, "bridge_gate_avg": 1.0107498168945312, "bridge_out_proj_avg": 0.002129097905708477, "elapsed_s": 795.3189837932587}
|
| 102 |
+
{"step": 2525, "loss": 1.371551275253296, "loss_mdlm": 1.371551275253296, "lr": 3.4671842941897765e-06, "gnorm": 0.01554819941520691, "bridge_gate_avg": 1.0107637643814087, "bridge_out_proj_avg": 0.0021301302185747772, "elapsed_s": 803.295378446579}
|
| 103 |
+
{"step": 2550, "loss": 1.31600821018219, "loss_mdlm": 1.31600821018219, "lr": 3.119414452281158e-06, "gnorm": 0.045637790113687515, "bridge_gate_avg": 1.0107727646827698, "bridge_out_proj_avg": 0.0021308715804480016, "elapsed_s": 811.0230979919434}
|
| 104 |
+
{"step": 2575, "loss": 1.3252209424972534, "loss_mdlm": 1.3252209424972534, "lr": 2.788859100528196e-06, "gnorm": 0.0407429039478302, "bridge_gate_avg": 1.0107791125774384, "bridge_out_proj_avg": 0.0021314163459464908, "elapsed_s": 819.2131276130676}
|
| 105 |
+
{"step": 2600, "loss": 1.3174384832382202, "loss_mdlm": 1.3174384832382202, "lr": 2.475778302439524e-06, "gnorm": 0.07532685995101929, "bridge_gate_avg": 1.0107779502868652, "bridge_out_proj_avg": 0.0021318349172361195, "elapsed_s": 827.2400255203247}
|
| 106 |
+
{"step": 2625, "loss": 1.3342723846435547, "loss_mdlm": 1.3342723846435547, "lr": 2.1804183734670277e-06, "gnorm": 0.021251453086733818, "bridge_gate_avg": 1.0107789635658264, "bridge_out_proj_avg": 0.0021317755163181573, "elapsed_s": 835.2849724292755}
|
| 107 |
+
{"step": 2650, "loss": 1.3365886211395264, "loss_mdlm": 1.3365886211395264, "lr": 1.9030116872178316e-06, "gnorm": 0.013639782555401325, "bridge_gate_avg": 1.0107766091823578, "bridge_out_proj_avg": 0.00213126782909967, "elapsed_s": 843.4481301307678}
|
| 108 |
+
{"step": 2675, "loss": 1.3208911418914795, "loss_mdlm": 1.3208911418914795, "lr": 1.6437764926350074e-06, "gnorm": 0.01850554347038269, "bridge_gate_avg": 1.010784775018692, "bridge_out_proj_avg": 0.002131944114807993, "elapsed_s": 851.4900822639465}
|
| 109 |
+
{"step": 2700, "loss": 1.3139535188674927, "loss_mdlm": 1.3139535188674927, "lr": 1.4029167422908107e-06, "gnorm": 0.01885322481393814, "bridge_gate_avg": 1.0107892155647278, "bridge_out_proj_avg": 0.0021321183594409376, "elapsed_s": 859.5920865535736}
|
| 110 |
+
{"step": 2725, "loss": 1.3879399299621582, "loss_mdlm": 1.3879399299621582, "lr": 1.180621931927592e-06, "gnorm": 0.02534790150821209, "bridge_gate_avg": 1.0107958912849426, "bridge_out_proj_avg": 0.002132739406079054, "elapsed_s": 867.441675901413}
|
| 111 |
+
{"step": 2750, "loss": 1.2860486507415771, "loss_mdlm": 1.2860486507415771, "lr": 9.770669513725128e-07, "gnorm": 0.09443916380405426, "bridge_gate_avg": 1.010798305273056, "bridge_out_proj_avg": 0.00213291501859203, "elapsed_s": 875.560797214508}
|
| 112 |
+
{"step": 2775, "loss": 1.3404808044433594, "loss_mdlm": 1.3404808044433594, "lr": 7.924119469434665e-07, "gnorm": 0.030679529532790184, "bridge_gate_avg": 1.0108004212379456, "bridge_out_proj_avg": 0.0021329571900423616, "elapsed_s": 883.6591744422913}
|
| 113 |
+
{"step": 2800, "loss": 1.4234193563461304, "loss_mdlm": 1.4234193563461304, "lr": 6.268021954544096e-07, "gnorm": 0.01839747093617916, "bridge_gate_avg": 1.010800451040268, "bridge_out_proj_avg": 0.0021327402791939676, "elapsed_s": 891.6821756362915}
|
| 114 |
+
{"step": 2825, "loss": 1.32918119430542, "loss_mdlm": 1.32918119430542, "lr": 4.803679899192392e-07, "gnorm": 0.029389990493655205, "bridge_gate_avg": 1.0108002722263336, "bridge_out_proj_avg": 0.0021326496498659253, "elapsed_s": 899.7663440704346}
|
| 115 |
+
{"step": 2850, "loss": 1.331886887550354, "loss_mdlm": 1.331886887550354, "lr": 3.5322453704410286e-07, "gnorm": 0.04778939113020897, "bridge_gate_avg": 1.0108002722263336, "bridge_out_proj_avg": 0.002132560854079202, "elapsed_s": 907.7715954780579}
|
| 116 |
+
{"step": 2875, "loss": 1.2257847785949707, "loss_mdlm": 1.2257847785949707, "lr": 2.454718665888589e-07, "gnorm": 0.051534730941057205, "bridge_gate_avg": 1.0108010470867157, "bridge_out_proj_avg": 0.0021325491252355278, "elapsed_s": 915.8986015319824}
|
| 117 |
+
{"step": 2900, "loss": 1.2944540977478027, "loss_mdlm": 1.2944540977478027, "lr": 1.571947526689349e-07, "gnorm": 0.011131064966320992, "bridge_gate_avg": 1.010802149772644, "bridge_out_proj_avg": 0.002132552326656878, "elapsed_s": 923.9163181781769}
|
| 118 |
+
{"step": 2925, "loss": 1.3044579029083252, "loss_mdlm": 1.3044579029083252, "lr": 8.846264705952289e-08, "gnorm": 0.02540924772620201, "bridge_gate_avg": 1.010802835226059, "bridge_out_proj_avg": 0.002132575260475278, "elapsed_s": 932.0137641429901}
|
| 119 |
+
{"step": 2950, "loss": 0.8945547342300415, "loss_mdlm": 0.8945547342300415, "lr": 3.9329624554584884e-08, "gnorm": 0.09662426263093948, "bridge_gate_avg": 1.010802835226059, "bridge_out_proj_avg": 0.0021325999114196748, "elapsed_s": 940.1131148338318}
|
| 120 |
+
{"step": 2975, "loss": 1.3259764909744263, "loss_mdlm": 1.3259764909744263, "lr": 9.834340423678368e-09, "gnorm": 0.008343135938048363, "bridge_gate_avg": 1.010802835226059, "bridge_out_proj_avg": 0.002132600551703945, "elapsed_s": 948.2726039886475}
|