explcre commited on
Commit
ad5b3ad
·
verified ·
1 Parent(s): 46d1ad7

Upload exp_t3_ntv3_650m_500bp_FIXED_HASH_20260506_074840/log.jsonl with huggingface_hub

Browse files
exp_t3_ntv3_650m_500bp_FIXED_HASH_20260506_074840/log.jsonl CHANGED
@@ -10,3 +10,199 @@
10
  {"step": 1000, "loss": 2.2444167137145996, "gnorm": 900.26416015625, "n_masked": 68, "elapsed_s": 353.32481050491333}
11
  {"step": 1100, "loss": 1.4160035848617554, "gnorm": 69.71623992919922, "n_masked": 639, "elapsed_s": 388.2918951511383}
12
  {"step": 1200, "loss": 2.3762495517730713, "gnorm": 675.8726806640625, "n_masked": 267, "elapsed_s": 423.330069065094}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {"step": 1000, "loss": 2.2444167137145996, "gnorm": 900.26416015625, "n_masked": 68, "elapsed_s": 353.32481050491333}
11
  {"step": 1100, "loss": 1.4160035848617554, "gnorm": 69.71623992919922, "n_masked": 639, "elapsed_s": 388.2918951511383}
12
  {"step": 1200, "loss": 2.3762495517730713, "gnorm": 675.8726806640625, "n_masked": 267, "elapsed_s": 423.330069065094}
13
+ {"step": 1300, "loss": 1.4506486654281616, "gnorm": 78.70797729492188, "n_masked": 483, "elapsed_s": 458.38294768333435}
14
+ {"step": 1400, "loss": 1.5745760202407837, "gnorm": 179.05052185058594, "n_masked": 742, "elapsed_s": 493.5207726955414}
15
+ {"step": 1500, "loss": 1.6388559341430664, "gnorm": 80.24949645996094, "n_masked": 428, "elapsed_s": 533.5296359062195}
16
+ {"step": 1600, "loss": 1.4467231035232544, "gnorm": 208.5074005126953, "n_masked": 661, "elapsed_s": 568.5280933380127}
17
+ {"step": 1700, "loss": 1.3736953735351562, "gnorm": 22.254127502441406, "n_masked": 749, "elapsed_s": 609.2386786937714}
18
+ {"step": 1800, "loss": 1.36086106300354, "gnorm": 34.62132263183594, "n_masked": 256, "elapsed_s": 644.3455059528351}
19
+ {"step": 1900, "loss": 1.3950624465942383, "gnorm": 59.09011459350586, "n_masked": 341, "elapsed_s": 687.5047490596771}
20
+ {"step": 2000, "loss": 1.4191551208496094, "gnorm": 51.81608200073242, "n_masked": 832, "elapsed_s": 722.6527764797211}
21
+ {"step": 2100, "loss": 1.3698285818099976, "gnorm": 40.21640396118164, "n_masked": 318, "elapsed_s": 763.987238407135}
22
+ {"step": 2200, "loss": 1.4455608129501343, "gnorm": 35.702667236328125, "n_masked": 558, "elapsed_s": 799.1353981494904}
23
+ {"step": 2300, "loss": 1.3775945901870728, "gnorm": 49.560386657714844, "n_masked": 447, "elapsed_s": 838.7775135040283}
24
+ {"step": 2400, "loss": 1.3389371633529663, "gnorm": 56.92543411254883, "n_masked": 455, "elapsed_s": 873.9020960330963}
25
+ {"step": 2500, "loss": 1.4308192729949951, "gnorm": 16.389575958251953, "n_masked": 912, "elapsed_s": 1436.6621708869934}
26
+ {"step": 2500, "val_loss": 1.3740407729636834, "val_n_masked": 124933}
27
+ {"step": 2600, "loss": 1.3899890184402466, "gnorm": 6.82764196395874, "n_masked": 637, "elapsed_s": 4481.351215839386}
28
+ {"step": 2700, "loss": 1.3974850177764893, "gnorm": 6.7294721603393555, "n_masked": 476, "elapsed_s": 4516.457495927811}
29
+ {"step": 2800, "loss": 1.4410240650177002, "gnorm": 43.96825408935547, "n_masked": 547, "elapsed_s": 4551.330363750458}
30
+ {"step": 2900, "loss": 1.492596983909607, "gnorm": 72.34571075439453, "n_masked": 630, "elapsed_s": 4586.297803640366}
31
+ {"step": 3000, "loss": 1.3665837049484253, "gnorm": 118.17631530761719, "n_masked": 318, "elapsed_s": 4621.244218111038}
32
+ {"step": 3100, "loss": 1.3473235368728638, "gnorm": 25.174943923950195, "n_masked": 542, "elapsed_s": 4656.189078092575}
33
+ {"step": 3200, "loss": 1.408298373222351, "gnorm": 73.048095703125, "n_masked": 519, "elapsed_s": 4691.177469730377}
34
+ {"step": 3300, "loss": 1.3558359146118164, "gnorm": 69.914306640625, "n_masked": 597, "elapsed_s": 4726.021328926086}
35
+ {"step": 3400, "loss": 1.386448860168457, "gnorm": 25.44927215576172, "n_masked": 828, "elapsed_s": 4761.017758369446}
36
+ {"step": 3500, "loss": 1.4499478340148926, "gnorm": 69.83002471923828, "n_masked": 544, "elapsed_s": 4795.981695890427}
37
+ {"step": 3600, "loss": 1.3208485841751099, "gnorm": 41.443485260009766, "n_masked": 244, "elapsed_s": 4830.964000225067}
38
+ {"step": 3700, "loss": 1.3665913343429565, "gnorm": 15.523662567138672, "n_masked": 472, "elapsed_s": 4865.860482931137}
39
+ {"step": 3800, "loss": 1.3523682355880737, "gnorm": 31.80842399597168, "n_masked": 487, "elapsed_s": 4900.837449789047}
40
+ {"step": 3900, "loss": 1.369313359260559, "gnorm": 28.844648361206055, "n_masked": 510, "elapsed_s": 4935.843586683273}
41
+ {"step": 4000, "loss": 1.4081019163131714, "gnorm": 79.35284423828125, "n_masked": 450, "elapsed_s": 4970.894025087357}
42
+ {"step": 4100, "loss": 1.3439009189605713, "gnorm": 8.602263450622559, "n_masked": 253, "elapsed_s": 5005.798364639282}
43
+ {"step": 4200, "loss": 1.3389569520950317, "gnorm": 48.269100189208984, "n_masked": 152, "elapsed_s": 5040.754721403122}
44
+ {"step": 4300, "loss": 1.3525687456130981, "gnorm": 19.837886810302734, "n_masked": 530, "elapsed_s": 5075.739853858948}
45
+ {"step": 4400, "loss": 1.3027573823928833, "gnorm": 33.582427978515625, "n_masked": 310, "elapsed_s": 5110.74070930481}
46
+ {"step": 4500, "loss": 1.356981873512268, "gnorm": 6.407493591308594, "n_masked": 805, "elapsed_s": 5145.5921022892}
47
+ {"step": 4600, "loss": 1.3320189714431763, "gnorm": 8.370941162109375, "n_masked": 484, "elapsed_s": 5180.586127758026}
48
+ {"step": 4700, "loss": 1.3025579452514648, "gnorm": 6.2807536125183105, "n_masked": 245, "elapsed_s": 5215.56786441803}
49
+ {"step": 4800, "loss": 1.3608981370925903, "gnorm": 1.30360746383667, "n_masked": 716, "elapsed_s": 5250.600974321365}
50
+ {"step": 4900, "loss": 1.3767285346984863, "gnorm": 13.428191184997559, "n_masked": 294, "elapsed_s": 5285.550397872925}
51
+ {"step": 5000, "loss": 1.3199602365493774, "gnorm": 100.54936218261719, "n_masked": 237, "elapsed_s": 5320.59251832962}
52
+ {"step": 5000, "val_loss": 1.342064801384421, "val_n_masked": 125120}
53
+ {"step": 5100, "loss": 1.3597325086593628, "gnorm": 5.7053608894348145, "n_masked": 419, "elapsed_s": 7379.943660259247}
54
+ {"step": 5200, "loss": 1.3866119384765625, "gnorm": 12.63646125793457, "n_masked": 541, "elapsed_s": 7415.143207550049}
55
+ {"step": 5300, "loss": 1.3350417613983154, "gnorm": 38.037872314453125, "n_masked": 442, "elapsed_s": 7450.182459592819}
56
+ {"step": 5400, "loss": 1.392158031463623, "gnorm": 104.28556060791016, "n_masked": 179, "elapsed_s": 7485.0931804180145}
57
+ {"step": 5500, "loss": 1.3896971940994263, "gnorm": 8.202664375305176, "n_masked": 448, "elapsed_s": 7520.148466348648}
58
+ {"step": 5600, "loss": 1.2966992855072021, "gnorm": 2.196049213409424, "n_masked": 373, "elapsed_s": 7555.172767162323}
59
+ {"step": 5700, "loss": 1.340815544128418, "gnorm": 37.26991653442383, "n_masked": 395, "elapsed_s": 7590.183227300644}
60
+ {"step": 5800, "loss": 1.3740426301956177, "gnorm": 17.943693161010742, "n_masked": 693, "elapsed_s": 7625.100323438644}
61
+ {"step": 5900, "loss": 1.3825974464416504, "gnorm": 20.106971740722656, "n_masked": 353, "elapsed_s": 7660.096639633179}
62
+ {"step": 6000, "loss": 1.3233375549316406, "gnorm": 8.408491134643555, "n_masked": 341, "elapsed_s": 7695.062134504318}
63
+ {"step": 6100, "loss": 1.3308767080307007, "gnorm": 21.0144100189209, "n_masked": 168, "elapsed_s": 7730.057364702225}
64
+ {"step": 6200, "loss": 1.3268661499023438, "gnorm": 12.241264343261719, "n_masked": 540, "elapsed_s": 7765.141093492508}
65
+ {"step": 6300, "loss": 1.3503390550613403, "gnorm": 5.967979907989502, "n_masked": 672, "elapsed_s": 7799.989523649216}
66
+ {"step": 6400, "loss": 1.4056780338287354, "gnorm": 6.847990036010742, "n_masked": 143, "elapsed_s": 7835.013739109039}
67
+ {"step": 6500, "loss": 1.3490352630615234, "gnorm": 6.862911701202393, "n_masked": 728, "elapsed_s": 7870.067131280899}
68
+ {"step": 6600, "loss": 1.374297857284546, "gnorm": 7.815378189086914, "n_masked": 653, "elapsed_s": 7905.1020748615265}
69
+ {"step": 6700, "loss": 1.2737740278244019, "gnorm": 16.522258758544922, "n_masked": 107, "elapsed_s": 7939.999103069305}
70
+ {"step": 6800, "loss": 1.401519536972046, "gnorm": 78.16688537597656, "n_masked": 540, "elapsed_s": 7974.994282484055}
71
+ {"step": 6900, "loss": 1.330535650253296, "gnorm": 13.853470802307129, "n_masked": 381, "elapsed_s": 8009.946764469147}
72
+ {"step": 7000, "loss": 1.3486565351486206, "gnorm": 11.617114067077637, "n_masked": 406, "elapsed_s": 8044.912518501282}
73
+ {"step": 7100, "loss": 1.285589337348938, "gnorm": 16.424327850341797, "n_masked": 179, "elapsed_s": 8079.750417470932}
74
+ {"step": 7200, "loss": 1.2742160558700562, "gnorm": 31.698278427124023, "n_masked": 458, "elapsed_s": 8114.71746301651}
75
+ {"step": 7300, "loss": 1.2407269477844238, "gnorm": 11.096803665161133, "n_masked": 123, "elapsed_s": 8149.745268344879}
76
+ {"step": 7400, "loss": 1.3064358234405518, "gnorm": 46.603904724121094, "n_masked": 166, "elapsed_s": 8184.740423679352}
77
+ {"step": 7500, "loss": 1.2861576080322266, "gnorm": 2.987504720687866, "n_masked": 402, "elapsed_s": 8219.64723277092}
78
+ {"step": 7500, "val_loss": 1.3461982468055471, "val_n_masked": 124871}
79
+ {"step": 7600, "loss": 1.3431271314620972, "gnorm": 7.271148204803467, "n_masked": 698, "elapsed_s": 9333.68992805481}
80
+ {"step": 7700, "loss": 1.3511724472045898, "gnorm": 13.655619621276855, "n_masked": 343, "elapsed_s": 9368.758021831512}
81
+ {"step": 7800, "loss": 1.329745888710022, "gnorm": 10.057658195495605, "n_masked": 362, "elapsed_s": 9403.801835536957}
82
+ {"step": 7900, "loss": 1.30551016330719, "gnorm": 5.101137161254883, "n_masked": 255, "elapsed_s": 9438.713890314102}
83
+ {"step": 8000, "loss": 1.331566572189331, "gnorm": 11.613862037658691, "n_masked": 401, "elapsed_s": 9473.744375228882}
84
+ {"step": 8100, "loss": 1.326912760734558, "gnorm": 8.984637260437012, "n_masked": 559, "elapsed_s": 9508.765765190125}
85
+ {"step": 8200, "loss": 1.3339165449142456, "gnorm": 57.99892807006836, "n_masked": 81, "elapsed_s": 9543.799288511276}
86
+ {"step": 8300, "loss": 1.3456419706344604, "gnorm": 29.1489315032959, "n_masked": 493, "elapsed_s": 9578.700244426727}
87
+ {"step": 8400, "loss": 1.3423817157745361, "gnorm": 7.6728129386901855, "n_masked": 447, "elapsed_s": 9613.639131069183}
88
+ {"step": 8500, "loss": 1.2403051853179932, "gnorm": 3.4364893436431885, "n_masked": 505, "elapsed_s": 9648.573870182037}
89
+ {"step": 8600, "loss": 1.3978406190872192, "gnorm": 8.837202072143555, "n_masked": 453, "elapsed_s": 9683.557032585144}
90
+ {"step": 8700, "loss": 1.3425185680389404, "gnorm": 2.011258363723755, "n_masked": 426, "elapsed_s": 9718.36557173729}
91
+ {"step": 8800, "loss": 1.3208811283111572, "gnorm": 2.5779662132263184, "n_masked": 172, "elapsed_s": 9753.322005271912}
92
+ {"step": 8900, "loss": 1.3700566291809082, "gnorm": 4.516443252563477, "n_masked": 744, "elapsed_s": 9788.364897489548}
93
+ {"step": 9000, "loss": 1.3835967779159546, "gnorm": 8.976652145385742, "n_masked": 795, "elapsed_s": 9823.404417037964}
94
+ {"step": 9100, "loss": 1.431418538093567, "gnorm": 250.06468200683594, "n_masked": 752, "elapsed_s": 9858.40528678894}
95
+ {"step": 9200, "loss": 1.3595422506332397, "gnorm": 11.309383392333984, "n_masked": 730, "elapsed_s": 9893.296775579453}
96
+ {"step": 9300, "loss": 1.3622310161590576, "gnorm": 52.861427307128906, "n_masked": 383, "elapsed_s": 9928.340805053711}
97
+ {"step": 9400, "loss": 1.3269696235656738, "gnorm": 6.988961219787598, "n_masked": 594, "elapsed_s": 9963.350822210312}
98
+ {"step": 9500, "loss": 1.3579943180084229, "gnorm": 7.35931921005249, "n_masked": 592, "elapsed_s": 9998.371896982193}
99
+ {"step": 9600, "loss": 1.3575937747955322, "gnorm": 6.532353401184082, "n_masked": 589, "elapsed_s": 10033.260726928711}
100
+ {"step": 9700, "loss": 1.29998779296875, "gnorm": 7.136166095733643, "n_masked": 238, "elapsed_s": 10068.277053833008}
101
+ {"step": 9800, "loss": 1.3211146593093872, "gnorm": 6.5379180908203125, "n_masked": 411, "elapsed_s": 10103.264166355133}
102
+ {"step": 9900, "loss": 1.3348463773727417, "gnorm": 5.630400657653809, "n_masked": 468, "elapsed_s": 10138.287166833878}
103
+ {"step": 10000, "loss": 1.3313758373260498, "gnorm": 2.338728427886963, "n_masked": 337, "elapsed_s": 10173.16588973999}
104
+ {"step": 10000, "val_loss": 1.332536332357815, "val_n_masked": 125023}
105
+ {"step": 10100, "loss": 1.3177400827407837, "gnorm": 2.9404239654541016, "n_masked": 432, "elapsed_s": 12335.735327482224}
106
+ {"step": 10200, "loss": 1.3503239154815674, "gnorm": 7.821503639221191, "n_masked": 535, "elapsed_s": 12371.01549744606}
107
+ {"step": 10300, "loss": 1.3312655687332153, "gnorm": 5.129350662231445, "n_masked": 397, "elapsed_s": 12406.10012960434}
108
+ {"step": 10400, "loss": 1.3982607126235962, "gnorm": 10.374671936035156, "n_masked": 555, "elapsed_s": 12441.17598772049}
109
+ {"step": 10500, "loss": 1.36489999294281, "gnorm": 4.410340785980225, "n_masked": 630, "elapsed_s": 12476.089765787125}
110
+ {"step": 10600, "loss": 1.3197097778320312, "gnorm": 3.000126600265503, "n_masked": 75, "elapsed_s": 12511.099541664124}
111
+ {"step": 10700, "loss": 1.3693287372589111, "gnorm": 17.787830352783203, "n_masked": 350, "elapsed_s": 12546.134250879288}
112
+ {"step": 10800, "loss": 1.357347846031189, "gnorm": 5.764089107513428, "n_masked": 276, "elapsed_s": 12581.152743339539}
113
+ {"step": 10900, "loss": 1.2871124744415283, "gnorm": 14.276144027709961, "n_masked": 309, "elapsed_s": 12616.055626392365}
114
+ {"step": 11000, "loss": 1.335727334022522, "gnorm": 8.464067459106445, "n_masked": 196, "elapsed_s": 12651.063263177872}
115
+ {"step": 11100, "loss": 1.3017510175704956, "gnorm": 4.793827056884766, "n_masked": 482, "elapsed_s": 12686.074303150177}
116
+ {"step": 11200, "loss": 1.3079869747161865, "gnorm": 3.222154378890991, "n_masked": 326, "elapsed_s": 12721.11509180069}
117
+ {"step": 11300, "loss": 1.373738169670105, "gnorm": 5.913382530212402, "n_masked": 208, "elapsed_s": 12756.020750522614}
118
+ {"step": 11400, "loss": 1.3512437343597412, "gnorm": 1.7271109819412231, "n_masked": 645, "elapsed_s": 12791.055712223053}
119
+ {"step": 11500, "loss": 1.3102363348007202, "gnorm": 1.7731279134750366, "n_masked": 304, "elapsed_s": 12826.13824057579}
120
+ {"step": 11600, "loss": 1.3387160301208496, "gnorm": 5.413264274597168, "n_masked": 191, "elapsed_s": 12861.165941238403}
121
+ {"step": 11700, "loss": 1.3563424348831177, "gnorm": 2.0337867736816406, "n_masked": 778, "elapsed_s": 12896.065358161926}
122
+ {"step": 11800, "loss": 1.3991780281066895, "gnorm": 2.4934990406036377, "n_masked": 815, "elapsed_s": 12931.109330654144}
123
+ {"step": 11900, "loss": 1.3652909994125366, "gnorm": 1.0340943336486816, "n_masked": 824, "elapsed_s": 12966.141960382462}
124
+ {"step": 12000, "loss": 1.3564742803573608, "gnorm": 1.8864952325820923, "n_masked": 705, "elapsed_s": 13001.176522731781}
125
+ {"step": 12100, "loss": 1.3394527435302734, "gnorm": 1.967041254043579, "n_masked": 522, "elapsed_s": 13036.215588569641}
126
+ {"step": 12200, "loss": 1.3094977140426636, "gnorm": 1.7282376289367676, "n_masked": 531, "elapsed_s": 13071.127252578735}
127
+ {"step": 12300, "loss": 1.3609200716018677, "gnorm": 12.448420524597168, "n_masked": 720, "elapsed_s": 13106.150354623795}
128
+ {"step": 12400, "loss": 1.6018884181976318, "gnorm": 147.49122619628906, "n_masked": 705, "elapsed_s": 13141.174812793732}
129
+ {"step": 12500, "loss": 1.3192853927612305, "gnorm": 2.343360424041748, "n_masked": 420, "elapsed_s": 13176.22732257843}
130
+ {"step": 12500, "val_loss": 1.32992932762124, "val_n_masked": 124920}
131
+ {"step": 12600, "loss": 1.5380362272262573, "gnorm": 1095.1002197265625, "n_masked": 175, "elapsed_s": 15438.358446836472}
132
+ {"step": 12700, "loss": 1.3690907955169678, "gnorm": 1.725947380065918, "n_masked": 517, "elapsed_s": 15473.319992303848}
133
+ {"step": 12800, "loss": 1.2240573167800903, "gnorm": 6.051928997039795, "n_masked": 182, "elapsed_s": 15508.598113536835}
134
+ {"step": 12900, "loss": 1.3646562099456787, "gnorm": 3.8905832767486572, "n_masked": 485, "elapsed_s": 15543.643609523773}
135
+ {"step": 13000, "loss": 1.3660603761672974, "gnorm": 3.0796468257904053, "n_masked": 714, "elapsed_s": 15578.640979528427}
136
+ {"step": 13100, "loss": 1.2969322204589844, "gnorm": 6.552462577819824, "n_masked": 457, "elapsed_s": 15613.48075222969}
137
+ {"step": 13200, "loss": 1.358729362487793, "gnorm": 4.849985122680664, "n_masked": 620, "elapsed_s": 15648.469774961472}
138
+ {"step": 13300, "loss": 1.3740211725234985, "gnorm": 2.144876003265381, "n_masked": 982, "elapsed_s": 15683.428329467773}
139
+ {"step": 13400, "loss": 1.362549066543579, "gnorm": 2.2153356075286865, "n_masked": 413, "elapsed_s": 15718.36815738678}
140
+ {"step": 13500, "loss": 1.3427318334579468, "gnorm": 9.615581512451172, "n_masked": 472, "elapsed_s": 15753.212835550308}
141
+ {"step": 13600, "loss": 1.4032562971115112, "gnorm": 12.760854721069336, "n_masked": 677, "elapsed_s": 15788.169652700424}
142
+ {"step": 13700, "loss": 1.3683148622512817, "gnorm": 87.66454315185547, "n_masked": 357, "elapsed_s": 15823.125300168991}
143
+ {"step": 13800, "loss": 1.3699092864990234, "gnorm": 3.163581609725952, "n_masked": 765, "elapsed_s": 15858.080843925476}
144
+ {"step": 13900, "loss": 1.3258605003356934, "gnorm": 3.572298765182495, "n_masked": 617, "elapsed_s": 15892.924238920212}
145
+ {"step": 14000, "loss": 1.2990835905075073, "gnorm": 10.604793548583984, "n_masked": 329, "elapsed_s": 15927.885230064392}
146
+ {"step": 14100, "loss": 1.3277738094329834, "gnorm": 1.6273305416107178, "n_masked": 369, "elapsed_s": 15962.84838628769}
147
+ {"step": 14200, "loss": 1.3075618743896484, "gnorm": 1.6577447652816772, "n_masked": 457, "elapsed_s": 15997.804626464844}
148
+ {"step": 14300, "loss": 1.3652448654174805, "gnorm": 2.276348114013672, "n_masked": 819, "elapsed_s": 16032.636194229126}
149
+ {"step": 14400, "loss": 1.3025102615356445, "gnorm": 2.380383014678955, "n_masked": 387, "elapsed_s": 16067.585780858994}
150
+ {"step": 14500, "loss": 1.3447939157485962, "gnorm": 2.073723316192627, "n_masked": 428, "elapsed_s": 16102.543053150177}
151
+ {"step": 14600, "loss": 1.3697638511657715, "gnorm": 3.7335214614868164, "n_masked": 289, "elapsed_s": 16137.514839887619}
152
+ {"step": 14700, "loss": 1.4207888841629028, "gnorm": 2.8976926803588867, "n_masked": 109, "elapsed_s": 16172.356001615524}
153
+ {"step": 14800, "loss": 1.366150975227356, "gnorm": 2.68571138381958, "n_masked": 612, "elapsed_s": 16207.32058763504}
154
+ {"step": 14900, "loss": 1.359851598739624, "gnorm": 4.295720100402832, "n_masked": 556, "elapsed_s": 16242.278193950653}
155
+ {"step": 15000, "loss": 1.2660468816757202, "gnorm": 2.164249897003174, "n_masked": 273, "elapsed_s": 16277.261706590652}
156
+ {"step": 15000, "val_loss": 1.3297042911864756, "val_n_masked": 125235}
157
+ {"step": 15100, "loss": 1.3658894300460815, "gnorm": 1.2942863702774048, "n_masked": 475, "elapsed_s": 18257.048031568527}
158
+ {"step": 15200, "loss": 1.346961259841919, "gnorm": 2.4683775901794434, "n_masked": 539, "elapsed_s": 18292.105792999268}
159
+ {"step": 15300, "loss": 1.3459392786026, "gnorm": 0.5136907696723938, "n_masked": 528, "elapsed_s": 18327.415499687195}
160
+ {"step": 15400, "loss": 1.3114134073257446, "gnorm": 2.946037769317627, "n_masked": 447, "elapsed_s": 18362.869434833527}
161
+ {"step": 15500, "loss": 1.362864375114441, "gnorm": 2.085458755493164, "n_masked": 507, "elapsed_s": 18398.03946518898}
162
+ {"step": 15600, "loss": 1.3582828044891357, "gnorm": 8.14642333984375, "n_masked": 380, "elapsed_s": 18433.22500681877}
163
+ {"step": 15700, "loss": 1.3282861709594727, "gnorm": 1.6088746786117554, "n_masked": 392, "elapsed_s": 18468.31206202507}
164
+ {"step": 15800, "loss": 1.3169779777526855, "gnorm": 1.5004087686538696, "n_masked": 522, "elapsed_s": 18503.486285448074}
165
+ {"step": 15900, "loss": 1.3376208543777466, "gnorm": 1.860012412071228, "n_masked": 632, "elapsed_s": 18538.65664792061}
166
+ {"step": 16000, "loss": 1.3482871055603027, "gnorm": 1.2395230531692505, "n_masked": 334, "elapsed_s": 18573.80719089508}
167
+ {"step": 16100, "loss": 1.3216701745986938, "gnorm": 0.8125881552696228, "n_masked": 499, "elapsed_s": 18608.857580900192}
168
+ {"step": 16200, "loss": 1.346019983291626, "gnorm": 1.1180202960968018, "n_masked": 653, "elapsed_s": 18643.997255802155}
169
+ {"step": 16300, "loss": 1.3721370697021484, "gnorm": 7.9087419509887695, "n_masked": 715, "elapsed_s": 18679.145486593246}
170
+ {"step": 16400, "loss": 1.3603994846343994, "gnorm": 4.179591655731201, "n_masked": 687, "elapsed_s": 18714.271958589554}
171
+ {"step": 16500, "loss": 1.3528060913085938, "gnorm": 3.7661502361297607, "n_masked": 449, "elapsed_s": 18749.302349328995}
172
+ {"step": 16600, "loss": 1.3509521484375, "gnorm": 1.4439191818237305, "n_masked": 415, "elapsed_s": 18784.462653398514}
173
+ {"step": 16700, "loss": 1.2697807550430298, "gnorm": 1.5983490943908691, "n_masked": 482, "elapsed_s": 18819.618886232376}
174
+ {"step": 16800, "loss": 1.314881443977356, "gnorm": 2.8197600841522217, "n_masked": 417, "elapsed_s": 18854.7766289711}
175
+ {"step": 16900, "loss": 1.3391406536102295, "gnorm": 5.236271381378174, "n_masked": 574, "elapsed_s": 18889.814019203186}
176
+ {"step": 17000, "loss": 1.280563235282898, "gnorm": 4.5697102546691895, "n_masked": 383, "elapsed_s": 18924.972031116486}
177
+ {"step": 17100, "loss": 1.3873536586761475, "gnorm": 2.4489362239837646, "n_masked": 692, "elapsed_s": 18960.119480848312}
178
+ {"step": 17200, "loss": 1.3650811910629272, "gnorm": 2.4295473098754883, "n_masked": 639, "elapsed_s": 18995.25434112549}
179
+ {"step": 17300, "loss": 1.3583918809890747, "gnorm": 1.5854743719100952, "n_masked": 537, "elapsed_s": 19030.712035417557}
180
+ {"step": 17400, "loss": 1.297775149345398, "gnorm": 6.549415111541748, "n_masked": 369, "elapsed_s": 19065.99603319168}
181
+ {"step": 17500, "loss": 1.326299786567688, "gnorm": 4.516073226928711, "n_masked": 397, "elapsed_s": 19101.255742788315}
182
+ {"step": 17500, "val_loss": 1.3280105523568657, "val_n_masked": 124790}
183
+ {"step": 17600, "loss": 1.30617094039917, "gnorm": 1.4493821859359741, "n_masked": 421, "elapsed_s": 21023.37254500389}
184
+ {"step": 17700, "loss": 1.3262983560562134, "gnorm": 6.19812536239624, "n_masked": 680, "elapsed_s": 21058.679227352142}
185
+ {"step": 17800, "loss": 1.3519046306610107, "gnorm": 4.958584785461426, "n_masked": 444, "elapsed_s": 21093.707690238953}
186
+ {"step": 17900, "loss": 1.3280932903289795, "gnorm": 1.642270803451538, "n_masked": 369, "elapsed_s": 21128.83488869667}
187
+ {"step": 18000, "loss": 1.4084324836730957, "gnorm": 120.59577941894531, "n_masked": 570, "elapsed_s": 21164.00173306465}
188
+ {"step": 18100, "loss": 1.369340181350708, "gnorm": 2.3884437084198, "n_masked": 859, "elapsed_s": 21199.145361185074}
189
+ {"step": 18200, "loss": 1.3390743732452393, "gnorm": 2.5385193824768066, "n_masked": 629, "elapsed_s": 21234.186383247375}
190
+ {"step": 18300, "loss": 1.3345766067504883, "gnorm": 2.9841699600219727, "n_masked": 283, "elapsed_s": 21269.362609386444}
191
+ {"step": 18400, "loss": 1.3781012296676636, "gnorm": 2.2650625705718994, "n_masked": 718, "elapsed_s": 21304.543813467026}
192
+ {"step": 18500, "loss": 1.331494688987732, "gnorm": 3.352966070175171, "n_masked": 435, "elapsed_s": 21339.73446726799}
193
+ {"step": 18600, "loss": 1.2875906229019165, "gnorm": 2.0948596000671387, "n_masked": 380, "elapsed_s": 21374.89786839485}
194
+ {"step": 18700, "loss": 1.3418928384780884, "gnorm": 1.8478041887283325, "n_masked": 703, "elapsed_s": 21409.961636543274}
195
+ {"step": 18800, "loss": 1.3254817724227905, "gnorm": 2.9068520069122314, "n_masked": 382, "elapsed_s": 21445.11950993538}
196
+ {"step": 18900, "loss": 1.3817123174667358, "gnorm": 2.537318468093872, "n_masked": 636, "elapsed_s": 21480.269674301147}
197
+ {"step": 19000, "loss": 1.331003189086914, "gnorm": 1.6087671518325806, "n_masked": 314, "elapsed_s": 21515.467938899994}
198
+ {"step": 19100, "loss": 1.3546355962753296, "gnorm": 2.0837230682373047, "n_masked": 543, "elapsed_s": 21550.477276802063}
199
+ {"step": 19200, "loss": 1.3670083284378052, "gnorm": 2.4186134338378906, "n_masked": 338, "elapsed_s": 21585.64392876625}
200
+ {"step": 19300, "loss": 1.2483420372009277, "gnorm": 2.970231533050537, "n_masked": 134, "elapsed_s": 21620.823533296585}
201
+ {"step": 19400, "loss": 1.3296822309494019, "gnorm": 1.62014901638031, "n_masked": 253, "elapsed_s": 21656.012600898743}
202
+ {"step": 19500, "loss": 1.2921829223632812, "gnorm": 0.9020812511444092, "n_masked": 394, "elapsed_s": 21691.086144208908}
203
+ {"step": 19600, "loss": 1.3469867706298828, "gnorm": 1.226125955581665, "n_masked": 438, "elapsed_s": 21726.28050637245}
204
+ {"step": 19700, "loss": 1.3383677005767822, "gnorm": 1.119696021080017, "n_masked": 623, "elapsed_s": 21761.50352883339}
205
+ {"step": 19800, "loss": 1.317857265472412, "gnorm": 1.6388494968414307, "n_masked": 431, "elapsed_s": 21796.7032995224}
206
+ {"step": 19900, "loss": 1.3395962715148926, "gnorm": 0.6142444610595703, "n_masked": 370, "elapsed_s": 21831.83272099495}
207
+ {"step": 20000, "loss": 1.3526158332824707, "gnorm": 0.9940682649612427, "n_masked": 852, "elapsed_s": 21867.00915122032}
208
+ {"step": 20000, "val_loss": 1.322814276283301, "val_n_masked": 125076}