Upload exp_t3_ntv3_650m_500bp_FIXED_HASH_20260506_074840/log.jsonl with huggingface_hub
Browse files
exp_t3_ntv3_650m_500bp_FIXED_HASH_20260506_074840/log.jsonl
CHANGED
|
@@ -10,3 +10,199 @@
|
|
| 10 |
{"step": 1000, "loss": 2.2444167137145996, "gnorm": 900.26416015625, "n_masked": 68, "elapsed_s": 353.32481050491333}
|
| 11 |
{"step": 1100, "loss": 1.4160035848617554, "gnorm": 69.71623992919922, "n_masked": 639, "elapsed_s": 388.2918951511383}
|
| 12 |
{"step": 1200, "loss": 2.3762495517730713, "gnorm": 675.8726806640625, "n_masked": 267, "elapsed_s": 423.330069065094}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
{"step": 1000, "loss": 2.2444167137145996, "gnorm": 900.26416015625, "n_masked": 68, "elapsed_s": 353.32481050491333}
|
| 11 |
{"step": 1100, "loss": 1.4160035848617554, "gnorm": 69.71623992919922, "n_masked": 639, "elapsed_s": 388.2918951511383}
|
| 12 |
{"step": 1200, "loss": 2.3762495517730713, "gnorm": 675.8726806640625, "n_masked": 267, "elapsed_s": 423.330069065094}
|
| 13 |
+
{"step": 1300, "loss": 1.4506486654281616, "gnorm": 78.70797729492188, "n_masked": 483, "elapsed_s": 458.38294768333435}
|
| 14 |
+
{"step": 1400, "loss": 1.5745760202407837, "gnorm": 179.05052185058594, "n_masked": 742, "elapsed_s": 493.5207726955414}
|
| 15 |
+
{"step": 1500, "loss": 1.6388559341430664, "gnorm": 80.24949645996094, "n_masked": 428, "elapsed_s": 533.5296359062195}
|
| 16 |
+
{"step": 1600, "loss": 1.4467231035232544, "gnorm": 208.5074005126953, "n_masked": 661, "elapsed_s": 568.5280933380127}
|
| 17 |
+
{"step": 1700, "loss": 1.3736953735351562, "gnorm": 22.254127502441406, "n_masked": 749, "elapsed_s": 609.2386786937714}
|
| 18 |
+
{"step": 1800, "loss": 1.36086106300354, "gnorm": 34.62132263183594, "n_masked": 256, "elapsed_s": 644.3455059528351}
|
| 19 |
+
{"step": 1900, "loss": 1.3950624465942383, "gnorm": 59.09011459350586, "n_masked": 341, "elapsed_s": 687.5047490596771}
|
| 20 |
+
{"step": 2000, "loss": 1.4191551208496094, "gnorm": 51.81608200073242, "n_masked": 832, "elapsed_s": 722.6527764797211}
|
| 21 |
+
{"step": 2100, "loss": 1.3698285818099976, "gnorm": 40.21640396118164, "n_masked": 318, "elapsed_s": 763.987238407135}
|
| 22 |
+
{"step": 2200, "loss": 1.4455608129501343, "gnorm": 35.702667236328125, "n_masked": 558, "elapsed_s": 799.1353981494904}
|
| 23 |
+
{"step": 2300, "loss": 1.3775945901870728, "gnorm": 49.560386657714844, "n_masked": 447, "elapsed_s": 838.7775135040283}
|
| 24 |
+
{"step": 2400, "loss": 1.3389371633529663, "gnorm": 56.92543411254883, "n_masked": 455, "elapsed_s": 873.9020960330963}
|
| 25 |
+
{"step": 2500, "loss": 1.4308192729949951, "gnorm": 16.389575958251953, "n_masked": 912, "elapsed_s": 1436.6621708869934}
|
| 26 |
+
{"step": 2500, "val_loss": 1.3740407729636834, "val_n_masked": 124933}
|
| 27 |
+
{"step": 2600, "loss": 1.3899890184402466, "gnorm": 6.82764196395874, "n_masked": 637, "elapsed_s": 4481.351215839386}
|
| 28 |
+
{"step": 2700, "loss": 1.3974850177764893, "gnorm": 6.7294721603393555, "n_masked": 476, "elapsed_s": 4516.457495927811}
|
| 29 |
+
{"step": 2800, "loss": 1.4410240650177002, "gnorm": 43.96825408935547, "n_masked": 547, "elapsed_s": 4551.330363750458}
|
| 30 |
+
{"step": 2900, "loss": 1.492596983909607, "gnorm": 72.34571075439453, "n_masked": 630, "elapsed_s": 4586.297803640366}
|
| 31 |
+
{"step": 3000, "loss": 1.3665837049484253, "gnorm": 118.17631530761719, "n_masked": 318, "elapsed_s": 4621.244218111038}
|
| 32 |
+
{"step": 3100, "loss": 1.3473235368728638, "gnorm": 25.174943923950195, "n_masked": 542, "elapsed_s": 4656.189078092575}
|
| 33 |
+
{"step": 3200, "loss": 1.408298373222351, "gnorm": 73.048095703125, "n_masked": 519, "elapsed_s": 4691.177469730377}
|
| 34 |
+
{"step": 3300, "loss": 1.3558359146118164, "gnorm": 69.914306640625, "n_masked": 597, "elapsed_s": 4726.021328926086}
|
| 35 |
+
{"step": 3400, "loss": 1.386448860168457, "gnorm": 25.44927215576172, "n_masked": 828, "elapsed_s": 4761.017758369446}
|
| 36 |
+
{"step": 3500, "loss": 1.4499478340148926, "gnorm": 69.83002471923828, "n_masked": 544, "elapsed_s": 4795.981695890427}
|
| 37 |
+
{"step": 3600, "loss": 1.3208485841751099, "gnorm": 41.443485260009766, "n_masked": 244, "elapsed_s": 4830.964000225067}
|
| 38 |
+
{"step": 3700, "loss": 1.3665913343429565, "gnorm": 15.523662567138672, "n_masked": 472, "elapsed_s": 4865.860482931137}
|
| 39 |
+
{"step": 3800, "loss": 1.3523682355880737, "gnorm": 31.80842399597168, "n_masked": 487, "elapsed_s": 4900.837449789047}
|
| 40 |
+
{"step": 3900, "loss": 1.369313359260559, "gnorm": 28.844648361206055, "n_masked": 510, "elapsed_s": 4935.843586683273}
|
| 41 |
+
{"step": 4000, "loss": 1.4081019163131714, "gnorm": 79.35284423828125, "n_masked": 450, "elapsed_s": 4970.894025087357}
|
| 42 |
+
{"step": 4100, "loss": 1.3439009189605713, "gnorm": 8.602263450622559, "n_masked": 253, "elapsed_s": 5005.798364639282}
|
| 43 |
+
{"step": 4200, "loss": 1.3389569520950317, "gnorm": 48.269100189208984, "n_masked": 152, "elapsed_s": 5040.754721403122}
|
| 44 |
+
{"step": 4300, "loss": 1.3525687456130981, "gnorm": 19.837886810302734, "n_masked": 530, "elapsed_s": 5075.739853858948}
|
| 45 |
+
{"step": 4400, "loss": 1.3027573823928833, "gnorm": 33.582427978515625, "n_masked": 310, "elapsed_s": 5110.74070930481}
|
| 46 |
+
{"step": 4500, "loss": 1.356981873512268, "gnorm": 6.407493591308594, "n_masked": 805, "elapsed_s": 5145.5921022892}
|
| 47 |
+
{"step": 4600, "loss": 1.3320189714431763, "gnorm": 8.370941162109375, "n_masked": 484, "elapsed_s": 5180.586127758026}
|
| 48 |
+
{"step": 4700, "loss": 1.3025579452514648, "gnorm": 6.2807536125183105, "n_masked": 245, "elapsed_s": 5215.56786441803}
|
| 49 |
+
{"step": 4800, "loss": 1.3608981370925903, "gnorm": 1.30360746383667, "n_masked": 716, "elapsed_s": 5250.600974321365}
|
| 50 |
+
{"step": 4900, "loss": 1.3767285346984863, "gnorm": 13.428191184997559, "n_masked": 294, "elapsed_s": 5285.550397872925}
|
| 51 |
+
{"step": 5000, "loss": 1.3199602365493774, "gnorm": 100.54936218261719, "n_masked": 237, "elapsed_s": 5320.59251832962}
|
| 52 |
+
{"step": 5000, "val_loss": 1.342064801384421, "val_n_masked": 125120}
|
| 53 |
+
{"step": 5100, "loss": 1.3597325086593628, "gnorm": 5.7053608894348145, "n_masked": 419, "elapsed_s": 7379.943660259247}
|
| 54 |
+
{"step": 5200, "loss": 1.3866119384765625, "gnorm": 12.63646125793457, "n_masked": 541, "elapsed_s": 7415.143207550049}
|
| 55 |
+
{"step": 5300, "loss": 1.3350417613983154, "gnorm": 38.037872314453125, "n_masked": 442, "elapsed_s": 7450.182459592819}
|
| 56 |
+
{"step": 5400, "loss": 1.392158031463623, "gnorm": 104.28556060791016, "n_masked": 179, "elapsed_s": 7485.0931804180145}
|
| 57 |
+
{"step": 5500, "loss": 1.3896971940994263, "gnorm": 8.202664375305176, "n_masked": 448, "elapsed_s": 7520.148466348648}
|
| 58 |
+
{"step": 5600, "loss": 1.2966992855072021, "gnorm": 2.196049213409424, "n_masked": 373, "elapsed_s": 7555.172767162323}
|
| 59 |
+
{"step": 5700, "loss": 1.340815544128418, "gnorm": 37.26991653442383, "n_masked": 395, "elapsed_s": 7590.183227300644}
|
| 60 |
+
{"step": 5800, "loss": 1.3740426301956177, "gnorm": 17.943693161010742, "n_masked": 693, "elapsed_s": 7625.100323438644}
|
| 61 |
+
{"step": 5900, "loss": 1.3825974464416504, "gnorm": 20.106971740722656, "n_masked": 353, "elapsed_s": 7660.096639633179}
|
| 62 |
+
{"step": 6000, "loss": 1.3233375549316406, "gnorm": 8.408491134643555, "n_masked": 341, "elapsed_s": 7695.062134504318}
|
| 63 |
+
{"step": 6100, "loss": 1.3308767080307007, "gnorm": 21.0144100189209, "n_masked": 168, "elapsed_s": 7730.057364702225}
|
| 64 |
+
{"step": 6200, "loss": 1.3268661499023438, "gnorm": 12.241264343261719, "n_masked": 540, "elapsed_s": 7765.141093492508}
|
| 65 |
+
{"step": 6300, "loss": 1.3503390550613403, "gnorm": 5.967979907989502, "n_masked": 672, "elapsed_s": 7799.989523649216}
|
| 66 |
+
{"step": 6400, "loss": 1.4056780338287354, "gnorm": 6.847990036010742, "n_masked": 143, "elapsed_s": 7835.013739109039}
|
| 67 |
+
{"step": 6500, "loss": 1.3490352630615234, "gnorm": 6.862911701202393, "n_masked": 728, "elapsed_s": 7870.067131280899}
|
| 68 |
+
{"step": 6600, "loss": 1.374297857284546, "gnorm": 7.815378189086914, "n_masked": 653, "elapsed_s": 7905.1020748615265}
|
| 69 |
+
{"step": 6700, "loss": 1.2737740278244019, "gnorm": 16.522258758544922, "n_masked": 107, "elapsed_s": 7939.999103069305}
|
| 70 |
+
{"step": 6800, "loss": 1.401519536972046, "gnorm": 78.16688537597656, "n_masked": 540, "elapsed_s": 7974.994282484055}
|
| 71 |
+
{"step": 6900, "loss": 1.330535650253296, "gnorm": 13.853470802307129, "n_masked": 381, "elapsed_s": 8009.946764469147}
|
| 72 |
+
{"step": 7000, "loss": 1.3486565351486206, "gnorm": 11.617114067077637, "n_masked": 406, "elapsed_s": 8044.912518501282}
|
| 73 |
+
{"step": 7100, "loss": 1.285589337348938, "gnorm": 16.424327850341797, "n_masked": 179, "elapsed_s": 8079.750417470932}
|
| 74 |
+
{"step": 7200, "loss": 1.2742160558700562, "gnorm": 31.698278427124023, "n_masked": 458, "elapsed_s": 8114.71746301651}
|
| 75 |
+
{"step": 7300, "loss": 1.2407269477844238, "gnorm": 11.096803665161133, "n_masked": 123, "elapsed_s": 8149.745268344879}
|
| 76 |
+
{"step": 7400, "loss": 1.3064358234405518, "gnorm": 46.603904724121094, "n_masked": 166, "elapsed_s": 8184.740423679352}
|
| 77 |
+
{"step": 7500, "loss": 1.2861576080322266, "gnorm": 2.987504720687866, "n_masked": 402, "elapsed_s": 8219.64723277092}
|
| 78 |
+
{"step": 7500, "val_loss": 1.3461982468055471, "val_n_masked": 124871}
|
| 79 |
+
{"step": 7600, "loss": 1.3431271314620972, "gnorm": 7.271148204803467, "n_masked": 698, "elapsed_s": 9333.68992805481}
|
| 80 |
+
{"step": 7700, "loss": 1.3511724472045898, "gnorm": 13.655619621276855, "n_masked": 343, "elapsed_s": 9368.758021831512}
|
| 81 |
+
{"step": 7800, "loss": 1.329745888710022, "gnorm": 10.057658195495605, "n_masked": 362, "elapsed_s": 9403.801835536957}
|
| 82 |
+
{"step": 7900, "loss": 1.30551016330719, "gnorm": 5.101137161254883, "n_masked": 255, "elapsed_s": 9438.713890314102}
|
| 83 |
+
{"step": 8000, "loss": 1.331566572189331, "gnorm": 11.613862037658691, "n_masked": 401, "elapsed_s": 9473.744375228882}
|
| 84 |
+
{"step": 8100, "loss": 1.326912760734558, "gnorm": 8.984637260437012, "n_masked": 559, "elapsed_s": 9508.765765190125}
|
| 85 |
+
{"step": 8200, "loss": 1.3339165449142456, "gnorm": 57.99892807006836, "n_masked": 81, "elapsed_s": 9543.799288511276}
|
| 86 |
+
{"step": 8300, "loss": 1.3456419706344604, "gnorm": 29.1489315032959, "n_masked": 493, "elapsed_s": 9578.700244426727}
|
| 87 |
+
{"step": 8400, "loss": 1.3423817157745361, "gnorm": 7.6728129386901855, "n_masked": 447, "elapsed_s": 9613.639131069183}
|
| 88 |
+
{"step": 8500, "loss": 1.2403051853179932, "gnorm": 3.4364893436431885, "n_masked": 505, "elapsed_s": 9648.573870182037}
|
| 89 |
+
{"step": 8600, "loss": 1.3978406190872192, "gnorm": 8.837202072143555, "n_masked": 453, "elapsed_s": 9683.557032585144}
|
| 90 |
+
{"step": 8700, "loss": 1.3425185680389404, "gnorm": 2.011258363723755, "n_masked": 426, "elapsed_s": 9718.36557173729}
|
| 91 |
+
{"step": 8800, "loss": 1.3208811283111572, "gnorm": 2.5779662132263184, "n_masked": 172, "elapsed_s": 9753.322005271912}
|
| 92 |
+
{"step": 8900, "loss": 1.3700566291809082, "gnorm": 4.516443252563477, "n_masked": 744, "elapsed_s": 9788.364897489548}
|
| 93 |
+
{"step": 9000, "loss": 1.3835967779159546, "gnorm": 8.976652145385742, "n_masked": 795, "elapsed_s": 9823.404417037964}
|
| 94 |
+
{"step": 9100, "loss": 1.431418538093567, "gnorm": 250.06468200683594, "n_masked": 752, "elapsed_s": 9858.40528678894}
|
| 95 |
+
{"step": 9200, "loss": 1.3595422506332397, "gnorm": 11.309383392333984, "n_masked": 730, "elapsed_s": 9893.296775579453}
|
| 96 |
+
{"step": 9300, "loss": 1.3622310161590576, "gnorm": 52.861427307128906, "n_masked": 383, "elapsed_s": 9928.340805053711}
|
| 97 |
+
{"step": 9400, "loss": 1.3269696235656738, "gnorm": 6.988961219787598, "n_masked": 594, "elapsed_s": 9963.350822210312}
|
| 98 |
+
{"step": 9500, "loss": 1.3579943180084229, "gnorm": 7.35931921005249, "n_masked": 592, "elapsed_s": 9998.371896982193}
|
| 99 |
+
{"step": 9600, "loss": 1.3575937747955322, "gnorm": 6.532353401184082, "n_masked": 589, "elapsed_s": 10033.260726928711}
|
| 100 |
+
{"step": 9700, "loss": 1.29998779296875, "gnorm": 7.136166095733643, "n_masked": 238, "elapsed_s": 10068.277053833008}
|
| 101 |
+
{"step": 9800, "loss": 1.3211146593093872, "gnorm": 6.5379180908203125, "n_masked": 411, "elapsed_s": 10103.264166355133}
|
| 102 |
+
{"step": 9900, "loss": 1.3348463773727417, "gnorm": 5.630400657653809, "n_masked": 468, "elapsed_s": 10138.287166833878}
|
| 103 |
+
{"step": 10000, "loss": 1.3313758373260498, "gnorm": 2.338728427886963, "n_masked": 337, "elapsed_s": 10173.16588973999}
|
| 104 |
+
{"step": 10000, "val_loss": 1.332536332357815, "val_n_masked": 125023}
|
| 105 |
+
{"step": 10100, "loss": 1.3177400827407837, "gnorm": 2.9404239654541016, "n_masked": 432, "elapsed_s": 12335.735327482224}
|
| 106 |
+
{"step": 10200, "loss": 1.3503239154815674, "gnorm": 7.821503639221191, "n_masked": 535, "elapsed_s": 12371.01549744606}
|
| 107 |
+
{"step": 10300, "loss": 1.3312655687332153, "gnorm": 5.129350662231445, "n_masked": 397, "elapsed_s": 12406.10012960434}
|
| 108 |
+
{"step": 10400, "loss": 1.3982607126235962, "gnorm": 10.374671936035156, "n_masked": 555, "elapsed_s": 12441.17598772049}
|
| 109 |
+
{"step": 10500, "loss": 1.36489999294281, "gnorm": 4.410340785980225, "n_masked": 630, "elapsed_s": 12476.089765787125}
|
| 110 |
+
{"step": 10600, "loss": 1.3197097778320312, "gnorm": 3.000126600265503, "n_masked": 75, "elapsed_s": 12511.099541664124}
|
| 111 |
+
{"step": 10700, "loss": 1.3693287372589111, "gnorm": 17.787830352783203, "n_masked": 350, "elapsed_s": 12546.134250879288}
|
| 112 |
+
{"step": 10800, "loss": 1.357347846031189, "gnorm": 5.764089107513428, "n_masked": 276, "elapsed_s": 12581.152743339539}
|
| 113 |
+
{"step": 10900, "loss": 1.2871124744415283, "gnorm": 14.276144027709961, "n_masked": 309, "elapsed_s": 12616.055626392365}
|
| 114 |
+
{"step": 11000, "loss": 1.335727334022522, "gnorm": 8.464067459106445, "n_masked": 196, "elapsed_s": 12651.063263177872}
|
| 115 |
+
{"step": 11100, "loss": 1.3017510175704956, "gnorm": 4.793827056884766, "n_masked": 482, "elapsed_s": 12686.074303150177}
|
| 116 |
+
{"step": 11200, "loss": 1.3079869747161865, "gnorm": 3.222154378890991, "n_masked": 326, "elapsed_s": 12721.11509180069}
|
| 117 |
+
{"step": 11300, "loss": 1.373738169670105, "gnorm": 5.913382530212402, "n_masked": 208, "elapsed_s": 12756.020750522614}
|
| 118 |
+
{"step": 11400, "loss": 1.3512437343597412, "gnorm": 1.7271109819412231, "n_masked": 645, "elapsed_s": 12791.055712223053}
|
| 119 |
+
{"step": 11500, "loss": 1.3102363348007202, "gnorm": 1.7731279134750366, "n_masked": 304, "elapsed_s": 12826.13824057579}
|
| 120 |
+
{"step": 11600, "loss": 1.3387160301208496, "gnorm": 5.413264274597168, "n_masked": 191, "elapsed_s": 12861.165941238403}
|
| 121 |
+
{"step": 11700, "loss": 1.3563424348831177, "gnorm": 2.0337867736816406, "n_masked": 778, "elapsed_s": 12896.065358161926}
|
| 122 |
+
{"step": 11800, "loss": 1.3991780281066895, "gnorm": 2.4934990406036377, "n_masked": 815, "elapsed_s": 12931.109330654144}
|
| 123 |
+
{"step": 11900, "loss": 1.3652909994125366, "gnorm": 1.0340943336486816, "n_masked": 824, "elapsed_s": 12966.141960382462}
|
| 124 |
+
{"step": 12000, "loss": 1.3564742803573608, "gnorm": 1.8864952325820923, "n_masked": 705, "elapsed_s": 13001.176522731781}
|
| 125 |
+
{"step": 12100, "loss": 1.3394527435302734, "gnorm": 1.967041254043579, "n_masked": 522, "elapsed_s": 13036.215588569641}
|
| 126 |
+
{"step": 12200, "loss": 1.3094977140426636, "gnorm": 1.7282376289367676, "n_masked": 531, "elapsed_s": 13071.127252578735}
|
| 127 |
+
{"step": 12300, "loss": 1.3609200716018677, "gnorm": 12.448420524597168, "n_masked": 720, "elapsed_s": 13106.150354623795}
|
| 128 |
+
{"step": 12400, "loss": 1.6018884181976318, "gnorm": 147.49122619628906, "n_masked": 705, "elapsed_s": 13141.174812793732}
|
| 129 |
+
{"step": 12500, "loss": 1.3192853927612305, "gnorm": 2.343360424041748, "n_masked": 420, "elapsed_s": 13176.22732257843}
|
| 130 |
+
{"step": 12500, "val_loss": 1.32992932762124, "val_n_masked": 124920}
|
| 131 |
+
{"step": 12600, "loss": 1.5380362272262573, "gnorm": 1095.1002197265625, "n_masked": 175, "elapsed_s": 15438.358446836472}
|
| 132 |
+
{"step": 12700, "loss": 1.3690907955169678, "gnorm": 1.725947380065918, "n_masked": 517, "elapsed_s": 15473.319992303848}
|
| 133 |
+
{"step": 12800, "loss": 1.2240573167800903, "gnorm": 6.051928997039795, "n_masked": 182, "elapsed_s": 15508.598113536835}
|
| 134 |
+
{"step": 12900, "loss": 1.3646562099456787, "gnorm": 3.8905832767486572, "n_masked": 485, "elapsed_s": 15543.643609523773}
|
| 135 |
+
{"step": 13000, "loss": 1.3660603761672974, "gnorm": 3.0796468257904053, "n_masked": 714, "elapsed_s": 15578.640979528427}
|
| 136 |
+
{"step": 13100, "loss": 1.2969322204589844, "gnorm": 6.552462577819824, "n_masked": 457, "elapsed_s": 15613.48075222969}
|
| 137 |
+
{"step": 13200, "loss": 1.358729362487793, "gnorm": 4.849985122680664, "n_masked": 620, "elapsed_s": 15648.469774961472}
|
| 138 |
+
{"step": 13300, "loss": 1.3740211725234985, "gnorm": 2.144876003265381, "n_masked": 982, "elapsed_s": 15683.428329467773}
|
| 139 |
+
{"step": 13400, "loss": 1.362549066543579, "gnorm": 2.2153356075286865, "n_masked": 413, "elapsed_s": 15718.36815738678}
|
| 140 |
+
{"step": 13500, "loss": 1.3427318334579468, "gnorm": 9.615581512451172, "n_masked": 472, "elapsed_s": 15753.212835550308}
|
| 141 |
+
{"step": 13600, "loss": 1.4032562971115112, "gnorm": 12.760854721069336, "n_masked": 677, "elapsed_s": 15788.169652700424}
|
| 142 |
+
{"step": 13700, "loss": 1.3683148622512817, "gnorm": 87.66454315185547, "n_masked": 357, "elapsed_s": 15823.125300168991}
|
| 143 |
+
{"step": 13800, "loss": 1.3699092864990234, "gnorm": 3.163581609725952, "n_masked": 765, "elapsed_s": 15858.080843925476}
|
| 144 |
+
{"step": 13900, "loss": 1.3258605003356934, "gnorm": 3.572298765182495, "n_masked": 617, "elapsed_s": 15892.924238920212}
|
| 145 |
+
{"step": 14000, "loss": 1.2990835905075073, "gnorm": 10.604793548583984, "n_masked": 329, "elapsed_s": 15927.885230064392}
|
| 146 |
+
{"step": 14100, "loss": 1.3277738094329834, "gnorm": 1.6273305416107178, "n_masked": 369, "elapsed_s": 15962.84838628769}
|
| 147 |
+
{"step": 14200, "loss": 1.3075618743896484, "gnorm": 1.6577447652816772, "n_masked": 457, "elapsed_s": 15997.804626464844}
|
| 148 |
+
{"step": 14300, "loss": 1.3652448654174805, "gnorm": 2.276348114013672, "n_masked": 819, "elapsed_s": 16032.636194229126}
|
| 149 |
+
{"step": 14400, "loss": 1.3025102615356445, "gnorm": 2.380383014678955, "n_masked": 387, "elapsed_s": 16067.585780858994}
|
| 150 |
+
{"step": 14500, "loss": 1.3447939157485962, "gnorm": 2.073723316192627, "n_masked": 428, "elapsed_s": 16102.543053150177}
|
| 151 |
+
{"step": 14600, "loss": 1.3697638511657715, "gnorm": 3.7335214614868164, "n_masked": 289, "elapsed_s": 16137.514839887619}
|
| 152 |
+
{"step": 14700, "loss": 1.4207888841629028, "gnorm": 2.8976926803588867, "n_masked": 109, "elapsed_s": 16172.356001615524}
|
| 153 |
+
{"step": 14800, "loss": 1.366150975227356, "gnorm": 2.68571138381958, "n_masked": 612, "elapsed_s": 16207.32058763504}
|
| 154 |
+
{"step": 14900, "loss": 1.359851598739624, "gnorm": 4.295720100402832, "n_masked": 556, "elapsed_s": 16242.278193950653}
|
| 155 |
+
{"step": 15000, "loss": 1.2660468816757202, "gnorm": 2.164249897003174, "n_masked": 273, "elapsed_s": 16277.261706590652}
|
| 156 |
+
{"step": 15000, "val_loss": 1.3297042911864756, "val_n_masked": 125235}
|
| 157 |
+
{"step": 15100, "loss": 1.3658894300460815, "gnorm": 1.2942863702774048, "n_masked": 475, "elapsed_s": 18257.048031568527}
|
| 158 |
+
{"step": 15200, "loss": 1.346961259841919, "gnorm": 2.4683775901794434, "n_masked": 539, "elapsed_s": 18292.105792999268}
|
| 159 |
+
{"step": 15300, "loss": 1.3459392786026, "gnorm": 0.5136907696723938, "n_masked": 528, "elapsed_s": 18327.415499687195}
|
| 160 |
+
{"step": 15400, "loss": 1.3114134073257446, "gnorm": 2.946037769317627, "n_masked": 447, "elapsed_s": 18362.869434833527}
|
| 161 |
+
{"step": 15500, "loss": 1.362864375114441, "gnorm": 2.085458755493164, "n_masked": 507, "elapsed_s": 18398.03946518898}
|
| 162 |
+
{"step": 15600, "loss": 1.3582828044891357, "gnorm": 8.14642333984375, "n_masked": 380, "elapsed_s": 18433.22500681877}
|
| 163 |
+
{"step": 15700, "loss": 1.3282861709594727, "gnorm": 1.6088746786117554, "n_masked": 392, "elapsed_s": 18468.31206202507}
|
| 164 |
+
{"step": 15800, "loss": 1.3169779777526855, "gnorm": 1.5004087686538696, "n_masked": 522, "elapsed_s": 18503.486285448074}
|
| 165 |
+
{"step": 15900, "loss": 1.3376208543777466, "gnorm": 1.860012412071228, "n_masked": 632, "elapsed_s": 18538.65664792061}
|
| 166 |
+
{"step": 16000, "loss": 1.3482871055603027, "gnorm": 1.2395230531692505, "n_masked": 334, "elapsed_s": 18573.80719089508}
|
| 167 |
+
{"step": 16100, "loss": 1.3216701745986938, "gnorm": 0.8125881552696228, "n_masked": 499, "elapsed_s": 18608.857580900192}
|
| 168 |
+
{"step": 16200, "loss": 1.346019983291626, "gnorm": 1.1180202960968018, "n_masked": 653, "elapsed_s": 18643.997255802155}
|
| 169 |
+
{"step": 16300, "loss": 1.3721370697021484, "gnorm": 7.9087419509887695, "n_masked": 715, "elapsed_s": 18679.145486593246}
|
| 170 |
+
{"step": 16400, "loss": 1.3603994846343994, "gnorm": 4.179591655731201, "n_masked": 687, "elapsed_s": 18714.271958589554}
|
| 171 |
+
{"step": 16500, "loss": 1.3528060913085938, "gnorm": 3.7661502361297607, "n_masked": 449, "elapsed_s": 18749.302349328995}
|
| 172 |
+
{"step": 16600, "loss": 1.3509521484375, "gnorm": 1.4439191818237305, "n_masked": 415, "elapsed_s": 18784.462653398514}
|
| 173 |
+
{"step": 16700, "loss": 1.2697807550430298, "gnorm": 1.5983490943908691, "n_masked": 482, "elapsed_s": 18819.618886232376}
|
| 174 |
+
{"step": 16800, "loss": 1.314881443977356, "gnorm": 2.8197600841522217, "n_masked": 417, "elapsed_s": 18854.7766289711}
|
| 175 |
+
{"step": 16900, "loss": 1.3391406536102295, "gnorm": 5.236271381378174, "n_masked": 574, "elapsed_s": 18889.814019203186}
|
| 176 |
+
{"step": 17000, "loss": 1.280563235282898, "gnorm": 4.5697102546691895, "n_masked": 383, "elapsed_s": 18924.972031116486}
|
| 177 |
+
{"step": 17100, "loss": 1.3873536586761475, "gnorm": 2.4489362239837646, "n_masked": 692, "elapsed_s": 18960.119480848312}
|
| 178 |
+
{"step": 17200, "loss": 1.3650811910629272, "gnorm": 2.4295473098754883, "n_masked": 639, "elapsed_s": 18995.25434112549}
|
| 179 |
+
{"step": 17300, "loss": 1.3583918809890747, "gnorm": 1.5854743719100952, "n_masked": 537, "elapsed_s": 19030.712035417557}
|
| 180 |
+
{"step": 17400, "loss": 1.297775149345398, "gnorm": 6.549415111541748, "n_masked": 369, "elapsed_s": 19065.99603319168}
|
| 181 |
+
{"step": 17500, "loss": 1.326299786567688, "gnorm": 4.516073226928711, "n_masked": 397, "elapsed_s": 19101.255742788315}
|
| 182 |
+
{"step": 17500, "val_loss": 1.3280105523568657, "val_n_masked": 124790}
|
| 183 |
+
{"step": 17600, "loss": 1.30617094039917, "gnorm": 1.4493821859359741, "n_masked": 421, "elapsed_s": 21023.37254500389}
|
| 184 |
+
{"step": 17700, "loss": 1.3262983560562134, "gnorm": 6.19812536239624, "n_masked": 680, "elapsed_s": 21058.679227352142}
|
| 185 |
+
{"step": 17800, "loss": 1.3519046306610107, "gnorm": 4.958584785461426, "n_masked": 444, "elapsed_s": 21093.707690238953}
|
| 186 |
+
{"step": 17900, "loss": 1.3280932903289795, "gnorm": 1.642270803451538, "n_masked": 369, "elapsed_s": 21128.83488869667}
|
| 187 |
+
{"step": 18000, "loss": 1.4084324836730957, "gnorm": 120.59577941894531, "n_masked": 570, "elapsed_s": 21164.00173306465}
|
| 188 |
+
{"step": 18100, "loss": 1.369340181350708, "gnorm": 2.3884437084198, "n_masked": 859, "elapsed_s": 21199.145361185074}
|
| 189 |
+
{"step": 18200, "loss": 1.3390743732452393, "gnorm": 2.5385193824768066, "n_masked": 629, "elapsed_s": 21234.186383247375}
|
| 190 |
+
{"step": 18300, "loss": 1.3345766067504883, "gnorm": 2.9841699600219727, "n_masked": 283, "elapsed_s": 21269.362609386444}
|
| 191 |
+
{"step": 18400, "loss": 1.3781012296676636, "gnorm": 2.2650625705718994, "n_masked": 718, "elapsed_s": 21304.543813467026}
|
| 192 |
+
{"step": 18500, "loss": 1.331494688987732, "gnorm": 3.352966070175171, "n_masked": 435, "elapsed_s": 21339.73446726799}
|
| 193 |
+
{"step": 18600, "loss": 1.2875906229019165, "gnorm": 2.0948596000671387, "n_masked": 380, "elapsed_s": 21374.89786839485}
|
| 194 |
+
{"step": 18700, "loss": 1.3418928384780884, "gnorm": 1.8478041887283325, "n_masked": 703, "elapsed_s": 21409.961636543274}
|
| 195 |
+
{"step": 18800, "loss": 1.3254817724227905, "gnorm": 2.9068520069122314, "n_masked": 382, "elapsed_s": 21445.11950993538}
|
| 196 |
+
{"step": 18900, "loss": 1.3817123174667358, "gnorm": 2.537318468093872, "n_masked": 636, "elapsed_s": 21480.269674301147}
|
| 197 |
+
{"step": 19000, "loss": 1.331003189086914, "gnorm": 1.6087671518325806, "n_masked": 314, "elapsed_s": 21515.467938899994}
|
| 198 |
+
{"step": 19100, "loss": 1.3546355962753296, "gnorm": 2.0837230682373047, "n_masked": 543, "elapsed_s": 21550.477276802063}
|
| 199 |
+
{"step": 19200, "loss": 1.3670083284378052, "gnorm": 2.4186134338378906, "n_masked": 338, "elapsed_s": 21585.64392876625}
|
| 200 |
+
{"step": 19300, "loss": 1.2483420372009277, "gnorm": 2.970231533050537, "n_masked": 134, "elapsed_s": 21620.823533296585}
|
| 201 |
+
{"step": 19400, "loss": 1.3296822309494019, "gnorm": 1.62014901638031, "n_masked": 253, "elapsed_s": 21656.012600898743}
|
| 202 |
+
{"step": 19500, "loss": 1.2921829223632812, "gnorm": 0.9020812511444092, "n_masked": 394, "elapsed_s": 21691.086144208908}
|
| 203 |
+
{"step": 19600, "loss": 1.3469867706298828, "gnorm": 1.226125955581665, "n_masked": 438, "elapsed_s": 21726.28050637245}
|
| 204 |
+
{"step": 19700, "loss": 1.3383677005767822, "gnorm": 1.119696021080017, "n_masked": 623, "elapsed_s": 21761.50352883339}
|
| 205 |
+
{"step": 19800, "loss": 1.317857265472412, "gnorm": 1.6388494968414307, "n_masked": 431, "elapsed_s": 21796.7032995224}
|
| 206 |
+
{"step": 19900, "loss": 1.3395962715148926, "gnorm": 0.6142444610595703, "n_masked": 370, "elapsed_s": 21831.83272099495}
|
| 207 |
+
{"step": 20000, "loss": 1.3526158332824707, "gnorm": 0.9940682649612427, "n_masked": 852, "elapsed_s": 21867.00915122032}
|
| 208 |
+
{"step": 20000, "val_loss": 1.322814276283301, "val_n_masked": 125076}
|