explcre commited on
Commit
ee505a3
·
verified ·
1 Parent(s): bae059b

Upload exp_t3_ntv3_650m_500bp_continue_20260505_233107/log.jsonl with huggingface_hub

Browse files
exp_t3_ntv3_650m_500bp_continue_20260505_233107/log.jsonl ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 100, "loss": 1.3830344676971436, "gnorm": 1.1163034439086914, "n_masked": 982, "elapsed_s": 35.423495054244995}
2
+ {"step": 200, "loss": 1.3374390602111816, "gnorm": 1.19303297996521, "n_masked": 394, "elapsed_s": 69.84209108352661}
3
+ {"step": 300, "loss": 1.350731611251831, "gnorm": 1.3792247772216797, "n_masked": 564, "elapsed_s": 104.56218957901001}
4
+ {"step": 400, "loss": 1.3097847700119019, "gnorm": 1.9376723766326904, "n_masked": 433, "elapsed_s": 138.80252122879028}
5
+ {"step": 500, "loss": 1.367826223373413, "gnorm": 0.9066024422645569, "n_masked": 792, "elapsed_s": 173.2839047908783}
6
+ {"step": 600, "loss": 1.3668848276138306, "gnorm": 0.8215528726577759, "n_masked": 392, "elapsed_s": 207.5154848098755}
7
+ {"step": 700, "loss": 1.2710983753204346, "gnorm": 1.8111858367919922, "n_masked": 173, "elapsed_s": 241.77677130699158}
8
+ {"step": 800, "loss": 1.3481299877166748, "gnorm": 0.6367254853248596, "n_masked": 669, "elapsed_s": 276.031054019928}
9
+ {"step": 900, "loss": 1.3382371664047241, "gnorm": 0.7427734732627869, "n_masked": 687, "elapsed_s": 310.12622690200806}
10
+ {"step": 1000, "loss": 1.3211009502410889, "gnorm": 2.6366026401519775, "n_masked": 68, "elapsed_s": 344.3588490486145}
11
+ {"step": 1100, "loss": 1.3626405000686646, "gnorm": 0.7013235688209534, "n_masked": 639, "elapsed_s": 378.48239374160767}
12
+ {"step": 1200, "loss": 1.2668272256851196, "gnorm": 1.4165120124816895, "n_masked": 267, "elapsed_s": 412.71531987190247}
13
+ {"step": 1300, "loss": 1.337062120437622, "gnorm": 0.6832523941993713, "n_masked": 483, "elapsed_s": 446.82601833343506}
14
+ {"step": 1400, "loss": 1.342034101486206, "gnorm": 0.6357051730155945, "n_masked": 742, "elapsed_s": 481.08272886276245}
15
+ {"step": 1500, "loss": 1.301823377609253, "gnorm": 0.5989937782287598, "n_masked": 428, "elapsed_s": 515.2140727043152}
16
+ {"step": 1600, "loss": 1.335085391998291, "gnorm": 1.8837461471557617, "n_masked": 661, "elapsed_s": 549.4816102981567}
17
+ {"step": 1700, "loss": 1.3465816974639893, "gnorm": 0.8208540678024292, "n_masked": 749, "elapsed_s": 583.6350133419037}
18
+ {"step": 1800, "loss": 1.2955265045166016, "gnorm": 1.3682677745819092, "n_masked": 256, "elapsed_s": 617.893741607666}
19
+ {"step": 1900, "loss": 1.3186887502670288, "gnorm": 1.6838529109954834, "n_masked": 341, "elapsed_s": 652.0469737052917}
20
+ {"step": 2000, "loss": 1.3562228679656982, "gnorm": 0.9930366277694702, "n_masked": 832, "elapsed_s": 686.321905374527}
21
+ {"step": 2100, "loss": 1.3077714443206787, "gnorm": 1.1296271085739136, "n_masked": 318, "elapsed_s": 720.4657981395721}
22
+ {"step": 2200, "loss": 1.334072232246399, "gnorm": 0.7257285714149475, "n_masked": 558, "elapsed_s": 754.7772724628448}
23
+ {"step": 2300, "loss": 1.3580256700515747, "gnorm": 1.2143474817276, "n_masked": 447, "elapsed_s": 788.8872654438019}
24
+ {"step": 2400, "loss": 1.3156100511550903, "gnorm": 1.7139068841934204, "n_masked": 455, "elapsed_s": 823.1176600456238}
25
+ {"step": 2500, "loss": 1.3811101913452148, "gnorm": 0.41759198904037476, "n_masked": 912, "elapsed_s": 857.2137458324432}
26
+ {"step": 2500, "val_loss": 1.3213035502459005, "val_n_masked": 124933}
27
+ {"step": 2600, "loss": 1.3738276958465576, "gnorm": 0.7827552556991577, "n_masked": 637, "elapsed_s": 2697.960351705551}
28
+ {"step": 2700, "loss": 1.3737998008728027, "gnorm": 0.9642743468284607, "n_masked": 476, "elapsed_s": 2732.512922525406}
29
+ {"step": 2800, "loss": 1.3482304811477661, "gnorm": 1.3686306476593018, "n_masked": 547, "elapsed_s": 2766.846411705017}
30
+ {"step": 2900, "loss": 1.3407351970672607, "gnorm": 0.8344046473503113, "n_masked": 630, "elapsed_s": 2801.3901534080505}
31
+ {"step": 3000, "loss": 1.2770105600357056, "gnorm": 2.103945732116699, "n_masked": 318, "elapsed_s": 2835.6991379261017}
32
+ {"step": 3100, "loss": 1.3387221097946167, "gnorm": 2.07784366607666, "n_masked": 542, "elapsed_s": 2870.19776058197}
33
+ {"step": 3200, "loss": 1.3455257415771484, "gnorm": 2.910025119781494, "n_masked": 519, "elapsed_s": 2904.5538070201874}
34
+ {"step": 3300, "loss": 1.3346338272094727, "gnorm": 1.3126585483551025, "n_masked": 597, "elapsed_s": 2938.9833121299744}
35
+ {"step": 3400, "loss": 1.3461706638336182, "gnorm": 0.7908732295036316, "n_masked": 828, "elapsed_s": 2973.2658891677856}
36
+ {"step": 3500, "loss": 1.344023585319519, "gnorm": 1.5487197637557983, "n_masked": 544, "elapsed_s": 3007.6952702999115}
37
+ {"step": 3600, "loss": 1.2774889469146729, "gnorm": 1.676491141319275, "n_masked": 244, "elapsed_s": 3041.9573125839233}
38
+ {"step": 3700, "loss": 1.3687992095947266, "gnorm": 1.6768341064453125, "n_masked": 472, "elapsed_s": 3076.3627576828003}
39
+ {"step": 3800, "loss": 1.3285164833068848, "gnorm": 1.3950494527816772, "n_masked": 487, "elapsed_s": 3110.657835483551}
40
+ {"step": 3900, "loss": 1.314442753791809, "gnorm": 1.0416253805160522, "n_masked": 510, "elapsed_s": 3145.078438282013}
41
+ {"step": 4000, "loss": 1.3472981452941895, "gnorm": 1.4757922887802124, "n_masked": 450, "elapsed_s": 3179.374840736389}
42
+ {"step": 4100, "loss": 1.3092058897018433, "gnorm": 1.261914849281311, "n_masked": 253, "elapsed_s": 3213.8122742176056}
43
+ {"step": 4200, "loss": 1.3180474042892456, "gnorm": 1.486124038696289, "n_masked": 152, "elapsed_s": 3248.1669158935547}
44
+ {"step": 4300, "loss": 1.3332287073135376, "gnorm": 0.8236634731292725, "n_masked": 530, "elapsed_s": 3282.7099583148956}
45
+ {"step": 4400, "loss": 1.3015450239181519, "gnorm": 1.1651531457901, "n_masked": 310, "elapsed_s": 3317.248818874359}
46
+ {"step": 4500, "loss": 1.3600540161132812, "gnorm": 1.2381677627563477, "n_masked": 805, "elapsed_s": 3351.798796415329}
47
+ {"step": 4600, "loss": 1.303367257118225, "gnorm": 0.7431296706199646, "n_masked": 484, "elapsed_s": 3386.251144170761}
48
+ {"step": 4700, "loss": 1.2856405973434448, "gnorm": 0.9792025685310364, "n_masked": 245, "elapsed_s": 3420.8031866550446}
49
+ {"step": 4800, "loss": 1.3554794788360596, "gnorm": 0.9948565363883972, "n_masked": 716, "elapsed_s": 3455.1916856765747}
50
+ {"step": 4900, "loss": 1.332360863685608, "gnorm": 1.3455318212509155, "n_masked": 294, "elapsed_s": 3489.6636271476746}
51
+ {"step": 5000, "loss": 1.2757943868637085, "gnorm": 0.7502349019050598, "n_masked": 237, "elapsed_s": 3524.0655887126923}
52
+ {"step": 5000, "val_loss": 1.319067304274615, "val_n_masked": 125120}
53
+ {"step": 5100, "loss": 1.313077449798584, "gnorm": 0.6966049075126648, "n_masked": 419, "elapsed_s": 5026.9972767829895}
54
+ {"step": 5200, "loss": 1.343546748161316, "gnorm": 1.1718984842300415, "n_masked": 541, "elapsed_s": 5061.96658205986}
55
+ {"step": 5300, "loss": 1.3043097257614136, "gnorm": 0.6454617381095886, "n_masked": 442, "elapsed_s": 5096.375110387802}
56
+ {"step": 5400, "loss": 1.293885350227356, "gnorm": 1.4659545421600342, "n_masked": 179, "elapsed_s": 5131.246322393417}
57
+ {"step": 5500, "loss": 1.3622791767120361, "gnorm": 1.0189210176467896, "n_masked": 448, "elapsed_s": 5166.040766239166}
58
+ {"step": 5600, "loss": 1.2922393083572388, "gnorm": 0.8516228199005127, "n_masked": 373, "elapsed_s": 5200.60059094429}
59
+ {"step": 5700, "loss": 1.323327898979187, "gnorm": 1.4759441614151, "n_masked": 395, "elapsed_s": 5235.759196519852}
60
+ {"step": 5800, "loss": 1.3318572044372559, "gnorm": 0.8709594011306763, "n_masked": 693, "elapsed_s": 5271.750630617142}
61
+ {"step": 5900, "loss": 1.3221596479415894, "gnorm": 1.4884192943572998, "n_masked": 353, "elapsed_s": 5307.737992286682}
62
+ {"step": 6000, "loss": 1.2966548204421997, "gnorm": 1.0322213172912598, "n_masked": 341, "elapsed_s": 5343.911506414413}
63
+ {"step": 6100, "loss": 1.295523762702942, "gnorm": 2.4895920753479004, "n_masked": 168, "elapsed_s": 5379.717408657074}
64
+ {"step": 6200, "loss": 1.3048579692840576, "gnorm": 0.8597980737686157, "n_masked": 540, "elapsed_s": 5415.169981718063}
65
+ {"step": 6300, "loss": 1.3438040018081665, "gnorm": 1.191733956336975, "n_masked": 672, "elapsed_s": 5449.460247039795}
66
+ {"step": 6400, "loss": 1.3215078115463257, "gnorm": 1.8571734428405762, "n_masked": 143, "elapsed_s": 5484.203200817108}
67
+ {"step": 6500, "loss": 1.3426899909973145, "gnorm": 0.6195399165153503, "n_masked": 728, "elapsed_s": 5519.821945667267}
68
+ {"step": 6600, "loss": 1.35745108127594, "gnorm": 0.5898821353912354, "n_masked": 653, "elapsed_s": 5554.344435930252}
69
+ {"step": 6700, "loss": 1.2400013208389282, "gnorm": 1.774429202079773, "n_masked": 107, "elapsed_s": 5588.72710609436}
70
+ {"step": 6800, "loss": 1.3088068962097168, "gnorm": 1.0786213874816895, "n_masked": 540, "elapsed_s": 5623.3783826828}
71
+ {"step": 6900, "loss": 1.3161637783050537, "gnorm": 1.3763185739517212, "n_masked": 381, "elapsed_s": 5657.808826208115}
72
+ {"step": 7000, "loss": 1.356041669845581, "gnorm": 1.1215221881866455, "n_masked": 406, "elapsed_s": 5692.349307537079}
73
+ {"step": 7100, "loss": 1.257248878479004, "gnorm": 0.8930635452270508, "n_masked": 179, "elapsed_s": 5726.780150413513}
74
+ {"step": 7200, "loss": 1.2587780952453613, "gnorm": 1.544077754020691, "n_masked": 458, "elapsed_s": 5761.416617870331}
75
+ {"step": 7300, "loss": 1.216827392578125, "gnorm": 1.4544609785079956, "n_masked": 123, "elapsed_s": 5795.898987531662}
76
+ {"step": 7400, "loss": 1.2676042318344116, "gnorm": 2.583134174346924, "n_masked": 166, "elapsed_s": 5830.53270316124}
77
+ {"step": 7500, "loss": 1.2763606309890747, "gnorm": 1.0392528772354126, "n_masked": 402, "elapsed_s": 5865.006909370422}
78
+ {"step": 7500, "val_loss": 1.3180792265117602, "val_n_masked": 124871}
79
+ {"step": 7600, "loss": 1.3420323133468628, "gnorm": 0.9377066493034363, "n_masked": 698, "elapsed_s": 7884.570154905319}
80
+ {"step": 7700, "loss": 1.314950704574585, "gnorm": 1.6314916610717773, "n_masked": 343, "elapsed_s": 7919.44896030426}
81
+ {"step": 7800, "loss": 1.321216344833374, "gnorm": 1.233525276184082, "n_masked": 362, "elapsed_s": 7953.867955207825}
82
+ {"step": 7900, "loss": 1.2979745864868164, "gnorm": 0.7589095830917358, "n_masked": 255, "elapsed_s": 7988.399910926819}
83
+ {"step": 8000, "loss": 1.314009428024292, "gnorm": 1.2046326398849487, "n_masked": 401, "elapsed_s": 8022.819769859314}
84
+ {"step": 8100, "loss": 1.3020869493484497, "gnorm": 0.9781772494316101, "n_masked": 559, "elapsed_s": 8057.386839389801}
85
+ {"step": 8200, "loss": 1.2957555055618286, "gnorm": 2.1979458332061768, "n_masked": 81, "elapsed_s": 8091.930820226669}
86
+ {"step": 8300, "loss": 1.303449034690857, "gnorm": 1.6465024948120117, "n_masked": 493, "elapsed_s": 8126.434752702713}
87
+ {"step": 8400, "loss": 1.33748197555542, "gnorm": 1.118224024772644, "n_masked": 447, "elapsed_s": 8166.403180360794}
88
+ {"step": 8500, "loss": 1.2344213724136353, "gnorm": 1.1177277565002441, "n_masked": 505, "elapsed_s": 8200.789453029633}
89
+ {"step": 8600, "loss": 1.364675760269165, "gnorm": 1.7634682655334473, "n_masked": 453, "elapsed_s": 8242.845850229263}
90
+ {"step": 8700, "loss": 1.314816951751709, "gnorm": 0.9205825328826904, "n_masked": 426, "elapsed_s": 8277.284264087677}
91
+ {"step": 8800, "loss": 1.3079884052276611, "gnorm": 1.7208943367004395, "n_masked": 172, "elapsed_s": 8316.910477161407}
92
+ {"step": 8900, "loss": 1.358060598373413, "gnorm": 0.4738282263278961, "n_masked": 744, "elapsed_s": 8351.464619159698}
93
+ {"step": 9000, "loss": 1.3536900281906128, "gnorm": 0.6483593583106995, "n_masked": 795, "elapsed_s": 8395.509850978851}
94
+ {"step": 9100, "loss": 1.3426790237426758, "gnorm": 1.3543518781661987, "n_masked": 752, "elapsed_s": 8430.082409381866}
95
+ {"step": 9200, "loss": 1.3535500764846802, "gnorm": 1.047558069229126, "n_masked": 730, "elapsed_s": 9005.293690919876}
96
+ {"step": 9300, "loss": 1.3006731271743774, "gnorm": 1.3308043479919434, "n_masked": 383, "elapsed_s": 9040.062557220459}
97
+ {"step": 9400, "loss": 1.3214889764785767, "gnorm": 0.6744742393493652, "n_masked": 594, "elapsed_s": 9074.66439151764}
98
+ {"step": 9500, "loss": 1.3339165449142456, "gnorm": 0.5255136489868164, "n_masked": 592, "elapsed_s": 9109.131958961487}
99
+ {"step": 9600, "loss": 1.3455543518066406, "gnorm": 0.6943612098693848, "n_masked": 589, "elapsed_s": 9143.796741485596}
100
+ {"step": 9700, "loss": 1.281071424484253, "gnorm": 1.5173112154006958, "n_masked": 238, "elapsed_s": 9178.304121255875}
101
+ {"step": 9800, "loss": 1.308857798576355, "gnorm": 0.7714717984199524, "n_masked": 411, "elapsed_s": 9212.72718667984}
102
+ {"step": 9900, "loss": 1.3369557857513428, "gnorm": 1.1596732139587402, "n_masked": 468, "elapsed_s": 9251.13983464241}
103
+ {"step": 10000, "loss": 1.3163377046585083, "gnorm": 1.0236461162567139, "n_masked": 337, "elapsed_s": 9285.44360089302}
104
+ {"step": 10000, "val_loss": 1.3175605835010733, "val_n_masked": 125023}
105
+ {"step": 10100, "loss": 1.303292155265808, "gnorm": 0.8322035670280457, "n_masked": 432, "elapsed_s": 10887.769313097}
106
+ {"step": 10200, "loss": 1.31556236743927, "gnorm": 0.8032592535018921, "n_masked": 535, "elapsed_s": 10922.20627617836}
107
+ {"step": 10300, "loss": 1.3352210521697998, "gnorm": 1.2651633024215698, "n_masked": 397, "elapsed_s": 10956.69873714447}
108
+ {"step": 10400, "loss": 1.3556784391403198, "gnorm": 2.424103021621704, "n_masked": 555, "elapsed_s": 10991.062612771988}
109
+ {"step": 10500, "loss": 1.3536518812179565, "gnorm": 1.0799503326416016, "n_masked": 630, "elapsed_s": 11025.531599521637}
110
+ {"step": 10600, "loss": 1.313134789466858, "gnorm": 1.9833507537841797, "n_masked": 75, "elapsed_s": 11059.957385540009}
111
+ {"step": 10700, "loss": 1.3582969903945923, "gnorm": 1.3722000122070312, "n_masked": 350, "elapsed_s": 11094.30869436264}
112
+ {"step": 10800, "loss": 1.3317660093307495, "gnorm": 1.4594670534133911, "n_masked": 276, "elapsed_s": 11128.7721824646}
113
+ {"step": 10900, "loss": 1.2800016403198242, "gnorm": 1.1688188314437866, "n_masked": 309, "elapsed_s": 11163.159432888031}
114
+ {"step": 11000, "loss": 1.3207478523254395, "gnorm": 2.377474784851074, "n_masked": 196, "elapsed_s": 11197.588871955872}
115
+ {"step": 11100, "loss": 1.3006802797317505, "gnorm": 1.7951750755310059, "n_masked": 482, "elapsed_s": 11231.889517068863}
116
+ {"step": 11200, "loss": 1.3052030801773071, "gnorm": 1.4263187646865845, "n_masked": 326, "elapsed_s": 11266.3450319767}
117
+ {"step": 11300, "loss": 1.3507637977600098, "gnorm": 1.3452253341674805, "n_masked": 208, "elapsed_s": 11300.672224521637}
118
+ {"step": 11400, "loss": 1.3616812229156494, "gnorm": 1.278725028038025, "n_masked": 645, "elapsed_s": 11335.173861503601}
119
+ {"step": 11500, "loss": 1.2996561527252197, "gnorm": 0.8185257315635681, "n_masked": 304, "elapsed_s": 11369.46400642395}
120
+ {"step": 11600, "loss": 1.3243699073791504, "gnorm": 1.1888484954833984, "n_masked": 191, "elapsed_s": 11403.900014400482}
121
+ {"step": 11700, "loss": 1.342268705368042, "gnorm": 0.83338463306427, "n_masked": 778, "elapsed_s": 11438.211163282394}
122
+ {"step": 11800, "loss": 1.398474931716919, "gnorm": 0.8138265609741211, "n_masked": 815, "elapsed_s": 11472.661185264587}
123
+ {"step": 11900, "loss": 1.36148202419281, "gnorm": 0.5005528926849365, "n_masked": 824, "elapsed_s": 11507.101724386215}
124
+ {"step": 12000, "loss": 1.3549628257751465, "gnorm": 0.9878760576248169, "n_masked": 705, "elapsed_s": 11541.639989614487}
125
+ {"step": 12100, "loss": 1.3247058391571045, "gnorm": 1.0711158514022827, "n_masked": 522, "elapsed_s": 11576.077571630478}
126
+ {"step": 12200, "loss": 1.2978767156600952, "gnorm": 0.8946782350540161, "n_masked": 531, "elapsed_s": 11610.629450321198}
127
+ {"step": 12300, "loss": 1.3673425912857056, "gnorm": 1.7084158658981323, "n_masked": 720, "elapsed_s": 11644.991916179657}
128
+ {"step": 12400, "loss": 1.3247982263565063, "gnorm": 1.0777453184127808, "n_masked": 705, "elapsed_s": 11679.497666835785}
129
+ {"step": 12500, "loss": 1.324648141860962, "gnorm": 1.8216280937194824, "n_masked": 420, "elapsed_s": 11713.833210229874}
130
+ {"step": 12500, "val_loss": 1.3172706337720204, "val_n_masked": 124920}
131
+ {"step": 12600, "loss": 1.261396050453186, "gnorm": 0.9052006006240845, "n_masked": 175, "elapsed_s": 13128.721130609512}
132
+ {"step": 12700, "loss": 1.3707122802734375, "gnorm": 0.928581178188324, "n_masked": 517, "elapsed_s": 13163.877133607864}
133
+ {"step": 12800, "loss": 1.1942298412322998, "gnorm": 2.4389796257019043, "n_masked": 182, "elapsed_s": 13200.246938705444}
134
+ {"step": 12900, "loss": 1.3564614057540894, "gnorm": 0.7749509811401367, "n_masked": 485, "elapsed_s": 13238.198527812958}
135
+ {"step": 13000, "loss": 1.3663102388381958, "gnorm": 0.46462246775627136, "n_masked": 714, "elapsed_s": 13275.264184713364}
136
+ {"step": 13100, "loss": 1.2876695394515991, "gnorm": 1.2255427837371826, "n_masked": 457, "elapsed_s": 13315.335847139359}
137
+ {"step": 13200, "loss": 1.3608660697937012, "gnorm": 0.9774927496910095, "n_masked": 620, "elapsed_s": 13352.325953722}
138
+ {"step": 13300, "loss": 1.3813458681106567, "gnorm": 0.9626798033714294, "n_masked": 982, "elapsed_s": 13390.623339414597}
139
+ {"step": 13400, "loss": 1.3549284934997559, "gnorm": 0.7238370776176453, "n_masked": 413, "elapsed_s": 13425.117178678513}
140
+ {"step": 13500, "loss": 1.3209725618362427, "gnorm": 1.1324065923690796, "n_masked": 472, "elapsed_s": 13462.241424798965}
141
+ {"step": 13600, "loss": 1.355392336845398, "gnorm": 0.9494814872741699, "n_masked": 677, "elapsed_s": 13497.88616490364}
142
+ {"step": 13700, "loss": 1.294376015663147, "gnorm": 0.7883215546607971, "n_masked": 357, "elapsed_s": 13533.17192864418}
143
+ {"step": 13800, "loss": 1.3678468465805054, "gnorm": 1.1102325916290283, "n_masked": 765, "elapsed_s": 13572.609410524368}
144
+ {"step": 13900, "loss": 1.3196802139282227, "gnorm": 1.7207669019699097, "n_masked": 617, "elapsed_s": 13607.686510324478}
145
+ {"step": 14000, "loss": 1.2885034084320068, "gnorm": 1.1581189632415771, "n_masked": 329, "elapsed_s": 13642.975101709366}
146
+ {"step": 14100, "loss": 1.3146302700042725, "gnorm": 1.0239681005477905, "n_masked": 369, "elapsed_s": 13680.854554891586}
147
+ {"step": 14200, "loss": 1.3155291080474854, "gnorm": 0.9025008082389832, "n_masked": 457, "elapsed_s": 13722.280074357986}
148
+ {"step": 14300, "loss": 1.3651032447814941, "gnorm": 0.4341070055961609, "n_masked": 819, "elapsed_s": 13760.518018722534}
149
+ {"step": 14400, "loss": 1.3032701015472412, "gnorm": 0.8793322443962097, "n_masked": 387, "elapsed_s": 13797.61318397522}
150
+ {"step": 14500, "loss": 1.3462364673614502, "gnorm": 1.0340861082077026, "n_masked": 428, "elapsed_s": 13834.203877925873}
151
+ {"step": 14600, "loss": 1.3618968725204468, "gnorm": 1.1742701530456543, "n_masked": 289, "elapsed_s": 13873.229164123535}
152
+ {"step": 14700, "loss": 1.3748137950897217, "gnorm": 2.2719850540161133, "n_masked": 109, "elapsed_s": 13910.42895436287}
153
+ {"step": 14800, "loss": 1.3491384983062744, "gnorm": 1.0725470781326294, "n_masked": 612, "elapsed_s": 13944.947699785233}
154
+ {"step": 14900, "loss": 1.3477791547775269, "gnorm": 1.0608705282211304, "n_masked": 556, "elapsed_s": 13980.100586414337}
155
+ {"step": 15000, "loss": 1.2624638080596924, "gnorm": 1.3070021867752075, "n_masked": 273, "elapsed_s": 14018.236857652664}
156
+ {"step": 15000, "val_loss": 1.3178521571827777, "val_n_masked": 125235}
157
+ {"step": 15100, "loss": 1.3706765174865723, "gnorm": 0.983051598072052, "n_masked": 475, "elapsed_s": 15157.238924503326}
158
+ {"step": 15200, "loss": 1.3378452062606812, "gnorm": 1.1300468444824219, "n_masked": 539, "elapsed_s": 15192.575368881226}
159
+ {"step": 15300, "loss": 1.3358066082000732, "gnorm": 0.34175485372543335, "n_masked": 528, "elapsed_s": 15227.81292104721}
160
+ {"step": 15400, "loss": 1.2989941835403442, "gnorm": 1.1062347888946533, "n_masked": 447, "elapsed_s": 15263.175237178802}
161
+ {"step": 15500, "loss": 1.3640450239181519, "gnorm": 0.9182344079017639, "n_masked": 507, "elapsed_s": 15298.357216358185}
162
+ {"step": 15600, "loss": 1.3447662591934204, "gnorm": 1.1113808155059814, "n_masked": 380, "elapsed_s": 15333.60363316536}
163
+ {"step": 15700, "loss": 1.3260891437530518, "gnorm": 1.0357147455215454, "n_masked": 392, "elapsed_s": 15368.683472156525}
164
+ {"step": 15800, "loss": 1.318356990814209, "gnorm": 1.1849439144134521, "n_masked": 522, "elapsed_s": 15403.954351186752}
165
+ {"step": 15900, "loss": 1.3354240655899048, "gnorm": 1.0070284605026245, "n_masked": 632, "elapsed_s": 15438.94680595398}
166
+ {"step": 16000, "loss": 1.331886887550354, "gnorm": 1.3007965087890625, "n_masked": 334, "elapsed_s": 15474.187850475311}
167
+ {"step": 16100, "loss": 1.3237905502319336, "gnorm": 0.7074573040008545, "n_masked": 499, "elapsed_s": 15509.346292734146}
168
+ {"step": 16200, "loss": 1.3376857042312622, "gnorm": 0.7520108819007874, "n_masked": 653, "elapsed_s": 15544.469090461731}
169
+ {"step": 16300, "loss": 1.3622488975524902, "gnorm": 0.9934261441230774, "n_masked": 715, "elapsed_s": 15579.527799606323}
170
+ {"step": 16400, "loss": 1.3513139486312866, "gnorm": 2.3581771850585938, "n_masked": 687, "elapsed_s": 15614.784990549088}
171
+ {"step": 16500, "loss": 1.3492164611816406, "gnorm": 1.0623959302902222, "n_masked": 449, "elapsed_s": 15649.918784856796}
172
+ {"step": 16600, "loss": 1.3410255908966064, "gnorm": 1.2534805536270142, "n_masked": 415, "elapsed_s": 15685.134821414948}
173
+ {"step": 16700, "loss": 1.2602723836898804, "gnorm": 0.8716364502906799, "n_masked": 482, "elapsed_s": 15720.22796344757}
174
+ {"step": 16800, "loss": 1.3004027605056763, "gnorm": 1.1613261699676514, "n_masked": 417, "elapsed_s": 15755.475514888763}
175
+ {"step": 16900, "loss": 1.301991581916809, "gnorm": 1.8906989097595215, "n_masked": 574, "elapsed_s": 15790.535251379013}
176
+ {"step": 17000, "loss": 1.268080234527588, "gnorm": 1.3923863172531128, "n_masked": 383, "elapsed_s": 15825.831309318542}
177
+ {"step": 17100, "loss": 1.3785266876220703, "gnorm": 0.9663311839103699, "n_masked": 692, "elapsed_s": 15861.084788560867}
178
+ {"step": 17200, "loss": 1.3446965217590332, "gnorm": 1.1932350397109985, "n_masked": 639, "elapsed_s": 15896.208616256714}
179
+ {"step": 17300, "loss": 1.3563239574432373, "gnorm": 0.633642315864563, "n_masked": 537, "elapsed_s": 15931.605852603912}
180
+ {"step": 17400, "loss": 1.2954353094100952, "gnorm": 1.2286243438720703, "n_masked": 369, "elapsed_s": 15966.453760147095}
181
+ {"step": 17500, "loss": 1.3070257902145386, "gnorm": 1.6581391096115112, "n_masked": 397, "elapsed_s": 16008.032244205475}
182
+ {"step": 17500, "val_loss": 1.317975806288158, "val_n_masked": 124790}
183
+ {"step": 17600, "loss": 1.3046313524246216, "gnorm": 1.0101138353347778, "n_masked": 421, "elapsed_s": 18299.859781742096}
184
+ {"step": 17700, "loss": 1.3305699825286865, "gnorm": 6.362544059753418, "n_masked": 680, "elapsed_s": 18335.95463490486}
185
+ {"step": 17800, "loss": 1.3400428295135498, "gnorm": 2.0564124584198, "n_masked": 444, "elapsed_s": 18374.883467674255}
186
+ {"step": 17900, "loss": 1.3165717124938965, "gnorm": 1.4678601026535034, "n_masked": 369, "elapsed_s": 18412.60939359665}
187
+ {"step": 18000, "loss": 1.3444222211837769, "gnorm": 0.8611866235733032, "n_masked": 570, "elapsed_s": 18452.248302459717}
188
+ {"step": 18100, "loss": 1.368144154548645, "gnorm": 1.1438348293304443, "n_masked": 859, "elapsed_s": 18489.850305318832}
189
+ {"step": 18200, "loss": 1.3379366397857666, "gnorm": 1.2427281141281128, "n_masked": 629, "elapsed_s": 18526.488139390945}
190
+ {"step": 18300, "loss": 1.312794804573059, "gnorm": 1.2535253763198853, "n_masked": 283, "elapsed_s": 18563.19682621956}
191
+ {"step": 18400, "loss": 1.371649146080017, "gnorm": 1.1278809309005737, "n_masked": 718, "elapsed_s": 18600.919459104538}
192
+ {"step": 18500, "loss": 1.323801875114441, "gnorm": 1.3691540956497192, "n_masked": 435, "elapsed_s": 18639.18771147728}
193
+ {"step": 18600, "loss": 1.2869627475738525, "gnorm": 1.6267634630203247, "n_masked": 380, "elapsed_s": 18677.941189289093}
194
+ {"step": 18700, "loss": 1.3398501873016357, "gnorm": 1.3098835945129395, "n_masked": 703, "elapsed_s": 18713.175693273544}
195
+ {"step": 18800, "loss": 1.3321360349655151, "gnorm": 2.207050323486328, "n_masked": 382, "elapsed_s": 18751.87560415268}
196
+ {"step": 18900, "loss": 1.3655954599380493, "gnorm": 1.3322885036468506, "n_masked": 636, "elapsed_s": 18790.39333629608}
197
+ {"step": 19000, "loss": 1.329351782798767, "gnorm": 1.548621416091919, "n_masked": 314, "elapsed_s": 18825.43915271759}
198
+ {"step": 19100, "loss": 1.3384290933609009, "gnorm": 0.946575939655304, "n_masked": 543, "elapsed_s": 18860.557779073715}
199
+ {"step": 19200, "loss": 1.365357756614685, "gnorm": 1.521238923072815, "n_masked": 338, "elapsed_s": 18895.62021303177}
200
+ {"step": 19300, "loss": 1.2415380477905273, "gnorm": 1.8824259042739868, "n_masked": 134, "elapsed_s": 18930.74623799324}
201
+ {"step": 19400, "loss": 1.3157609701156616, "gnorm": 1.6662753820419312, "n_masked": 253, "elapsed_s": 18965.738100767136}
202
+ {"step": 19500, "loss": 1.2869493961334229, "gnorm": 0.6655048727989197, "n_masked": 394, "elapsed_s": 19000.892485141754}
203
+ {"step": 19600, "loss": 1.3358848094940186, "gnorm": 0.5790027976036072, "n_masked": 438, "elapsed_s": 19035.901350021362}
204
+ {"step": 19700, "loss": 1.3372118473052979, "gnorm": 0.4846288561820984, "n_masked": 623, "elapsed_s": 19071.063711881638}
205
+ {"step": 19800, "loss": 1.3172661066055298, "gnorm": 1.6005849838256836, "n_masked": 431, "elapsed_s": 19106.01187491417}
206
+ {"step": 19900, "loss": 1.3341882228851318, "gnorm": 0.5404767394065857, "n_masked": 370, "elapsed_s": 19141.1215364933}
207
+ {"step": 20000, "loss": 1.3569655418395996, "gnorm": 1.1522177457809448, "n_masked": 852, "elapsed_s": 19176.139762163162}
208
+ {"step": 20000, "val_loss": 1.3174054437597253, "val_n_masked": 125076}
209
+ {"step": 20100, "loss": 1.3480662107467651, "gnorm": 1.8155994415283203, "n_masked": 262, "elapsed_s": 20258.31278538704}
210
+ {"step": 20200, "loss": 1.3646107912063599, "gnorm": 1.2573291063308716, "n_masked": 557, "elapsed_s": 20293.156005620956}
211
+ {"step": 20300, "loss": 1.3014793395996094, "gnorm": 1.5292905569076538, "n_masked": 172, "elapsed_s": 20327.895439386368}
212
+ {"step": 20400, "loss": 1.3104994297027588, "gnorm": 1.5043563842773438, "n_masked": 365, "elapsed_s": 20362.71544933319}
213
+ {"step": 20500, "loss": 1.35153329372406, "gnorm": 0.6225059032440186, "n_masked": 741, "elapsed_s": 20397.551874160767}
214
+ {"step": 20600, "loss": 1.3440214395523071, "gnorm": 1.1846950054168701, "n_masked": 663, "elapsed_s": 20432.435864925385}
215
+ {"step": 20700, "loss": 1.3057355880737305, "gnorm": 0.9459697604179382, "n_masked": 644, "elapsed_s": 20467.140258312225}
216
+ {"step": 20800, "loss": 1.276693344116211, "gnorm": 1.7985931634902954, "n_masked": 428, "elapsed_s": 20502.0226957798}
217
+ {"step": 20900, "loss": 1.2957643270492554, "gnorm": 0.9924417734146118, "n_masked": 240, "elapsed_s": 20536.817549943924}
218
+ {"step": 21000, "loss": 1.2905820608139038, "gnorm": 4.2079901695251465, "n_masked": 288, "elapsed_s": 20571.7108232975}
219
+ {"step": 21100, "loss": 1.2653223276138306, "gnorm": 1.5142489671707153, "n_masked": 137, "elapsed_s": 20606.406132221222}
220
+ {"step": 21200, "loss": 1.335182547569275, "gnorm": 1.7895948886871338, "n_masked": 460, "elapsed_s": 20641.223502635956}
221
+ {"step": 21300, "loss": 1.292695164680481, "gnorm": 2.0176427364349365, "n_masked": 222, "elapsed_s": 20675.646859645844}
222
+ {"step": 21400, "loss": 1.342378854751587, "gnorm": 0.8973787426948547, "n_masked": 602, "elapsed_s": 20710.542212963104}
223
+ {"step": 21500, "loss": 1.330300211906433, "gnorm": 1.085950255393982, "n_masked": 438, "elapsed_s": 20745.288593053818}
224
+ {"step": 21600, "loss": 1.3556325435638428, "gnorm": 1.8111934661865234, "n_masked": 729, "elapsed_s": 20780.173976898193}
225
+ {"step": 21700, "loss": 1.3649168014526367, "gnorm": 1.2774056196212769, "n_masked": 487, "elapsed_s": 20814.894906044006}
226
+ {"step": 21800, "loss": 1.3410533666610718, "gnorm": 0.4653695821762085, "n_masked": 518, "elapsed_s": 20849.74692082405}
227
+ {"step": 21900, "loss": 1.3733563423156738, "gnorm": 2.01662015914917, "n_masked": 506, "elapsed_s": 20884.474002599716}
228
+ {"step": 22000, "loss": 1.370507836341858, "gnorm": 1.2459616661071777, "n_masked": 780, "elapsed_s": 20919.354765176773}
229
+ {"step": 22100, "loss": 1.3767611980438232, "gnorm": 1.1439718008041382, "n_masked": 757, "elapsed_s": 20954.073551893234}
230
+ {"step": 22200, "loss": 1.3324636220932007, "gnorm": 1.2154438495635986, "n_masked": 522, "elapsed_s": 20988.88253211975}
231
+ {"step": 22300, "loss": 1.3345513343811035, "gnorm": 1.2610150575637817, "n_masked": 390, "elapsed_s": 21023.608725070953}
232
+ {"step": 22400, "loss": 1.3622584342956543, "gnorm": 1.5326122045516968, "n_masked": 646, "elapsed_s": 21058.453843832016}
233
+ {"step": 22500, "loss": 1.3211618661880493, "gnorm": 0.708969235420227, "n_masked": 563, "elapsed_s": 21093.133412837982}
234
+ {"step": 22500, "val_loss": 1.3178662469629019, "val_n_masked": 124695}