explcre commited on
Commit
cba748f
·
verified ·
1 Parent(s): 49eaf11

Upload exp_t3_ntv3_650m_500bp_warmstart_20260506_021915/log.jsonl with huggingface_hub

Browse files
exp_t3_ntv3_650m_500bp_warmstart_20260506_021915/log.jsonl ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 100, "loss": 1.3820974826812744, "gnorm": 1.2220863103866577, "n_masked": 982, "elapsed_s": 35.8488667011261}
2
+ {"step": 200, "loss": 1.3376681804656982, "gnorm": 1.1553181409835815, "n_masked": 394, "elapsed_s": 73.71185111999512}
3
+ {"step": 300, "loss": 1.3502181768417358, "gnorm": 1.2795896530151367, "n_masked": 564, "elapsed_s": 113.36348938941956}
4
+ {"step": 400, "loss": 1.308661699295044, "gnorm": 1.5509334802627563, "n_masked": 433, "elapsed_s": 151.99183320999146}
5
+ {"step": 500, "loss": 1.3670213222503662, "gnorm": 0.8056510090827942, "n_masked": 792, "elapsed_s": 194.890300989151}
6
+ {"step": 600, "loss": 1.367557406425476, "gnorm": 0.790529727935791, "n_masked": 392, "elapsed_s": 229.25425910949707}
7
+ {"step": 700, "loss": 1.2700945138931274, "gnorm": 1.8437680006027222, "n_masked": 173, "elapsed_s": 270.9777626991272}
8
+ {"step": 800, "loss": 1.347970962524414, "gnorm": 0.6144813895225525, "n_masked": 669, "elapsed_s": 305.54270577430725}
9
+ {"step": 900, "loss": 1.338914394378662, "gnorm": 0.7597417235374451, "n_masked": 687, "elapsed_s": 345.6030259132385}
10
+ {"step": 1000, "loss": 1.3203113079071045, "gnorm": 2.7369766235351562, "n_masked": 68, "elapsed_s": 380.1106147766113}
11
+ {"step": 1100, "loss": 1.3620147705078125, "gnorm": 0.7279233336448669, "n_masked": 639, "elapsed_s": 419.3245892524719}
12
+ {"step": 1200, "loss": 1.2678099870681763, "gnorm": 1.430052399635315, "n_masked": 267, "elapsed_s": 453.9119760990143}
13
+ {"step": 1300, "loss": 1.336962342262268, "gnorm": 0.7151319980621338, "n_masked": 483, "elapsed_s": 489.19561648368835}
14
+ {"step": 1400, "loss": 1.3428276777267456, "gnorm": 0.8378380537033081, "n_masked": 742, "elapsed_s": 523.9423537254333}
15
+ {"step": 1500, "loss": 1.3013797998428345, "gnorm": 0.5605701208114624, "n_masked": 428, "elapsed_s": 562.5866923332214}
16
+ {"step": 1600, "loss": 1.339953064918518, "gnorm": 1.6425215005874634, "n_masked": 661, "elapsed_s": 597.7421913146973}
17
+ {"step": 1700, "loss": 1.3473633527755737, "gnorm": 0.7184603810310364, "n_masked": 749, "elapsed_s": 635.0946760177612}
18
+ {"step": 1800, "loss": 1.2970633506774902, "gnorm": 1.4742635488510132, "n_masked": 256, "elapsed_s": 669.6070575714111}
19
+ {"step": 1900, "loss": 1.3189932107925415, "gnorm": 1.670884609222412, "n_masked": 341, "elapsed_s": 705.5491681098938}
20
+ {"step": 2000, "loss": 1.3548164367675781, "gnorm": 1.0109387636184692, "n_masked": 832, "elapsed_s": 740.0638422966003}
21
+ {"step": 2100, "loss": 1.30782949924469, "gnorm": 1.1383695602416992, "n_masked": 318, "elapsed_s": 778.1826136112213}
22
+ {"step": 2200, "loss": 1.3349287509918213, "gnorm": 0.7577791810035706, "n_masked": 558, "elapsed_s": 812.6900272369385}
23
+ {"step": 2300, "loss": 1.3570549488067627, "gnorm": 1.219936728477478, "n_masked": 447, "elapsed_s": 849.7111098766327}
24
+ {"step": 2400, "loss": 1.3159525394439697, "gnorm": 1.7732819318771362, "n_masked": 455, "elapsed_s": 884.2683169841766}
25
+ {"step": 2500, "loss": 1.3812034130096436, "gnorm": 0.42420706152915955, "n_masked": 912, "elapsed_s": 919.0479044914246}
26
+ {"step": 2500, "val_loss": 1.3207706567048063, "val_n_masked": 124933}
27
+ {"step": 2600, "loss": 1.3739628791809082, "gnorm": 0.7822402119636536, "n_masked": 637, "elapsed_s": 3294.7245783805847}
28
+ {"step": 2700, "loss": 1.3739837408065796, "gnorm": 0.9288198351860046, "n_masked": 476, "elapsed_s": 3333.049099445343}
29
+ {"step": 2800, "loss": 1.3490173816680908, "gnorm": 1.3891854286193848, "n_masked": 547, "elapsed_s": 3371.8371608257294}
30
+ {"step": 2900, "loss": 1.3424196243286133, "gnorm": 0.6380420327186584, "n_masked": 630, "elapsed_s": 3407.559161901474}
31
+ {"step": 3000, "loss": 1.2758604288101196, "gnorm": 2.0673468112945557, "n_masked": 318, "elapsed_s": 3442.6126568317413}
32
+ {"step": 3100, "loss": 1.3380697965621948, "gnorm": 2.063582420349121, "n_masked": 542, "elapsed_s": 3477.6725816726685}
33
+ {"step": 3200, "loss": 1.350239872932434, "gnorm": 2.6721367835998535, "n_masked": 519, "elapsed_s": 3512.596180677414}
34
+ {"step": 3300, "loss": 1.332556128501892, "gnorm": 1.0409064292907715, "n_masked": 597, "elapsed_s": 3547.590988636017}
35
+ {"step": 3400, "loss": 1.3462306261062622, "gnorm": 0.7332546710968018, "n_masked": 828, "elapsed_s": 3582.4444563388824}
36
+ {"step": 3500, "loss": 1.3413658142089844, "gnorm": 1.3429486751556396, "n_masked": 544, "elapsed_s": 3617.543942451477}
37
+ {"step": 3600, "loss": 1.2765685319900513, "gnorm": 1.5630594491958618, "n_masked": 244, "elapsed_s": 3652.396627664566}
38
+ {"step": 3700, "loss": 1.3687925338745117, "gnorm": 1.7061326503753662, "n_masked": 472, "elapsed_s": 3687.381470680237}
39
+ {"step": 3800, "loss": 1.3275479078292847, "gnorm": 1.4117976427078247, "n_masked": 487, "elapsed_s": 3722.2289412021637}
40
+ {"step": 3900, "loss": 1.315588116645813, "gnorm": 0.9783756732940674, "n_masked": 510, "elapsed_s": 3757.3020837306976}
41
+ {"step": 4000, "loss": 1.347726821899414, "gnorm": 1.2811682224273682, "n_masked": 450, "elapsed_s": 3792.0791490077972}
42
+ {"step": 4100, "loss": 1.30977463722229, "gnorm": 1.2951042652130127, "n_masked": 253, "elapsed_s": 3827.0903260707855}
43
+ {"step": 4200, "loss": 1.319828748703003, "gnorm": 1.4913302659988403, "n_masked": 152, "elapsed_s": 3861.893535375595}
44
+ {"step": 4300, "loss": 1.3338496685028076, "gnorm": 1.208323359489441, "n_masked": 530, "elapsed_s": 3896.823921918869}
45
+ {"step": 4400, "loss": 1.2995072603225708, "gnorm": 1.0674986839294434, "n_masked": 310, "elapsed_s": 3931.647414445877}
46
+ {"step": 4500, "loss": 1.3596218824386597, "gnorm": 1.0955146551132202, "n_masked": 805, "elapsed_s": 3966.6186838150024}
47
+ {"step": 4600, "loss": 1.3028804063796997, "gnorm": 0.6485550999641418, "n_masked": 484, "elapsed_s": 4001.462837457657}
48
+ {"step": 4700, "loss": 1.2857258319854736, "gnorm": 0.8690285682678223, "n_masked": 245, "elapsed_s": 4036.550687789917}
49
+ {"step": 4800, "loss": 1.3559362888336182, "gnorm": 1.0165315866470337, "n_masked": 716, "elapsed_s": 4071.3766589164734}
50
+ {"step": 4900, "loss": 1.3308452367782593, "gnorm": 1.2648040056228638, "n_masked": 294, "elapsed_s": 4106.318940639496}
51
+ {"step": 5000, "loss": 1.2766859531402588, "gnorm": 0.915342390537262, "n_masked": 237, "elapsed_s": 4141.131684780121}
52
+ {"step": 5000, "val_loss": 1.31967852658323, "val_n_masked": 125120}
53
+ {"step": 5100, "loss": 1.3131557703018188, "gnorm": 0.7355113625526428, "n_masked": 419, "elapsed_s": 7071.821239709854}
54
+ {"step": 5200, "loss": 1.3408268690109253, "gnorm": 1.096132516860962, "n_masked": 541, "elapsed_s": 7106.810987472534}
55
+ {"step": 5300, "loss": 1.307053804397583, "gnorm": 0.5693714022636414, "n_masked": 442, "elapsed_s": 7141.5278515815735}
56
+ {"step": 5400, "loss": 1.29517662525177, "gnorm": 1.3376681804656982, "n_masked": 179, "elapsed_s": 7176.441524267197}
57
+ {"step": 5500, "loss": 1.3639081716537476, "gnorm": 0.9462569952011108, "n_masked": 448, "elapsed_s": 7211.151086330414}
58
+ {"step": 5600, "loss": 1.2913233041763306, "gnorm": 0.8113807439804077, "n_masked": 373, "elapsed_s": 7246.005970478058}
59
+ {"step": 5700, "loss": 1.3261120319366455, "gnorm": 1.5461076498031616, "n_masked": 395, "elapsed_s": 7280.744534254074}
60
+ {"step": 5800, "loss": 1.3296393156051636, "gnorm": 0.9701949954032898, "n_masked": 693, "elapsed_s": 7315.613130807877}
61
+ {"step": 5900, "loss": 1.320681095123291, "gnorm": 1.6884491443634033, "n_masked": 353, "elapsed_s": 7350.318828821182}
62
+ {"step": 6000, "loss": 1.2966911792755127, "gnorm": 1.3964776992797852, "n_masked": 341, "elapsed_s": 7385.207570552826}
63
+ {"step": 6100, "loss": 1.2978414297103882, "gnorm": 2.4335546493530273, "n_masked": 168, "elapsed_s": 7419.989809036255}
64
+ {"step": 6200, "loss": 1.3053107261657715, "gnorm": 0.8775435090065002, "n_masked": 540, "elapsed_s": 7454.475150823593}
65
+ {"step": 6300, "loss": 1.3426384925842285, "gnorm": 1.2731138467788696, "n_masked": 672, "elapsed_s": 7488.987844705582}
66
+ {"step": 6400, "loss": 1.3264063596725464, "gnorm": 2.030742883682251, "n_masked": 143, "elapsed_s": 7523.395847797394}