explcre commited on
Commit
585d536
·
verified ·
1 Parent(s): c9d2103

Upload runs/exp_t1_ntv3_mdlm_20260427_h100/log.jsonl with huggingface_hub

Browse files
runs/exp_t1_ntv3_mdlm_20260427_h100/log.jsonl ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 1, "loss": 2.795363426208496, "elapsed_s": 3.2, "head": "mdlm"}
2
+ {"step": 50, "loss": 6.155711650848389, "elapsed_s": 18.2, "head": "mdlm"}
3
+ {"step": 100, "loss": 5.337583065032959, "elapsed_s": 32.9, "head": "mdlm"}
4
+ {"step": 150, "loss": 5.485507011413574, "elapsed_s": 48.4, "head": "mdlm"}
5
+ {"step": 200, "loss": 4.109313011169434, "elapsed_s": 63.8, "head": "mdlm"}
6
+ {"step": 250, "loss": 3.4321703910827637, "elapsed_s": 80.6, "head": "mdlm"}
7
+ {"step": 300, "loss": 10.578981399536133, "elapsed_s": 95.3, "head": "mdlm"}
8
+ {"step": 350, "loss": 5.543044090270996, "elapsed_s": 112.8, "head": "mdlm"}
9
+ {"step": 400, "loss": 5.879223346710205, "elapsed_s": 127.1, "head": "mdlm"}
10
+ {"step": 450, "loss": 4.282629013061523, "elapsed_s": 142.3, "head": "mdlm"}
11
+ {"step": 500, "loss": 6.337652683258057, "elapsed_s": 157.7, "head": "mdlm"}
12
+ {"kind": "eval", "step": 500, "val_loss": 4.16948944064549, "val_n": 7000.0}
13
+ {"step": 550, "loss": 1.428766131401062, "elapsed_s": 264.4, "head": "mdlm"}
14
+ {"step": 600, "loss": 4.539824485778809, "elapsed_s": 275.9, "head": "mdlm"}
15
+ {"step": 650, "loss": 2.3447225093841553, "elapsed_s": 290.9, "head": "mdlm"}
16
+ {"step": 700, "loss": 1.6541640758514404, "elapsed_s": 303.5, "head": "mdlm"}
17
+ {"step": 750, "loss": 9.466487884521484, "elapsed_s": 317.4, "head": "mdlm"}
18
+ {"step": 800, "loss": 2.264054536819458, "elapsed_s": 328.7, "head": "mdlm"}
19
+ {"step": 850, "loss": 1.8505727052688599, "elapsed_s": 343.2, "head": "mdlm"}
20
+ {"step": 900, "loss": 10.363512992858887, "elapsed_s": 356.6, "head": "mdlm"}
21
+ {"step": 950, "loss": 4.542239665985107, "elapsed_s": 368.5, "head": "mdlm"}
22
+ {"step": 1000, "loss": 2.2402074337005615, "elapsed_s": 382.1, "head": "mdlm"}
23
+ {"kind": "eval", "step": 1000, "val_loss": 4.163767718860082, "val_n": 7000.0}
24
+ {"step": 1050, "loss": 2.780627727508545, "elapsed_s": 472.1, "head": "mdlm"}
25
+ {"step": 1100, "loss": 4.17878532409668, "elapsed_s": 482.1, "head": "mdlm"}
26
+ {"step": 1150, "loss": 4.788943767547607, "elapsed_s": 491.6, "head": "mdlm"}
27
+ {"step": 1200, "loss": 12.322632789611816, "elapsed_s": 499.6, "head": "mdlm"}
28
+ {"step": 1250, "loss": 2.1500649452209473, "elapsed_s": 507.1, "head": "mdlm"}
29
+ {"step": 1300, "loss": 2.901766300201416, "elapsed_s": 514.6, "head": "mdlm"}
30
+ {"step": 1350, "loss": 9.05850887298584, "elapsed_s": 522.0, "head": "mdlm"}
31
+ {"step": 1400, "loss": 5.482202529907227, "elapsed_s": 529.5, "head": "mdlm"}
32
+ {"step": 1450, "loss": 2.095883369445801, "elapsed_s": 537.0, "head": "mdlm"}
33
+ {"step": 1500, "loss": 3.4607601165771484, "elapsed_s": 544.4, "head": "mdlm"}
34
+ {"kind": "eval", "step": 1500, "val_loss": 4.247235411371504, "val_n": 7000.0}
35
+ {"step": 1550, "loss": 12.51127815246582, "elapsed_s": 605.4, "head": "mdlm"}
36
+ {"step": 1600, "loss": 8.475220680236816, "elapsed_s": 614.4, "head": "mdlm"}
37
+ {"step": 1650, "loss": 4.5788726806640625, "elapsed_s": 621.2, "head": "mdlm"}
38
+ {"step": 1700, "loss": 3.450150489807129, "elapsed_s": 627.6, "head": "mdlm"}
39
+ {"step": 1750, "loss": 7.308990478515625, "elapsed_s": 635.0, "head": "mdlm"}
40
+ {"step": 1800, "loss": 4.090757369995117, "elapsed_s": 642.4, "head": "mdlm"}
41
+ {"step": 1850, "loss": 4.363306045532227, "elapsed_s": 649.8, "head": "mdlm"}
42
+ {"step": 1900, "loss": 2.2114052772521973, "elapsed_s": 657.3, "head": "mdlm"}
43
+ {"step": 1950, "loss": 5.5423808097839355, "elapsed_s": 665.7, "head": "mdlm"}
44
+ {"step": 2000, "loss": 5.018580913543701, "elapsed_s": 673.2, "head": "mdlm"}
45
+ {"kind": "eval", "step": 2000, "val_loss": 4.234882655824934, "val_n": 7000.0}
46
+ {"step": 2050, "loss": 3.0218253135681152, "elapsed_s": 733.9, "head": "mdlm"}
47
+ {"step": 2100, "loss": 6.479369163513184, "elapsed_s": 741.5, "head": "mdlm"}
48
+ {"step": 2150, "loss": 2.037721633911133, "elapsed_s": 748.9, "head": "mdlm"}
49
+ {"step": 2200, "loss": 10.217229843139648, "elapsed_s": 756.5, "head": "mdlm"}
50
+ {"step": 2250, "loss": 4.1390156745910645, "elapsed_s": 763.9, "head": "mdlm"}
51
+ {"step": 2300, "loss": 2.3931548595428467, "elapsed_s": 771.4, "head": "mdlm"}
52
+ {"step": 2350, "loss": 3.477555274963379, "elapsed_s": 778.8, "head": "mdlm"}
53
+ {"step": 2400, "loss": 2.3971736431121826, "elapsed_s": 786.3, "head": "mdlm"}
54
+ {"step": 2450, "loss": 2.1365838050842285, "elapsed_s": 793.7, "head": "mdlm"}
55
+ {"step": 2500, "loss": 2.593061923980713, "elapsed_s": 801.2, "head": "mdlm"}
56
+ {"kind": "eval", "step": 2500, "val_loss": 4.1269595286505565, "val_n": 7000.0}
57
+ {"step": 2550, "loss": 4.815208435058594, "elapsed_s": 861.9, "head": "mdlm"}
58
+ {"step": 2600, "loss": 2.8337831497192383, "elapsed_s": 869.4, "head": "mdlm"}
59
+ {"step": 2650, "loss": 1.891696810722351, "elapsed_s": 876.8, "head": "mdlm"}
60
+ {"step": 2700, "loss": 9.263204574584961, "elapsed_s": 884.3, "head": "mdlm"}
61
+ {"step": 2750, "loss": 3.549997091293335, "elapsed_s": 891.8, "head": "mdlm"}
62
+ {"step": 2800, "loss": 2.1958565711975098, "elapsed_s": 899.2, "head": "mdlm"}
63
+ {"step": 2850, "loss": 7.001121997833252, "elapsed_s": 907.6, "head": "mdlm"}
64
+ {"step": 2900, "loss": 6.0332183837890625, "elapsed_s": 915.0, "head": "mdlm"}
65
+ {"step": 2950, "loss": 6.4090704917907715, "elapsed_s": 922.5, "head": "mdlm"}
66
+ {"step": 3000, "loss": 2.442589044570923, "elapsed_s": 929.9, "head": "mdlm"}
67
+ {"kind": "eval", "step": 3000, "val_loss": 4.155337644849505, "val_n": 7000.0}
68
+ {"step": 3050, "loss": 5.707138538360596, "elapsed_s": 981.6, "head": "mdlm"}
69
+ {"step": 3100, "loss": 2.2340731620788574, "elapsed_s": 989.1, "head": "mdlm"}
70
+ {"step": 3150, "loss": 6.731550216674805, "elapsed_s": 996.6, "head": "mdlm"}
71
+ {"step": 3200, "loss": 3.2922911643981934, "elapsed_s": 1004.1, "head": "mdlm"}
72
+ {"step": 3250, "loss": 6.406817436218262, "elapsed_s": 1011.6, "head": "mdlm"}
73
+ {"step": 3300, "loss": 1.6499327421188354, "elapsed_s": 1019.1, "head": "mdlm"}
74
+ {"step": 3350, "loss": 2.9262022972106934, "elapsed_s": 1029.0, "head": "mdlm"}
75
+ {"step": 3400, "loss": 2.0414791107177734, "elapsed_s": 1036.5, "head": "mdlm"}
76
+ {"step": 3450, "loss": 1.9802309274673462, "elapsed_s": 1044.9, "head": "mdlm"}
77
+ {"step": 3500, "loss": 2.3244688510894775, "elapsed_s": 1052.4, "head": "mdlm"}
78
+ {"kind": "eval", "step": 3500, "val_loss": 4.181723970958164, "val_n": 7000.0}
79
+ {"step": 3550, "loss": 4.080251693725586, "elapsed_s": 1113.0, "head": "mdlm"}
80
+ {"step": 3600, "loss": 9.369963645935059, "elapsed_s": 1120.5, "head": "mdlm"}
81
+ {"step": 3650, "loss": 1.6831507682800293, "elapsed_s": 1127.9, "head": "mdlm"}
82
+ {"step": 3700, "loss": 2.6897711753845215, "elapsed_s": 1135.4, "head": "mdlm"}
83
+ {"step": 3750, "loss": 2.285163164138794, "elapsed_s": 1142.8, "head": "mdlm"}
84
+ {"step": 3800, "loss": 9.629528045654297, "elapsed_s": 1150.3, "head": "mdlm"}
85
+ {"step": 3850, "loss": 2.4255635738372803, "elapsed_s": 1157.7, "head": "mdlm"}
86
+ {"step": 3900, "loss": 7.61187219619751, "elapsed_s": 1165.3, "head": "mdlm"}
87
+ {"step": 3950, "loss": 14.984149932861328, "elapsed_s": 1172.8, "head": "mdlm"}
88
+ {"step": 4000, "loss": 2.252829074859619, "elapsed_s": 1180.2, "head": "mdlm"}
89
+ {"kind": "eval", "step": 4000, "val_loss": 4.153912672315325, "val_n": 7000.0}
90
+ {"step": 4050, "loss": 2.7907609939575195, "elapsed_s": 1241.1, "head": "mdlm"}
91
+ {"step": 4100, "loss": 1.9941437244415283, "elapsed_s": 1248.7, "head": "mdlm"}
92
+ {"step": 4150, "loss": 11.366575241088867, "elapsed_s": 1257.0, "head": "mdlm"}
93
+ {"step": 4200, "loss": 3.3751235008239746, "elapsed_s": 1263.9, "head": "mdlm"}
94
+ {"step": 4250, "loss": 1.9477345943450928, "elapsed_s": 1270.3, "head": "mdlm"}
95
+ {"step": 4300, "loss": 13.381315231323242, "elapsed_s": 1277.8, "head": "mdlm"}
96
+ {"step": 4350, "loss": 4.84561824798584, "elapsed_s": 1285.2, "head": "mdlm"}
97
+ {"step": 4400, "loss": 2.055889129638672, "elapsed_s": 1292.7, "head": "mdlm"}
98
+ {"step": 4450, "loss": 2.376004695892334, "elapsed_s": 1300.2, "head": "mdlm"}
99
+ {"step": 4500, "loss": 6.021737575531006, "elapsed_s": 1307.7, "head": "mdlm"}
100
+ {"kind": "eval", "step": 4500, "val_loss": 4.228843774250575, "val_n": 7000.0}
101
+ {"step": 4550, "loss": 3.3566575050354004, "elapsed_s": 1369.8, "head": "mdlm"}
102
+ {"step": 4600, "loss": 4.956597328186035, "elapsed_s": 1377.3, "head": "mdlm"}
103
+ {"step": 4650, "loss": 4.08725118637085, "elapsed_s": 1384.8, "head": "mdlm"}
104
+ {"step": 4700, "loss": 1.7294867038726807, "elapsed_s": 1392.3, "head": "mdlm"}
105
+ {"step": 4750, "loss": 2.0501623153686523, "elapsed_s": 1400.8, "head": "mdlm"}
106
+ {"step": 4800, "loss": 1.7614240646362305, "elapsed_s": 1408.4, "head": "mdlm"}
107
+ {"step": 4850, "loss": 1.839538335800171, "elapsed_s": 1417.6, "head": "mdlm"}
108
+ {"step": 4900, "loss": 3.0949692726135254, "elapsed_s": 1425.3, "head": "mdlm"}
109
+ {"step": 4950, "loss": 3.7838327884674072, "elapsed_s": 1432.8, "head": "mdlm"}
110
+ {"step": 5000, "loss": 2.21063232421875, "elapsed_s": 1440.2, "head": "mdlm"}
111
+ {"kind": "eval", "step": 5000, "val_loss": 4.159483058248248, "val_n": 7000.0}