Upload runs/exp_t1_ntv3_mdlm_20260427_h100/log.jsonl with huggingface_hub
Browse files
runs/exp_t1_ntv3_mdlm_20260427_h100/log.jsonl
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 1, "loss": 2.795363426208496, "elapsed_s": 3.2, "head": "mdlm"}
|
| 2 |
+
{"step": 50, "loss": 6.155711650848389, "elapsed_s": 18.2, "head": "mdlm"}
|
| 3 |
+
{"step": 100, "loss": 5.337583065032959, "elapsed_s": 32.9, "head": "mdlm"}
|
| 4 |
+
{"step": 150, "loss": 5.485507011413574, "elapsed_s": 48.4, "head": "mdlm"}
|
| 5 |
+
{"step": 200, "loss": 4.109313011169434, "elapsed_s": 63.8, "head": "mdlm"}
|
| 6 |
+
{"step": 250, "loss": 3.4321703910827637, "elapsed_s": 80.6, "head": "mdlm"}
|
| 7 |
+
{"step": 300, "loss": 10.578981399536133, "elapsed_s": 95.3, "head": "mdlm"}
|
| 8 |
+
{"step": 350, "loss": 5.543044090270996, "elapsed_s": 112.8, "head": "mdlm"}
|
| 9 |
+
{"step": 400, "loss": 5.879223346710205, "elapsed_s": 127.1, "head": "mdlm"}
|
| 10 |
+
{"step": 450, "loss": 4.282629013061523, "elapsed_s": 142.3, "head": "mdlm"}
|
| 11 |
+
{"step": 500, "loss": 6.337652683258057, "elapsed_s": 157.7, "head": "mdlm"}
|
| 12 |
+
{"kind": "eval", "step": 500, "val_loss": 4.16948944064549, "val_n": 7000.0}
|
| 13 |
+
{"step": 550, "loss": 1.428766131401062, "elapsed_s": 264.4, "head": "mdlm"}
|
| 14 |
+
{"step": 600, "loss": 4.539824485778809, "elapsed_s": 275.9, "head": "mdlm"}
|
| 15 |
+
{"step": 650, "loss": 2.3447225093841553, "elapsed_s": 290.9, "head": "mdlm"}
|
| 16 |
+
{"step": 700, "loss": 1.6541640758514404, "elapsed_s": 303.5, "head": "mdlm"}
|
| 17 |
+
{"step": 750, "loss": 9.466487884521484, "elapsed_s": 317.4, "head": "mdlm"}
|
| 18 |
+
{"step": 800, "loss": 2.264054536819458, "elapsed_s": 328.7, "head": "mdlm"}
|
| 19 |
+
{"step": 850, "loss": 1.8505727052688599, "elapsed_s": 343.2, "head": "mdlm"}
|
| 20 |
+
{"step": 900, "loss": 10.363512992858887, "elapsed_s": 356.6, "head": "mdlm"}
|
| 21 |
+
{"step": 950, "loss": 4.542239665985107, "elapsed_s": 368.5, "head": "mdlm"}
|
| 22 |
+
{"step": 1000, "loss": 2.2402074337005615, "elapsed_s": 382.1, "head": "mdlm"}
|
| 23 |
+
{"kind": "eval", "step": 1000, "val_loss": 4.163767718860082, "val_n": 7000.0}
|
| 24 |
+
{"step": 1050, "loss": 2.780627727508545, "elapsed_s": 472.1, "head": "mdlm"}
|
| 25 |
+
{"step": 1100, "loss": 4.17878532409668, "elapsed_s": 482.1, "head": "mdlm"}
|
| 26 |
+
{"step": 1150, "loss": 4.788943767547607, "elapsed_s": 491.6, "head": "mdlm"}
|
| 27 |
+
{"step": 1200, "loss": 12.322632789611816, "elapsed_s": 499.6, "head": "mdlm"}
|
| 28 |
+
{"step": 1250, "loss": 2.1500649452209473, "elapsed_s": 507.1, "head": "mdlm"}
|
| 29 |
+
{"step": 1300, "loss": 2.901766300201416, "elapsed_s": 514.6, "head": "mdlm"}
|
| 30 |
+
{"step": 1350, "loss": 9.05850887298584, "elapsed_s": 522.0, "head": "mdlm"}
|
| 31 |
+
{"step": 1400, "loss": 5.482202529907227, "elapsed_s": 529.5, "head": "mdlm"}
|
| 32 |
+
{"step": 1450, "loss": 2.095883369445801, "elapsed_s": 537.0, "head": "mdlm"}
|
| 33 |
+
{"step": 1500, "loss": 3.4607601165771484, "elapsed_s": 544.4, "head": "mdlm"}
|
| 34 |
+
{"kind": "eval", "step": 1500, "val_loss": 4.247235411371504, "val_n": 7000.0}
|
| 35 |
+
{"step": 1550, "loss": 12.51127815246582, "elapsed_s": 605.4, "head": "mdlm"}
|
| 36 |
+
{"step": 1600, "loss": 8.475220680236816, "elapsed_s": 614.4, "head": "mdlm"}
|
| 37 |
+
{"step": 1650, "loss": 4.5788726806640625, "elapsed_s": 621.2, "head": "mdlm"}
|
| 38 |
+
{"step": 1700, "loss": 3.450150489807129, "elapsed_s": 627.6, "head": "mdlm"}
|
| 39 |
+
{"step": 1750, "loss": 7.308990478515625, "elapsed_s": 635.0, "head": "mdlm"}
|
| 40 |
+
{"step": 1800, "loss": 4.090757369995117, "elapsed_s": 642.4, "head": "mdlm"}
|
| 41 |
+
{"step": 1850, "loss": 4.363306045532227, "elapsed_s": 649.8, "head": "mdlm"}
|
| 42 |
+
{"step": 1900, "loss": 2.2114052772521973, "elapsed_s": 657.3, "head": "mdlm"}
|
| 43 |
+
{"step": 1950, "loss": 5.5423808097839355, "elapsed_s": 665.7, "head": "mdlm"}
|
| 44 |
+
{"step": 2000, "loss": 5.018580913543701, "elapsed_s": 673.2, "head": "mdlm"}
|
| 45 |
+
{"kind": "eval", "step": 2000, "val_loss": 4.234882655824934, "val_n": 7000.0}
|
| 46 |
+
{"step": 2050, "loss": 3.0218253135681152, "elapsed_s": 733.9, "head": "mdlm"}
|
| 47 |
+
{"step": 2100, "loss": 6.479369163513184, "elapsed_s": 741.5, "head": "mdlm"}
|
| 48 |
+
{"step": 2150, "loss": 2.037721633911133, "elapsed_s": 748.9, "head": "mdlm"}
|
| 49 |
+
{"step": 2200, "loss": 10.217229843139648, "elapsed_s": 756.5, "head": "mdlm"}
|
| 50 |
+
{"step": 2250, "loss": 4.1390156745910645, "elapsed_s": 763.9, "head": "mdlm"}
|
| 51 |
+
{"step": 2300, "loss": 2.3931548595428467, "elapsed_s": 771.4, "head": "mdlm"}
|
| 52 |
+
{"step": 2350, "loss": 3.477555274963379, "elapsed_s": 778.8, "head": "mdlm"}
|
| 53 |
+
{"step": 2400, "loss": 2.3971736431121826, "elapsed_s": 786.3, "head": "mdlm"}
|
| 54 |
+
{"step": 2450, "loss": 2.1365838050842285, "elapsed_s": 793.7, "head": "mdlm"}
|
| 55 |
+
{"step": 2500, "loss": 2.593061923980713, "elapsed_s": 801.2, "head": "mdlm"}
|
| 56 |
+
{"kind": "eval", "step": 2500, "val_loss": 4.1269595286505565, "val_n": 7000.0}
|
| 57 |
+
{"step": 2550, "loss": 4.815208435058594, "elapsed_s": 861.9, "head": "mdlm"}
|
| 58 |
+
{"step": 2600, "loss": 2.8337831497192383, "elapsed_s": 869.4, "head": "mdlm"}
|
| 59 |
+
{"step": 2650, "loss": 1.891696810722351, "elapsed_s": 876.8, "head": "mdlm"}
|
| 60 |
+
{"step": 2700, "loss": 9.263204574584961, "elapsed_s": 884.3, "head": "mdlm"}
|
| 61 |
+
{"step": 2750, "loss": 3.549997091293335, "elapsed_s": 891.8, "head": "mdlm"}
|
| 62 |
+
{"step": 2800, "loss": 2.1958565711975098, "elapsed_s": 899.2, "head": "mdlm"}
|
| 63 |
+
{"step": 2850, "loss": 7.001121997833252, "elapsed_s": 907.6, "head": "mdlm"}
|
| 64 |
+
{"step": 2900, "loss": 6.0332183837890625, "elapsed_s": 915.0, "head": "mdlm"}
|
| 65 |
+
{"step": 2950, "loss": 6.4090704917907715, "elapsed_s": 922.5, "head": "mdlm"}
|
| 66 |
+
{"step": 3000, "loss": 2.442589044570923, "elapsed_s": 929.9, "head": "mdlm"}
|
| 67 |
+
{"kind": "eval", "step": 3000, "val_loss": 4.155337644849505, "val_n": 7000.0}
|
| 68 |
+
{"step": 3050, "loss": 5.707138538360596, "elapsed_s": 981.6, "head": "mdlm"}
|
| 69 |
+
{"step": 3100, "loss": 2.2340731620788574, "elapsed_s": 989.1, "head": "mdlm"}
|
| 70 |
+
{"step": 3150, "loss": 6.731550216674805, "elapsed_s": 996.6, "head": "mdlm"}
|
| 71 |
+
{"step": 3200, "loss": 3.2922911643981934, "elapsed_s": 1004.1, "head": "mdlm"}
|
| 72 |
+
{"step": 3250, "loss": 6.406817436218262, "elapsed_s": 1011.6, "head": "mdlm"}
|
| 73 |
+
{"step": 3300, "loss": 1.6499327421188354, "elapsed_s": 1019.1, "head": "mdlm"}
|
| 74 |
+
{"step": 3350, "loss": 2.9262022972106934, "elapsed_s": 1029.0, "head": "mdlm"}
|
| 75 |
+
{"step": 3400, "loss": 2.0414791107177734, "elapsed_s": 1036.5, "head": "mdlm"}
|
| 76 |
+
{"step": 3450, "loss": 1.9802309274673462, "elapsed_s": 1044.9, "head": "mdlm"}
|
| 77 |
+
{"step": 3500, "loss": 2.3244688510894775, "elapsed_s": 1052.4, "head": "mdlm"}
|
| 78 |
+
{"kind": "eval", "step": 3500, "val_loss": 4.181723970958164, "val_n": 7000.0}
|
| 79 |
+
{"step": 3550, "loss": 4.080251693725586, "elapsed_s": 1113.0, "head": "mdlm"}
|
| 80 |
+
{"step": 3600, "loss": 9.369963645935059, "elapsed_s": 1120.5, "head": "mdlm"}
|
| 81 |
+
{"step": 3650, "loss": 1.6831507682800293, "elapsed_s": 1127.9, "head": "mdlm"}
|
| 82 |
+
{"step": 3700, "loss": 2.6897711753845215, "elapsed_s": 1135.4, "head": "mdlm"}
|
| 83 |
+
{"step": 3750, "loss": 2.285163164138794, "elapsed_s": 1142.8, "head": "mdlm"}
|
| 84 |
+
{"step": 3800, "loss": 9.629528045654297, "elapsed_s": 1150.3, "head": "mdlm"}
|
| 85 |
+
{"step": 3850, "loss": 2.4255635738372803, "elapsed_s": 1157.7, "head": "mdlm"}
|
| 86 |
+
{"step": 3900, "loss": 7.61187219619751, "elapsed_s": 1165.3, "head": "mdlm"}
|
| 87 |
+
{"step": 3950, "loss": 14.984149932861328, "elapsed_s": 1172.8, "head": "mdlm"}
|
| 88 |
+
{"step": 4000, "loss": 2.252829074859619, "elapsed_s": 1180.2, "head": "mdlm"}
|
| 89 |
+
{"kind": "eval", "step": 4000, "val_loss": 4.153912672315325, "val_n": 7000.0}
|
| 90 |
+
{"step": 4050, "loss": 2.7907609939575195, "elapsed_s": 1241.1, "head": "mdlm"}
|
| 91 |
+
{"step": 4100, "loss": 1.9941437244415283, "elapsed_s": 1248.7, "head": "mdlm"}
|
| 92 |
+
{"step": 4150, "loss": 11.366575241088867, "elapsed_s": 1257.0, "head": "mdlm"}
|
| 93 |
+
{"step": 4200, "loss": 3.3751235008239746, "elapsed_s": 1263.9, "head": "mdlm"}
|
| 94 |
+
{"step": 4250, "loss": 1.9477345943450928, "elapsed_s": 1270.3, "head": "mdlm"}
|
| 95 |
+
{"step": 4300, "loss": 13.381315231323242, "elapsed_s": 1277.8, "head": "mdlm"}
|
| 96 |
+
{"step": 4350, "loss": 4.84561824798584, "elapsed_s": 1285.2, "head": "mdlm"}
|
| 97 |
+
{"step": 4400, "loss": 2.055889129638672, "elapsed_s": 1292.7, "head": "mdlm"}
|
| 98 |
+
{"step": 4450, "loss": 2.376004695892334, "elapsed_s": 1300.2, "head": "mdlm"}
|
| 99 |
+
{"step": 4500, "loss": 6.021737575531006, "elapsed_s": 1307.7, "head": "mdlm"}
|
| 100 |
+
{"kind": "eval", "step": 4500, "val_loss": 4.228843774250575, "val_n": 7000.0}
|
| 101 |
+
{"step": 4550, "loss": 3.3566575050354004, "elapsed_s": 1369.8, "head": "mdlm"}
|
| 102 |
+
{"step": 4600, "loss": 4.956597328186035, "elapsed_s": 1377.3, "head": "mdlm"}
|
| 103 |
+
{"step": 4650, "loss": 4.08725118637085, "elapsed_s": 1384.8, "head": "mdlm"}
|
| 104 |
+
{"step": 4700, "loss": 1.7294867038726807, "elapsed_s": 1392.3, "head": "mdlm"}
|
| 105 |
+
{"step": 4750, "loss": 2.0501623153686523, "elapsed_s": 1400.8, "head": "mdlm"}
|
| 106 |
+
{"step": 4800, "loss": 1.7614240646362305, "elapsed_s": 1408.4, "head": "mdlm"}
|
| 107 |
+
{"step": 4850, "loss": 1.839538335800171, "elapsed_s": 1417.6, "head": "mdlm"}
|
| 108 |
+
{"step": 4900, "loss": 3.0949692726135254, "elapsed_s": 1425.3, "head": "mdlm"}
|
| 109 |
+
{"step": 4950, "loss": 3.7838327884674072, "elapsed_s": 1432.8, "head": "mdlm"}
|
| 110 |
+
{"step": 5000, "loss": 2.21063232421875, "elapsed_s": 1440.2, "head": "mdlm"}
|
| 111 |
+
{"kind": "eval", "step": 5000, "val_loss": 4.159483058248248, "val_n": 7000.0}
|