diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-100000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-100000.pth deleted file mode 100644 index 671e6265dfc1d093cb811a677d1424dadb5093f6..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-100000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8e8f1f93d0a9ba20db74269108d3b0c8a6501c4e6ce0d9af6afde3e8574e3c78 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-110000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-110000.pth deleted file mode 100644 index 520fb8ec77dd58f85275cd612ac3a2c837593b6c..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-110000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:58a66d2bb688e83cafbdd551f676d5ff2be84d6c13146695c984a7cc3441b21d -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-120000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-120000.pth deleted file mode 100644 index 297503aaf15e13bf19b281b32fc491465d8e693d..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-120000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2807c386da8e9c152b25faf97320eb63ff574b99aaa780909932573fe18a972d -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-130000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-130000.pth deleted file mode 100644 index 9916a270d09fc2840806b099dbff5c9b1df7d2e6..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-130000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:acdc308f7e7d93c20c39062eefd6c225d52409b2f895843ebe45733a7dce1832 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-140000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-140000.pth deleted file mode 100644 index b2e197edb278856e984aef9c31745e35bc43461f..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-140000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:98423b257a6605e589ea2c89cea11f582aae6bd687732f7f73bec9c5a25c662a -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-150000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-150000.pth deleted file mode 100644 index f09e3034a90a41d1578d8574a43b1da7bfc8d109..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-150000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed4565aa06e4f44f4967a8de21500fc4c883a4f7539d69a4d5943585b16e553a -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-160000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-160000.pth deleted file mode 100644 index 158d3941dfc6ad3d861116fe8b9caec8f74cd925..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-160000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8d1bb1509657e1f9a067947d57b44c657720fdd58c63e0e83e08d285527d5a70 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-170000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-170000.pth deleted file mode 100644 index 018a1b9e37fb0f68518397cf09ea30d28fdad60d..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-170000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f9b8dcccfe422ed25a9f12f30f39325243cf0be695f9de48ee12cd369468839a -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-180000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-180000.pth deleted file mode 100644 index 5a5b0b227bd4622dd350019de6341b799c58c90f..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-180000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:94735963fe218ca786bd271b07d46a70e02862a1cc379bda146faeffdd0b51f6 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-190000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-190000.pth deleted file mode 100644 index f22da257a3a2d5f8640dab11d6e025f9014732aa..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-190000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b74b586ee9f9ac13dc9fa7a51a73c658c5f16d44d8948e3256268941dab8e42c -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-20000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-20000.pth deleted file mode 100644 index 26cb2678b6e04ccab60f586ecccc6dd27fad5114..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-20000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aff429fe62427a1cbe005a5e3a410d0083747ba7171822b49a2d4eac3910d134 -size 8140908610 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-200000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-200000.pth deleted file mode 100644 index 41500da0cbdda501db4955ab8c91382e6a2e90aa..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-200000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79d38ef69f393b17d0ab6e585aa4f178af5a6e03bbf2a346d8feefc43a974a6e -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-205000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-205000.pth deleted file mode 100644 index fd6049d744bb519d3b7938e41f327dc24bdde1cb..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-205000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:29c48e87b638cf60aca669563bcda7358eb884d69983744078be55e7a56289a5 -size 8147935240 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-210000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-210000.pth deleted file mode 100644 index cfc97475c30fe724abc654582ee41e0b2609e16f..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-210000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ac6b50b8d5b06e1619a60ede5eaeacc0af72103496cdc4cee205ae52f5507636 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-220000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-220000.pth deleted file mode 100644 index 43f37def135b70bd751fc37576fbf8e14581c495..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-220000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:701290fb94b57969d80478bb829d8810641b394433581d5efa0023c9da6533ea -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-230000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-230000.pth deleted file mode 100644 index 2eb3fb24fd92bca16fb95104605f79b46d3f0551..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-230000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fa24624e89fc1ab1c5df48053cc245b80441575ec6ac640dde739ded18be1af -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-240000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-240000.pth deleted file mode 100644 index 18291c32887c87ffc6e2c818c82466ec0bb8e1fd..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-240000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:596a52cb99437fc94843eb1a5128a5a0aa68b743dfdce0d12877bf22462fd506 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-250000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-250000.pth deleted file mode 100644 index 7aa2d52b4b13a1bf3cefd4ba11a47f87ea0a2692..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-250000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6336c649566ac9fdd205f220d7dd8bca8b4ec7a7a6c99ac20d3ac4e73f27a526 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-260000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-260000.pth deleted file mode 100644 index e4b4776d68912fe652d01e5b9cfc4ab2deca6622..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-260000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dc3bc764a0bd98d73b0c583d6679edd1d90e7811ab481408881e5c685dc616de -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-270000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-270000.pth deleted file mode 100644 index a8bd300ec980a4c5d4e72dc25527c1287c8ddf18..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-270000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5264e1787c5d66548200ce4694c9f1aa56f222278e04a692278b8240bc9eec6b -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-280000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-280000.pth deleted file mode 100644 index 72da4105c3ef29184a624632886096c044b6e524..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-280000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eb3756e5f79f57e5f80c14f89c79940e95bc3536a7bf3e87efa9383cd6bc03cf -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-290000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-290000.pth deleted file mode 100644 index ffe037f85e399a467bf12781622313e7cde4de35..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-290000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2294d9b793d760e915c12e3a76961060c60bc4b449d68040628a07e0d8175020 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-30000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-30000.pth deleted file mode 100644 index 267b321bb1d3ad688c0dfbe5864a076019eb2696..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-30000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e9ba7a739650d5c043eaedf313c1e2774d97f430b26b96e210a2c7fcc43eed82 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-40000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-40000.pth deleted file mode 100644 index 455e843a30596b28cfc4923a2cae71f00ab21af3..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-40000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:78d0eebe383e0f7e87e7386bc3eadfe6543f0d80710bf16dbb8fb8a73014664e -size 8147754312 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-60000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-60000.pth deleted file mode 100644 index a74282d90f58159192b26fe55b60c03ed6b361b7..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-60000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b92824e3431e80f3ff5b83c636282d22cce9127d2779692bfe8e4372cb7d115 -size 8140908610 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-70000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-70000.pth deleted file mode 100644 index f6ca490a244804db90a4ba489d46363419b45dc9..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-70000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:82d8dae8e15b1aa9fa2bac94a0fe5d1a2440a623d8bd75144e9d45408306d6c4 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-80000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-80000.pth deleted file mode 100644 index 54949149fcf3db46de166e70fef64c20836d2b30..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-80000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b0ca2aafcc360c2cf46e4995d48a8a5246230a0ab4f7a3dd0025f6cdceabf8a6 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-90000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-90000.pth deleted file mode 100644 index 8e2aa36a33b5d534a18defe9c475a0169b91b06e..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/checkpoint/model-90000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4747e25c1481500547e6c2875514842aa780ffae969fbed218c7443806d2fab6 -size 8147754376 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753615266.SPP00018465.3308355.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753615266.SPP00018465.3308355.0 deleted file mode 100644 index 418a271deedd631748ec81e0e174e4d8674bd8db..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753615266.SPP00018465.3308355.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69a6058903b1a0ec20f6411e23a080b09debef1de7d73f1dc5f37c78e80d12e5 -size 208392400 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753692351.SPP00018465.1406893.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753692351.SPP00018465.1406893.0 deleted file mode 100644 index 0e678b9dc238a4be48a2ef27e727d44214ea35ea..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753692351.SPP00018465.1406893.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:74946a527fe2dabab7e1aed8a09947a36f903b7da6fc67f24cefa746d9d7d03e -size 88 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753692378.SPP00018465.1407946.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753692378.SPP00018465.1407946.0 deleted file mode 100644 index 1bcc43f83313f65ddbc42046b17710ab381a97ef..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753692378.SPP00018465.1407946.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a3b4a7336fc81be2d15e95e286049725a142afe2738d9036d9d7c0e48c0fb0a -size 5255738 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753695179.SPP00018465.1496769.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753695179.SPP00018465.1496769.0 deleted file mode 100644 index d26c3ae44f4befb9ccd5c04821b97723c9648cbe..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753695179.SPP00018465.1496769.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:23926b9a6438499f9877a5ae72df2ec8d3b16497d9fc05744d54fc8b870e2019 -size 457 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753695548.SPP00018465.1509166.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753695548.SPP00018465.1509166.0 deleted file mode 100644 index eceb603631efa983d0f4ec6986328c9022401700..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753695548.SPP00018465.1509166.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f335eda9dcd3fbfef3ec80c45bb8c6c2144fc37d1ea8529fc376db1dfe43affb -size 10328391 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753699482.SPP00018465.1619327.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753699482.SPP00018465.1619327.0 deleted file mode 100644 index df4bf746f24c8ad14615f6562a954f61f0a9d91f..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753699482.SPP00018465.1619327.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a998b5c5988c98abddde14365455a7c537d5186b2a4a38f5b7b3069769bc81d -size 88 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753701583.SPP00018465.1685582.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753701583.SPP00018465.1685582.0 deleted file mode 100644 index bd327b32f256f2058a428e144af713ef2d2ab0d2..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753701583.SPP00018465.1685582.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:baee4ca8bf69450ce327d4c28d1de7482647381f52eaebbb6a4fae5ea8a36c40 -size 10553409 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753712122.SPP00018465.2028189.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753712122.SPP00018465.2028189.0 deleted file mode 100644 index af975affaed6376a24774a2c3b59755a2643eb58..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753712122.SPP00018465.2028189.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:279da3bad444fc077a656813cd22c640005fccbebfac1a9af15f1b7d3f3e8fd9 -size 24713433 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753792574.SPP00018465.243245 copy.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753792574.SPP00018465.243245 copy.0 deleted file mode 100644 index 504e52267ea768874c325399445fdc0f21833120..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753792574.SPP00018465.243245 copy.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5563f4102c62e3696d051d2525325ae99e1e3f65f33518c4040850b62c79e88f -size 66365191 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753792574.SPP00018465.243245.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753792574.SPP00018465.243245.0 deleted file mode 100644 index 08b3e704ad147a785ed4977996e6e5c771a9050b..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe/events.out.tfevents.1753792574.SPP00018465.243245.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cc9dd4d3a31ae39b47bccc05edadaa07797755ca86821a6b541a7a7d1a1e1b05 -size 47281647 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_div0.01/model-20000.pth.txt b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_div0.01/model-20000.pth.txt deleted file mode 100644 index 056bc46b34aa06b9a42dbe55cd1be46258051d24..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_div0.01/model-20000.pth.txt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a22b209d9b071e5312671f016117e06a64ab58f0269efad09acb712f88736188 -size 8147935048 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_down_coef/model-20000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_down_coef/model-20000.pth deleted file mode 100644 index 855c3cb938dad7fba3afbeaa39fbcfb738c13c02..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_down_coef/model-20000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:caf94715eda6caaa344c3a5d53701c6817e15669c23b93c71981d8fa092030d6 -size 8147935240 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_down_weights/events.out.tfevents.1753730850.SPP00018465.2580524.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_down_weights/events.out.tfevents.1753730850.SPP00018465.2580524.0 deleted file mode 100644 index 0014f07e40825c4db53d52df17a8d7d10744c3d6..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_down_weights/events.out.tfevents.1753730850.SPP00018465.2580524.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:32830d4e75e0a6bf078b84af069353b1315e83d15dd8b97613f76312a2b3dade -size 19544867 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753738358.SPP00018465.2790111.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753738358.SPP00018465.2790111.0 deleted file mode 100644 index de35c1132f07429f8870f23610e65f12d042625e..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753738358.SPP00018465.2790111.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:453a6a68ffafb4cb7440daf64afa09a6dd771e849dcb81a62e6d88e432ac249e -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753738659.SPP00018465.2801366.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753738659.SPP00018465.2801366.0 deleted file mode 100644 index 3e0abd0a5b096cdb484ad3e7deec12949a9a144d..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753738659.SPP00018465.2801366.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d12ac99cc0d4ccca2b92c9c7e6378ce22419905acbd5a0988d384438716290c3 -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739024.SPP00018465.2814872.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739024.SPP00018465.2814872.0 deleted file mode 100644 index 0d8537b6ece9fd9519d5bd9f2e9010d189777694..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739024.SPP00018465.2814872.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:959c43f9cb17d83a5e266d7d69867ac629b117a7c272826d13136fbfa7a9934a -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739404.SPP00018465.2828799.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739404.SPP00018465.2828799.0 deleted file mode 100644 index 6c5732fdc550f9c110d0cd156a89ea33d5d1eee2..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739404.SPP00018465.2828799.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f4781bb15774d330e147c3eee958d8e64969e3d9732d066af0e24b0f59f61d18 -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739792.SPP00018465.2847905.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739792.SPP00018465.2847905.0 deleted file mode 100644 index 0e85d879fb56a0e6f2b48b65d8481f9e4f76f978..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739792.SPP00018465.2847905.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f21a01ff85e82a71c298764f42cae246172f3fa7c2ecb88ae9602729f28bbff5 -size 88 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739925.SPP00018465.2853726.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739925.SPP00018465.2853726.0 deleted file mode 100644 index d6c9465131a57e220df19e5ef577cd167e309ae3..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753739925.SPP00018465.2853726.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b596ae2d890d793f221bc2bddbb8a86a9d67cb530f051fa739d17c5b676c836 -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753740472.SPP00018465.2872732.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753740472.SPP00018465.2872732.0 deleted file mode 100644 index 7ba433116b9e9b10b30d7a36039bc0ff8a2befeb..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753740472.SPP00018465.2872732.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ce101817991b07f03a1c71673c7a485fe4cfb469439cb5f06828a4cb1edb0c16 -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753740955.SPP00018465.2888720.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753740955.SPP00018465.2888720.0 deleted file mode 100644 index b079b5b954b78ad8f4d48ccab4d3383dd688dfe2..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753740955.SPP00018465.2888720.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87a26b5a5723ac08e5ce272bf4f6f312a98c81758ad914c37a218c9a5bb01568 -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753741226.SPP00018465.2898794.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753741226.SPP00018465.2898794.0 deleted file mode 100644 index 6b6020ed186b9078af7ab84a67d00d866ce5e7e7..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753741226.SPP00018465.2898794.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9336f7caebf1be2ba05c597b863afe3bd99f86b9636abf2c40603dc1aa8a6981 -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753741460.SPP00018465.2907691.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753741460.SPP00018465.2907691.0 deleted file mode 100644 index 1be071ae8f802ea8ceeba36a493617213cd8d740..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753741460.SPP00018465.2907691.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a1e66bea645e1fcdd6a954906f11489b74769b9834e4871a873079e79ba8d243 -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753742047.SPP00018465.2927229.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753742047.SPP00018465.2927229.0 deleted file mode 100644 index f3d4079ecf5fe434873134030eb516255a843a22..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753742047.SPP00018465.2927229.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca9c055f8ecfd58d13da1383162052691b2613259967f051f4709ffb22af2a30 -size 322 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753742645.SPP00018465.2947902.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753742645.SPP00018465.2947902.0 deleted file mode 100644 index c0ae3b076c7b5ee938be3af14482a84f9e1b3294..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/competesmoe_inscrease_w/events.out.tfevents.1753742645.SPP00018465.2947902.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f0ef1ef89c74d761c2d18cbccd65297b31ce5df73bfd45bcef166bc8e10dc9b -size 78946782 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/deepseek/tensorboard/events.out.tfevents.1744037787.ithndgx005.277258.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/deepseek/tensorboard/events.out.tfevents.1744037787.ithndgx005.277258.0 deleted file mode 100644 index 13cd294598079e7d6729e98672c72a1f50567f9b..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/deepseek/tensorboard/events.out.tfevents.1744037787.ithndgx005.277258.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b2c3b930a3cabb7399776ac79067b9207a2a6f95221751039ed5c499eb223b6 -size 383939596 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-160000.pth.json b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-160000.pth.json deleted file mode 100644 index 458d369e8929d99f2b908a2c952ca7db9be25259..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-160000.pth.json +++ /dev/null @@ -1 +0,0 @@ -{"val/loss": 2.4003583846553678, "val/accuracy": 0.5051988170992944, "val/perplexity": 11.027127625900379, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3034290763902368, "lambada/accuracy/total": 0.3297748447204969, "lambada/accuracy/openai_last_token": 0.7915372670807453, "lambada/perplexity": 7.824656120039497, "lambada/lm_loss": 2.9716990422471468, "lambada/lm_perplexity": 19.525065340939612, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.41748683090989563, "mean_loss": 2.3518937305228023, "blimp/accuracy/passive_2": 0.905, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.874, "blimp/accuracy/tough_vs_raising_2": 0.878, "blimp/accuracy/tough_vs_raising_1": 0.577, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/principle_A_reconstruction": 0.301, "blimp/accuracy/wh_vs_that_with_gap": 0.455, "blimp/accuracy/principle_A_domain_2": 0.883, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.907, "blimp/accuracy/principle_A_domain_3": 0.604, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.93, "blimp/accuracy/animate_subject_trans": 0.898, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.908, "blimp/accuracy/distractor_agreement_relative_clause": 0.672, "blimp/accuracy/transitive": 0.903, "blimp/accuracy/sentential_subject_island": 0.286, "blimp/accuracy/adjunct_island": 0.889, "blimp/accuracy/intransitive": 0.758, "blimp/accuracy/existential_there_subject_raising": 0.896, "blimp/accuracy/irregular_past_participle_adjectives": 0.975, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.681, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.317, "blimp/accuracy/only_npi_scope": 0.711, "blimp/accuracy/superlative_quantifiers_2": 0.852, "blimp/accuracy/passive_1": 0.902, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.934, "blimp/accuracy/inchoative": 0.623, "blimp/accuracy/anaphor_gender_agreement": 0.966, "blimp/accuracy/principle_A_c_command": 0.64, "blimp/accuracy/only_npi_licensor_present": 0.446, "blimp/accuracy/expletive_it_object_raising": 0.759, "blimp/accuracy/left_branch_island_simple_question": 0.785, "blimp/accuracy/wh_questions_subject_gap": 0.927, "blimp/accuracy/existential_there_quantifiers_2": 0.492, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.672, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.838, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.907, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.904, "blimp/accuracy/principle_A_case_2": 0.943, "blimp/accuracy/distractor_agreement_relational_noun": 0.85, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.977, "blimp/accuracy/superlative_quantifiers_1": 0.685, "blimp/accuracy/wh_island": 0.807, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.637, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.974, "blimp/accuracy/irregular_past_participle_verbs": 0.925, "blimp/accuracy/drop_argument": 0.724, "blimp/accuracy/wh_questions_object_gap": 0.84, "blimp/accuracy/animate_subject_passive": 0.788, "blimp/accuracy/existential_there_quantifiers_1": 0.971, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/npi_present_2": 0.566, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.976, "blimp/accuracy/anaphor_number_agreement": 0.994, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.961, "blimp/accuracy/existential_there_object_raising": 0.887, "blimp/accuracy/matrix_question_npi_licensor_present": 0.303, "blimp/accuracy/npi_present_1": 0.551, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.452, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.951, "blimp/accuracy/causative": 0.759, "blimp/accuracy/group_average": 0.7921791044776119, "blimp/accuracy/seq_average": 0.792179104477612, "cbt/accuracy/NE": 0.8100961538461539, "cbt/accuracy/V": 0.9284, "cbt/accuracy/CN": 0.8804, "cbt/accuracy/P": 0.91, "cbt/accuracy/group_average": 0.8822240384615385, "cbt/accuracy/seq_average": 0.8822529011604642, "hellaswag/accuracy/val": 0.3392750448117905, "hellaswag/accuracy/group_average": 0.3392750448117905, "hellaswag/accuracy/seq_average": 0.3392750448117905, "piqa/accuracy/val": 0.6235038084874864, "piqa/accuracy/group_average": 0.6235038084874864, "piqa/accuracy/seq_average": 0.6235038084874864, "ai2arc/accuracy/ARC-Easy": 0.3704016913319239, "ai2arc/accuracy/ARC-Challenge": 0.23605150214592274, "ai2arc/accuracy/group_average": 0.3032265967389233, "ai2arc/accuracy/seq_average": 0.32606232294617565, "mmlu/accuracy/MMLU": 0.2632105827672506, "mmlu/accuracy/group_average": 0.2632105827672506, "mmlu/accuracy/seq_average": 0.2632105827672506, "openbookqa/accuracy/test": 0.28, "openbookqa/accuracy/group_average": 0.28, "openbookqa/accuracy/seq_average": 0.28, "race/accuracy/test/high": 0.2830188679245283, "race/accuracy/test/middle": 0.3628133704735376, "race/accuracy/group_average": 0.32291611919903296, "race/accuracy/seq_average": 0.3062423996757195, "siqa/accuracy/dev": 0.3638689866939611, "siqa/accuracy/group_average": 0.3638689866939611, "siqa/accuracy/seq_average": 0.3638689866939611, "winogrande/accuracy/dev": 0.5011838989739542, "winogrande/accuracy/group_average": 0.5011838989739542, "winogrande/accuracy/seq_average": 0.5011838989739542, "commonsenseqa/accuracy/dev_rand_split": 0.2620802620802621, "commonsenseqa/accuracy/group_average": 0.2620802620802621, "commonsenseqa/accuracy/seq_average": 0.2620802620802621} \ No newline at end of file diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-180000.pth.json b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-180000.pth.json deleted file mode 100644 index 0532eeaf2deba3bd3d8601c60e89cdde7f338f21..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-180000.pth.json +++ /dev/null @@ -1 +0,0 @@ -{"val/loss": 2.364891173347594, "val/accuracy": 0.5107199048239087, "val/perplexity": 10.64288052542509, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5708572672020575, "lambada/accuracy/total": 0.3421972049689441, "lambada/accuracy/openai_last_token": 0.797360248447205, "lambada/perplexity": 7.406868178335139, "lambada/lm_loss": 2.9628792196336193, "lambada/lm_perplexity": 19.353614920677348, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4264585548964264, "mean_loss": 2.467874220274826, "blimp/accuracy/passive_2": 0.915, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.861, "blimp/accuracy/tough_vs_raising_2": 0.874, "blimp/accuracy/tough_vs_raising_1": 0.648, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.907, "blimp/accuracy/principle_A_reconstruction": 0.28, "blimp/accuracy/wh_vs_that_with_gap": 0.421, "blimp/accuracy/principle_A_domain_2": 0.888, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.897, "blimp/accuracy/principle_A_domain_3": 0.6, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.922, "blimp/accuracy/animate_subject_trans": 0.907, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.905, "blimp/accuracy/distractor_agreement_relative_clause": 0.701, "blimp/accuracy/transitive": 0.896, "blimp/accuracy/sentential_subject_island": 0.354, "blimp/accuracy/adjunct_island": 0.855, "blimp/accuracy/intransitive": 0.756, "blimp/accuracy/existential_there_subject_raising": 0.889, "blimp/accuracy/irregular_past_participle_adjectives": 0.915, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.696, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.306, "blimp/accuracy/only_npi_scope": 0.736, "blimp/accuracy/superlative_quantifiers_2": 0.832, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.938, "blimp/accuracy/inchoative": 0.609, "blimp/accuracy/anaphor_gender_agreement": 0.972, "blimp/accuracy/principle_A_c_command": 0.685, "blimp/accuracy/only_npi_licensor_present": 0.535, "blimp/accuracy/expletive_it_object_raising": 0.768, "blimp/accuracy/left_branch_island_simple_question": 0.796, "blimp/accuracy/wh_questions_subject_gap": 0.942, "blimp/accuracy/existential_there_quantifiers_2": 0.56, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.941, "blimp/accuracy/sentential_negation_npi_scope": 0.662, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.811, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.933, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.891, "blimp/accuracy/principle_A_case_2": 0.937, "blimp/accuracy/distractor_agreement_relational_noun": 0.821, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.97, "blimp/accuracy/superlative_quantifiers_1": 0.688, "blimp/accuracy/wh_island": 0.741, "blimp/accuracy/principle_A_domain_1": 0.995, "blimp/accuracy/complex_NP_island": 0.594, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.97, "blimp/accuracy/irregular_past_participle_verbs": 0.909, "blimp/accuracy/drop_argument": 0.734, "blimp/accuracy/wh_questions_object_gap": 0.841, "blimp/accuracy/animate_subject_passive": 0.787, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.892, "blimp/accuracy/npi_present_2": 0.542, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.941, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.957, "blimp/accuracy/existential_there_object_raising": 0.883, "blimp/accuracy/matrix_question_npi_licensor_present": 0.349, "blimp/accuracy/npi_present_1": 0.526, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.437, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.739, "blimp/accuracy/group_average": 0.7913432835820896, "blimp/accuracy/seq_average": 0.7913432835820896, "cbt/accuracy/NE": 0.8052884615384616, "cbt/accuracy/V": 0.9328, "cbt/accuracy/CN": 0.88, "cbt/accuracy/P": 0.9216, "cbt/accuracy/group_average": 0.8849221153846154, "cbt/accuracy/seq_average": 0.884953981592637, "hellaswag/accuracy/val": 0.34624576777534355, "hellaswag/accuracy/group_average": 0.34624576777534355, "hellaswag/accuracy/seq_average": 0.34624576777534355, "piqa/accuracy/val": 0.6376496191512514, "piqa/accuracy/group_average": 0.6376496191512514, "piqa/accuracy/seq_average": 0.6376496191512514, "ai2arc/accuracy/ARC-Easy": 0.3708245243128964, "ai2arc/accuracy/ARC-Challenge": 0.22832618025751072, "ai2arc/accuracy/group_average": 0.2995753522852036, "ai2arc/accuracy/seq_average": 0.32379603399433426, "mmlu/accuracy/MMLU": 0.2606363961387201, "mmlu/accuracy/group_average": 0.2606363961387201, "mmlu/accuracy/seq_average": 0.2606363961387201, "openbookqa/accuracy/test": 0.272, "openbookqa/accuracy/group_average": 0.272, "openbookqa/accuracy/seq_average": 0.272, "race/accuracy/test/high": 0.2850200114351058, "race/accuracy/test/middle": 0.3628133704735376, "race/accuracy/group_average": 0.32391669095432174, "race/accuracy/seq_average": 0.30766112687474667, "siqa/accuracy/dev": 0.36284544524053225, "siqa/accuracy/group_average": 0.36284544524053225, "siqa/accuracy/seq_average": 0.36284544524053225, "winogrande/accuracy/dev": 0.5011838989739542, "winogrande/accuracy/group_average": 0.5011838989739542, "winogrande/accuracy/seq_average": 0.5011838989739542, "commonsenseqa/accuracy/dev_rand_split": 0.26453726453726456, "commonsenseqa/accuracy/group_average": 0.26453726453726456, "commonsenseqa/accuracy/seq_average": 0.26453726453726456} \ No newline at end of file diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-2000000.pth.json b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-2000000.pth.json deleted file mode 100644 index 58f711cf88ee7db7aa47b0100ebfbc169cf7cf09..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-2000000.pth.json +++ /dev/null @@ -1 +0,0 @@ -{"val/loss": 2.345306396484375, "val/accuracy": 0.5136234343998016, "val/perplexity": 10.436469935414774, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.367531154466712, "lambada/accuracy/total": 0.3125, "lambada/accuracy/openai_last_token": 0.7882375776397516, "lambada/perplexity": 8.643184532218681, "lambada/lm_loss": 2.9319599466122455, "lambada/lm_perplexity": 18.764371646204605, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4130617171999008, "mean_loss": 2.3564187754755435, "blimp/accuracy/passive_2": 0.897, "blimp/accuracy/determiner_noun_agreement_2": 0.982, "blimp/accuracy/ellipsis_n_bar_1": 0.845, "blimp/accuracy/tough_vs_raising_2": 0.887, "blimp/accuracy/tough_vs_raising_1": 0.572, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.895, "blimp/accuracy/principle_A_reconstruction": 0.433, "blimp/accuracy/wh_vs_that_with_gap": 0.491, "blimp/accuracy/principle_A_domain_2": 0.886, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.891, "blimp/accuracy/principle_A_domain_3": 0.617, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937, "blimp/accuracy/animate_subject_trans": 0.902, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.926, "blimp/accuracy/distractor_agreement_relative_clause": 0.684, "blimp/accuracy/transitive": 0.876, "blimp/accuracy/sentential_subject_island": 0.35, "blimp/accuracy/adjunct_island": 0.878, "blimp/accuracy/intransitive": 0.757, "blimp/accuracy/existential_there_subject_raising": 0.878, "blimp/accuracy/irregular_past_participle_adjectives": 0.869, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.728, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.382, "blimp/accuracy/only_npi_scope": 0.728, "blimp/accuracy/superlative_quantifiers_2": 0.741, "blimp/accuracy/passive_1": 0.903, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.933, "blimp/accuracy/inchoative": 0.624, "blimp/accuracy/anaphor_gender_agreement": 0.972, "blimp/accuracy/principle_A_c_command": 0.678, "blimp/accuracy/only_npi_licensor_present": 0.561, "blimp/accuracy/expletive_it_object_raising": 0.77, "blimp/accuracy/left_branch_island_simple_question": 0.826, "blimp/accuracy/wh_questions_subject_gap": 0.923, "blimp/accuracy/existential_there_quantifiers_2": 0.435, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.944, "blimp/accuracy/sentential_negation_npi_scope": 0.663, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.837, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.87, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.916, "blimp/accuracy/distractor_agreement_relational_noun": 0.83, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.663, "blimp/accuracy/wh_island": 0.849, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.601, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.984, "blimp/accuracy/irregular_past_participle_verbs": 0.912, "blimp/accuracy/drop_argument": 0.732, "blimp/accuracy/wh_questions_object_gap": 0.827, "blimp/accuracy/animate_subject_passive": 0.781, "blimp/accuracy/existential_there_quantifiers_1": 0.965, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.912, "blimp/accuracy/npi_present_2": 0.624, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.959, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.969, "blimp/accuracy/existential_there_object_raising": 0.853, "blimp/accuracy/matrix_question_npi_licensor_present": 0.358, "blimp/accuracy/npi_present_1": 0.607, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.552, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.965, "blimp/accuracy/causative": 0.746, "blimp/accuracy/group_average": 0.7972089552238806, "blimp/accuracy/seq_average": 0.7972089552238806, "cbt/accuracy/NE": 0.8000801282051282, "cbt/accuracy/V": 0.9356, "cbt/accuracy/CN": 0.8732, "cbt/accuracy/P": 0.9244, "cbt/accuracy/group_average": 0.8833200320512821, "cbt/accuracy/seq_average": 0.8833533413365346, "hellaswag/accuracy/val": 0.3492332204740092, "hellaswag/accuracy/group_average": 0.3492332204740092, "hellaswag/accuracy/seq_average": 0.3492332204740092, "piqa/accuracy/val": 0.6414581066376496, "piqa/accuracy/group_average": 0.6414581066376496, "piqa/accuracy/seq_average": 0.6414581066376496, "ai2arc/accuracy/ARC-Easy": 0.38308668076109936, "ai2arc/accuracy/ARC-Challenge": 0.22660944206008585, "ai2arc/accuracy/group_average": 0.3048480614105926, "ai2arc/accuracy/seq_average": 0.3314447592067989} \ No newline at end of file diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-205000.pth.json b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-205000.pth.json deleted file mode 100644 index 8a955b7fe3fd3c61ab9a800ebdfdf68e85b4df85..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-205000.pth.json +++ /dev/null @@ -1 +0,0 @@ -{"val/loss": 2.3490779816158236, "val/accuracy": 0.5128919813368056, "val/perplexity": 10.475906292288581, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3582519152149652, "lambada/accuracy/total": 0.35384316770186336, "lambada/accuracy/openai_last_token": 0.796777950310559, "lambada/perplexity": 7.179454384541888, "lambada/lm_loss": 2.9343860198674525, "lambada/lm_perplexity": 18.809950653055125, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4333675745193345, "mean_loss": 2.3536649484153944, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.837, "blimp/accuracy/tough_vs_raising_2": 0.875, "blimp/accuracy/tough_vs_raising_1": 0.57, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.928, "blimp/accuracy/principle_A_reconstruction": 0.451, "blimp/accuracy/wh_vs_that_with_gap": 0.45, "blimp/accuracy/principle_A_domain_2": 0.903, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.917, "blimp/accuracy/principle_A_domain_3": 0.653, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937, "blimp/accuracy/animate_subject_trans": 0.908, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.93, "blimp/accuracy/distractor_agreement_relative_clause": 0.658, "blimp/accuracy/transitive": 0.889, "blimp/accuracy/sentential_subject_island": 0.301, "blimp/accuracy/adjunct_island": 0.888, "blimp/accuracy/intransitive": 0.769, "blimp/accuracy/existential_there_subject_raising": 0.892, "blimp/accuracy/irregular_past_participle_adjectives": 0.93, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.694, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.279, "blimp/accuracy/only_npi_scope": 0.761, "blimp/accuracy/superlative_quantifiers_2": 0.782, "blimp/accuracy/passive_1": 0.88, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.924, "blimp/accuracy/inchoative": 0.624, "blimp/accuracy/anaphor_gender_agreement": 0.979, "blimp/accuracy/principle_A_c_command": 0.613, "blimp/accuracy/only_npi_licensor_present": 0.681, "blimp/accuracy/expletive_it_object_raising": 0.796, "blimp/accuracy/left_branch_island_simple_question": 0.789, "blimp/accuracy/wh_questions_subject_gap": 0.924, "blimp/accuracy/existential_there_quantifiers_2": 0.53, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.937, "blimp/accuracy/sentential_negation_npi_scope": 0.673, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.853, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.894, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.915, "blimp/accuracy/principle_A_case_2": 0.915, "blimp/accuracy/distractor_agreement_relational_noun": 0.811, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.642, "blimp/accuracy/wh_island": 0.848, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.612, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.98, "blimp/accuracy/irregular_past_participle_verbs": 0.922, "blimp/accuracy/drop_argument": 0.727, "blimp/accuracy/wh_questions_object_gap": 0.796, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.967, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882, "blimp/accuracy/npi_present_2": 0.612, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.952, "blimp/accuracy/anaphor_number_agreement": 0.985, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.97, "blimp/accuracy/existential_there_object_raising": 0.84, "blimp/accuracy/matrix_question_npi_licensor_present": 0.411, "blimp/accuracy/npi_present_1": 0.614, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.547, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.746, "blimp/accuracy/group_average": 0.7998358208955221, "blimp/accuracy/seq_average": 0.7998358208955224, "cbt/accuracy/NE": 0.8169070512820513, "cbt/accuracy/V": 0.9324, "cbt/accuracy/CN": 0.8724, "cbt/accuracy/P": 0.9152, "cbt/accuracy/group_average": 0.8842267628205128, "cbt/accuracy/seq_average": 0.8842537014805922, "hellaswag/accuracy/val": 0.3480382393945429, "hellaswag/accuracy/group_average": 0.3480382393945429, "hellaswag/accuracy/seq_average": 0.3480382393945429, "piqa/accuracy/val": 0.6327529923830251, "piqa/accuracy/group_average": 0.6327529923830251, "piqa/accuracy/seq_average": 0.6327529923830251, "ai2arc/accuracy/ARC-Easy": 0.38097251585623676, "ai2arc/accuracy/ARC-Challenge": 0.22832618025751072, "ai2arc/accuracy/group_average": 0.3046493480568737, "ai2arc/accuracy/seq_average": 0.3305949008498584} \ No newline at end of file diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-2100000.pth.json b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-2100000.pth.json deleted file mode 100644 index 58f711cf88ee7db7aa47b0100ebfbc169cf7cf09..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-2100000.pth.json +++ /dev/null @@ -1 +0,0 @@ -{"val/loss": 2.345306396484375, "val/accuracy": 0.5136234343998016, "val/perplexity": 10.436469935414774, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.367531154466712, "lambada/accuracy/total": 0.3125, "lambada/accuracy/openai_last_token": 0.7882375776397516, "lambada/perplexity": 8.643184532218681, "lambada/lm_loss": 2.9319599466122455, "lambada/lm_perplexity": 18.764371646204605, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4130617171999008, "mean_loss": 2.3564187754755435, "blimp/accuracy/passive_2": 0.897, "blimp/accuracy/determiner_noun_agreement_2": 0.982, "blimp/accuracy/ellipsis_n_bar_1": 0.845, "blimp/accuracy/tough_vs_raising_2": 0.887, "blimp/accuracy/tough_vs_raising_1": 0.572, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.895, "blimp/accuracy/principle_A_reconstruction": 0.433, "blimp/accuracy/wh_vs_that_with_gap": 0.491, "blimp/accuracy/principle_A_domain_2": 0.886, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.891, "blimp/accuracy/principle_A_domain_3": 0.617, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937, "blimp/accuracy/animate_subject_trans": 0.902, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.926, "blimp/accuracy/distractor_agreement_relative_clause": 0.684, "blimp/accuracy/transitive": 0.876, "blimp/accuracy/sentential_subject_island": 0.35, "blimp/accuracy/adjunct_island": 0.878, "blimp/accuracy/intransitive": 0.757, "blimp/accuracy/existential_there_subject_raising": 0.878, "blimp/accuracy/irregular_past_participle_adjectives": 0.869, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.728, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.382, "blimp/accuracy/only_npi_scope": 0.728, "blimp/accuracy/superlative_quantifiers_2": 0.741, "blimp/accuracy/passive_1": 0.903, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.933, "blimp/accuracy/inchoative": 0.624, "blimp/accuracy/anaphor_gender_agreement": 0.972, "blimp/accuracy/principle_A_c_command": 0.678, "blimp/accuracy/only_npi_licensor_present": 0.561, "blimp/accuracy/expletive_it_object_raising": 0.77, "blimp/accuracy/left_branch_island_simple_question": 0.826, "blimp/accuracy/wh_questions_subject_gap": 0.923, "blimp/accuracy/existential_there_quantifiers_2": 0.435, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.944, "blimp/accuracy/sentential_negation_npi_scope": 0.663, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.837, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.87, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.916, "blimp/accuracy/distractor_agreement_relational_noun": 0.83, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.663, "blimp/accuracy/wh_island": 0.849, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.601, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.984, "blimp/accuracy/irregular_past_participle_verbs": 0.912, "blimp/accuracy/drop_argument": 0.732, "blimp/accuracy/wh_questions_object_gap": 0.827, "blimp/accuracy/animate_subject_passive": 0.781, "blimp/accuracy/existential_there_quantifiers_1": 0.965, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.912, "blimp/accuracy/npi_present_2": 0.624, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.959, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.969, "blimp/accuracy/existential_there_object_raising": 0.853, "blimp/accuracy/matrix_question_npi_licensor_present": 0.358, "blimp/accuracy/npi_present_1": 0.607, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.552, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.965, "blimp/accuracy/causative": 0.746, "blimp/accuracy/group_average": 0.7972089552238806, "blimp/accuracy/seq_average": 0.7972089552238806, "cbt/accuracy/NE": 0.8000801282051282, "cbt/accuracy/V": 0.9356, "cbt/accuracy/CN": 0.8732, "cbt/accuracy/P": 0.9244, "cbt/accuracy/group_average": 0.8833200320512821, "cbt/accuracy/seq_average": 0.8833533413365346, "hellaswag/accuracy/val": 0.3492332204740092, "hellaswag/accuracy/group_average": 0.3492332204740092, "hellaswag/accuracy/seq_average": 0.3492332204740092, "piqa/accuracy/val": 0.6414581066376496, "piqa/accuracy/group_average": 0.6414581066376496, "piqa/accuracy/seq_average": 0.6414581066376496, "ai2arc/accuracy/ARC-Easy": 0.38308668076109936, "ai2arc/accuracy/ARC-Challenge": 0.22660944206008585, "ai2arc/accuracy/group_average": 0.3048480614105926, "ai2arc/accuracy/seq_average": 0.3314447592067989} \ No newline at end of file diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-280000.pth.json b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-280000.pth.json deleted file mode 100644 index c08693715bf2cee803951271e30889fd032b29d8..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-280000.pth.json +++ /dev/null @@ -1 +0,0 @@ -{"val/loss": 2.311976771200857, "val/accuracy": 0.5189691358996976, "val/perplexity": 10.094359184046882, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.193172478527756, "lambada/accuracy/total": 0.37577639751552794, "lambada/accuracy/openai_last_token": 0.8016304347826086, "lambada/perplexity": 6.779006294918139, "lambada/lm_loss": 2.8871369140285337, "lambada/lm_perplexity": 17.941866886848043, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4473727667076128, "mean_loss": 2.2525746248643066, "blimp/accuracy/passive_2": 0.912, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.84, "blimp/accuracy/tough_vs_raising_2": 0.873, "blimp/accuracy/tough_vs_raising_1": 0.603, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.855, "blimp/accuracy/principle_A_reconstruction": 0.421, "blimp/accuracy/wh_vs_that_with_gap": 0.429, "blimp/accuracy/principle_A_domain_2": 0.888, "blimp/accuracy/determiner_noun_agreement_1": 0.989, "blimp/accuracy/ellipsis_n_bar_2": 0.918, "blimp/accuracy/principle_A_domain_3": 0.634, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.92, "blimp/accuracy/animate_subject_trans": 0.902, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.914, "blimp/accuracy/distractor_agreement_relative_clause": 0.726, "blimp/accuracy/transitive": 0.901, "blimp/accuracy/sentential_subject_island": 0.328, "blimp/accuracy/adjunct_island": 0.868, "blimp/accuracy/intransitive": 0.758, "blimp/accuracy/existential_there_subject_raising": 0.909, "blimp/accuracy/irregular_past_participle_adjectives": 0.769, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.725, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.291, "blimp/accuracy/only_npi_scope": 0.679, "blimp/accuracy/superlative_quantifiers_2": 0.782, "blimp/accuracy/passive_1": 0.901, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/inchoative": 0.609, "blimp/accuracy/anaphor_gender_agreement": 0.977, "blimp/accuracy/principle_A_c_command": 0.698, "blimp/accuracy/only_npi_licensor_present": 0.856, "blimp/accuracy/expletive_it_object_raising": 0.778, "blimp/accuracy/left_branch_island_simple_question": 0.799, "blimp/accuracy/wh_questions_subject_gap": 0.941, "blimp/accuracy/existential_there_quantifiers_2": 0.55, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.949, "blimp/accuracy/sentential_negation_npi_scope": 0.677, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.839, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.914, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.874, "blimp/accuracy/principle_A_case_2": 0.932, "blimp/accuracy/distractor_agreement_relational_noun": 0.866, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.956, "blimp/accuracy/superlative_quantifiers_1": 0.835, "blimp/accuracy/wh_island": 0.768, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.622, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.982, "blimp/accuracy/irregular_past_participle_verbs": 0.927, "blimp/accuracy/drop_argument": 0.727, "blimp/accuracy/wh_questions_object_gap": 0.848, "blimp/accuracy/animate_subject_passive": 0.802, "blimp/accuracy/existential_there_quantifiers_1": 0.98, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.864, "blimp/accuracy/npi_present_2": 0.576, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.938, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.962, "blimp/accuracy/existential_there_object_raising": 0.86, "blimp/accuracy/matrix_question_npi_licensor_present": 0.433, "blimp/accuracy/npi_present_1": 0.615, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.482, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.755, "blimp/accuracy/group_average": 0.8024029850746269, "blimp/accuracy/seq_average": 0.8024029850746268, "cbt/accuracy/NE": 0.8261217948717948, "cbt/accuracy/V": 0.9348, "cbt/accuracy/CN": 0.8856, "cbt/accuracy/P": 0.9168, "cbt/accuracy/group_average": 0.8908304487179487, "cbt/accuracy/seq_average": 0.8908563425370148, "hellaswag/accuracy/val": 0.3626767576180044, "hellaswag/accuracy/group_average": 0.3626767576180044, "hellaswag/accuracy/seq_average": 0.3626767576180044, "piqa/accuracy/val": 0.6322089227421109, "piqa/accuracy/group_average": 0.6322089227421109, "piqa/accuracy/seq_average": 0.6322089227421109, "ai2arc/accuracy/ARC-Easy": 0.38054968287526425, "ai2arc/accuracy/ARC-Challenge": 0.2257510729613734, "ai2arc/accuracy/group_average": 0.30315037791831884, "ai2arc/accuracy/seq_average": 0.32946175637393765, "mmlu/accuracy/MMLU": 0.25927779764032893, "mmlu/accuracy/group_average": 0.25927779764032893, "mmlu/accuracy/seq_average": 0.25927779764032893, "openbookqa/accuracy/test": 0.276, "openbookqa/accuracy/group_average": 0.276, "openbookqa/accuracy/seq_average": 0.276, "race/accuracy/test/high": 0.28987993138936535, "race/accuracy/test/middle": 0.3767409470752089, "race/accuracy/group_average": 0.3333104392322871, "race/accuracy/seq_average": 0.3151601134981759, "siqa/accuracy/dev": 0.37461617195496416, "siqa/accuracy/group_average": 0.37461617195496416, "siqa/accuracy/seq_average": 0.37461617195496416, "winogrande/accuracy/dev": 0.5059194948697711, "winogrande/accuracy/group_average": 0.5059194948697711, "winogrande/accuracy/seq_average": 0.5059194948697711, "commonsenseqa/accuracy/dev_rand_split": 0.2702702702702703, "commonsenseqa/accuracy/group_average": 0.2702702702702703, "commonsenseqa/accuracy/seq_average": 0.2702702702702703} \ No newline at end of file diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/sigmoid/events.out.tfevents.1744239232.SPP00018465.647722.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/sigmoid/events.out.tfevents.1744239232.SPP00018465.647722.0 deleted file mode 100644 index 865f312c5ed0aefddc0b153986032a4146203dcb..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/sigmoid/events.out.tfevents.1744239232.SPP00018465.647722.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8da7218d7d34003c7c4b8eb775957ab483772a5db038ffcce5360240c76cfa9f -size 359384212 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743294880.SPP00018465.3718870.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743294880.SPP00018465.3718870.0 deleted file mode 100644 index 5dd2e8e9f07ec29ca16a42667e253e6eabc31ee1..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743294880.SPP00018465.3718870.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0a7fb0751cd8982434bc13ae03a2e5a616de13f91c6ffc4732f667c3ce444dd2 -size 139998781 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743339370.SPP00018465.3058064.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743339370.SPP00018465.3058064.0 deleted file mode 100644 index 3320ea47863b6af0adccdfa3b01c10895f57b787..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743339370.SPP00018465.3058064.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0418ed236938d85323b4735d3b794cbf738aae600fe598a31216f714994a300c -size 30546837 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743379682.SPP00018465.1817913.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743379682.SPP00018465.1817913.0 deleted file mode 100644 index 3ec50d9e7d083ad52e1c09aec605dfe0a07aa197..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743379682.SPP00018465.1817913.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b15c6908d9fea692a5800132872079e1ca799db275465fa354e925bd6eff2db6 -size 85063073 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743599190.ithndgx005.2694658.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743599190.ithndgx005.2694658.0 deleted file mode 100644 index 9563fc4c0bf13b919ae0516972a9f9d301726902..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743599190.ithndgx005.2694658.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d0013e85aa578110994dd227dbb6abcf1ae90631515efd655034b4b31278101d -size 88 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743599273.ithndgx005.2727181.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743599273.ithndgx005.2727181.0 deleted file mode 100644 index bd7a5e091839530677033438cb93a3f873ee3629..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743599273.ithndgx005.2727181.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f08dcbd05fd327e0c44efcabf1c28ba091324cda7a92a15ecb4f437519599bf0 -size 261603 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743636153.ithndgx005.2724020.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743636153.ithndgx005.2724020.0 deleted file mode 100644 index 6f4fd7e2472106e40b69ef2e6e3a3a6c696dcfe1..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743636153.ithndgx005.2724020.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60bebbadded19b65c16711a226ea1e2690617b694fc47b703020d2415ff7cad7 -size 63790453 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743824448.ithndgx005.4052318.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743824448.ithndgx005.4052318.0 deleted file mode 100644 index cf2252e57a7978a7959ca0b974e303c65fd07299..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/smoe/tensorboard/events.out.tfevents.1743824448.ithndgx005.4052318.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fab41363ad1cbdcf28b24cfea2fc062e8222aa7de86bdb8db544a46e2861731 -size 99730171 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753615266.SPP00018465.3308355.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753615266.SPP00018465.3308355.0 deleted file mode 100644 index 418a271deedd631748ec81e0e174e4d8674bd8db..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753615266.SPP00018465.3308355.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69a6058903b1a0ec20f6411e23a080b09debef1de7d73f1dc5f37c78e80d12e5 -size 208392400 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753692351.SPP00018465.1406893.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753692351.SPP00018465.1406893.0 deleted file mode 100644 index 0e678b9dc238a4be48a2ef27e727d44214ea35ea..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753692351.SPP00018465.1406893.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:74946a527fe2dabab7e1aed8a09947a36f903b7da6fc67f24cefa746d9d7d03e -size 88 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753692378.SPP00018465.1407946.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753692378.SPP00018465.1407946.0 deleted file mode 100644 index 1bcc43f83313f65ddbc42046b17710ab381a97ef..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753692378.SPP00018465.1407946.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a3b4a7336fc81be2d15e95e286049725a142afe2738d9036d9d7c0e48c0fb0a -size 5255738 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753695179.SPP00018465.1496769.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753695179.SPP00018465.1496769.0 deleted file mode 100644 index d26c3ae44f4befb9ccd5c04821b97723c9648cbe..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753695179.SPP00018465.1496769.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:23926b9a6438499f9877a5ae72df2ec8d3b16497d9fc05744d54fc8b870e2019 -size 457 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753695548.SPP00018465.1509166.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753695548.SPP00018465.1509166.0 deleted file mode 100644 index eceb603631efa983d0f4ec6986328c9022401700..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753695548.SPP00018465.1509166.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f335eda9dcd3fbfef3ec80c45bb8c6c2144fc37d1ea8529fc376db1dfe43affb -size 10328391 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753699482.SPP00018465.1619327.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753699482.SPP00018465.1619327.0 deleted file mode 100644 index df4bf746f24c8ad14615f6562a954f61f0a9d91f..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753699482.SPP00018465.1619327.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a998b5c5988c98abddde14365455a7c537d5186b2a4a38f5b7b3069769bc81d -size 88 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753773569.SPP00018465.3860334.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753773569.SPP00018465.3860334.0 deleted file mode 100644 index 7223f22859a00eb8a15d5458e3e4e72361f90c3d..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard/events.out.tfevents.1753773569.SPP00018465.3860334.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6944c2c0c74e2ed9af4142b008638945c6cda609776d10f08ccb3b547e139dfb -size 235359627 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard_competesmoe_div0.01/events.out.tfevents.1753721324.SPP00018465.2288322.0 b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard_competesmoe_div0.01/events.out.tfevents.1753721324.SPP00018465.2288322.0 deleted file mode 100644 index bd9631bd54c326d239b01a6b3cc1eaa41e5e6bcc..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tensorboard_competesmoe_div0.01/events.out.tfevents.1753721324.SPP00018465.2288322.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1e7f6c1d913291f6ad8c87add0d8bada0b33a2f349fe7455794f61bd714a1384 -size 21426092 diff --git a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tmpx/model-205000.pth b/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tmpx/model-205000.pth deleted file mode 100644 index 367b4a1e85c375b2b6068fd185dcea1b6a0b0f47..0000000000000000000000000000000000000000 --- a/Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/tmpx/model-205000.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7515f5c97a50c771c1f7b84f444267e6aadabf5cad5b46067aca6b2d08da4ce6 -size 8147935240