DavidNguyen commited on
Commit
67f54a4
·
verified ·
1 Parent(s): 55b3396

deleted folder error config

Browse files
Files changed (50) hide show
  1. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-100000.pth +0 -3
  2. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-110000.pth +0 -3
  3. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-120000.pth +0 -3
  4. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-130000.pth +0 -3
  5. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-140000.pth +0 -3
  6. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-150000.pth +0 -3
  7. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-160000.pth +0 -3
  8. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-170000.pth +0 -3
  9. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-180000.pth +0 -3
  10. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-190000.pth +0 -3
  11. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-20000.pth +0 -3
  12. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-200000.pth +0 -3
  13. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-210000.pth +0 -3
  14. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-220000.pth +0 -3
  15. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-230000.pth +0 -3
  16. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-240000.pth +0 -3
  17. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-250000.pth +0 -3
  18. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-260000.pth +0 -3
  19. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-270000.pth +0 -3
  20. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-280000.pth +0 -3
  21. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-290000.pth +0 -3
  22. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-30000.pth +0 -3
  23. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-300000.pth +0 -3
  24. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-310000.pth +0 -3
  25. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-320000.pth +0 -3
  26. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-330000.pth +0 -3
  27. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-340000.pth +0 -3
  28. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-350000.pth +0 -3
  29. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-360000.pth +0 -3
  30. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-370000.pth +0 -3
  31. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-380000.pth +0 -3
  32. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-390000.pth +0 -3
  33. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-40000.pth +0 -3
  34. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-400000.pth +0 -3
  35. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-50000.pth +0 -3
  36. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-60000.pth +0 -3
  37. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-70000.pth +0 -3
  38. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-80000.pth +0 -3
  39. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-90000.pth +0 -3
  40. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-120000.pth.json +0 -1
  41. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-120001.pth.json +0 -1
  42. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-140000.pth.json +0 -1
  43. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-160000.pth.json +0 -1
  44. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-180000.pth.json +0 -1
  45. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-200000.pth.json +0 -1
  46. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-360000.pth.json +0 -1
  47. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-380000.pth.json +0 -1
  48. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/tensorboard/events.out.tfevents.1753821939.SPP00018465.1233037.0 +0 -3
  49. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/tensorboard/events.out.tfevents.1753821993.SPP00018465.1236599.0 +0 -3
  50. Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/tensorboard/events.out.tfevents.1753822261.SPP00018465.1253372.0 +0 -3
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-100000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c794e9bfa83ad5382f28297111236b32f41abfcc055aaf9ac45e457122151a2
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-110000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e540694dfbb51460ae965cb47e8edecc190981e00e79b0855f99f2447de39d17
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-120000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ba67e8d503884c61b66b2ad6972f835352d7550fa6aadfd225870271a6d3d7f
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-130000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:17e6adb76a603056636897bb3a61ab4675fb7f181e0ba73d083e50122100d925
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-140000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:955daae01dc367d75e242ed40b6f99dc08bd676cbe4ecbc441c14441be1b4021
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-150000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:95c8e92db9dbc134dd20c4c8a63b2b89e0da68b141cdfd27b7e5d090f4eb5042
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-160000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:88e3de641dd47acec59a109fe9bce28fac91614d0b5c1e1f3689954d633fb738
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-170000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c83b9f119a1f01fde8f952edc1a6a5609fbaf4a29afdfbb4ddf271a16082bdc3
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-180000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb72680f49a40bd7aa8898a55a31d8d361b1e9bbeac2fe4f7ed5202775c1c147
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-190000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad3140f47f66523c570c764b9e38f5d3f107e2433643243dc861e447e04abd9c
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-20000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aff429fe62427a1cbe005a5e3a410d0083747ba7171822b49a2d4eac3910d134
3
- size 8140908610
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-200000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:758dffafda3dc6e9c289c4378971bcf4f5d8af255afb1d9138569a9cdb0c4c9b
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-210000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7061d6d0d07f10233f9f67bcfbe7fe8209eba27166ba69c02f7978c21c50b0b4
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-220000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b03b526214df6436c74bb2e2a891650806cce45c926f5f27038ceb7b746afc9
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-230000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:36bff9c7b8689edd2029be597e85a54853b66e440339b6ad24a156a33ab32821
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-240000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bdba3d2613e7a8ea9bb5b586c023d63a115a520735e13b96ddbcf2c53ffb7b8
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-250000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:43fac73e36f0cef866291a83f7923efbd10a7942d629ab4c0f5f9d2072158820
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-260000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:18f24f1785b5936bf53a9100bab9e0926efff59b7dd3b28fecd4f942da884fdf
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-270000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:18267faa6452945be84f362d490ef9378b1f26a4239b6f846f2eb105dd7b139c
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-280000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed03a37377a301a34e1988d349dbe13d127d3c4aaca4dc16a68adcff1109cdee
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-290000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:494e0b7f75f82402b5a2dc7bb3670b188581107f14b73a91f97ccf3625adf421
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-30000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2e1de06ecd0dbb4e19a908030241d50de5acb0c549f2c5e2604be57d0a650f6
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-300000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aef4656d95b02a97a290651fe7a75020aed8d3ed8201015dd2c24961c1f0790a
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-310000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:adfc52c119d1172bfb9eeb1e083bbe928714b5a284825fca8e319f7dc4588f70
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-320000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:67c19aa7a1215b74c464ea742765bf1fcca1fa1a46e4f50893972ec25452f3d6
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-330000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:31c8db601cb8b037c7c28feba549ad9b28aaf3862fb3050f810b0ab3c35aa5a3
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-340000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d8f34cd11460fb57678504558b52fb74300c81766c914c4eda28d2b30c098ba
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-350000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:46f277fcb0649cb436adea0172ad3348043d99c755f4c32c3b6d0db5552b7a3b
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-360000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdcc444664133b3fb7b836bedfdba22cc610f965cfcbd54c23ec08c0e5aa48aa
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-370000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:43eb6423fa0a99fed789b8bda21865fcd38f8d88f0c9867fb68f1683ca501f53
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-380000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e34da56f112de6484c576b237ed47484f021e7b479f39b92d2afba4852ac2a4
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-390000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:25f794e30918b3e590448408ec31c8619fa793ae29dad45382c2bfc7f6d0564e
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-40000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f99633440f9c1bc9e5885d1a515e5a2ca4ebb67739e33a957c51ee46925a99b
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-400000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb690f106878c003d5eec96224ae7ac5ef4c936dcf2448396c8460aa2ca493c2
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-50000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b696f61e1eb786b29445bd258592d385b07e849f248b8f7e1ae2a6932afc6cb
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-60000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bd5e29ca8e5092a31a2548ce3632041e95f3ae626dc7549a6b2eacc0ab15132
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-70000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:79c2f2f9ad415a30a6824931b0b1f1c2dec42c2b801b84f002d50be86493913f
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-80000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:83252e4c35993fd96db983f21d3124bfe70871b9a9c01d2762b59e194a2df256
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/checkpoint/model-90000.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ecb651f3ef1f7ec73416774c938fca62f79fb6a079d5e2844565d97bd6b4378
3
- size 8147754376
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-120000.pth.json DELETED
@@ -1 +0,0 @@
1
- {"val/loss": 2.4369583129882812, "val/accuracy": 0.5002707204511089, "val/perplexity": 11.438196363495134, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3129389982045807, "lambada/accuracy/total": 0.34627329192546585, "lambada/accuracy/openai_last_token": 0.7878493788819876, "lambada/perplexity": 7.96804092333307, "lambada/lm_loss": 3.0043084211430595, "lambada/lm_perplexity": 20.172260561966155, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42327200618828736, "mean_loss": 2.374948655596431, "blimp/accuracy/passive_2": 0.918, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.836, "blimp/accuracy/tough_vs_raising_2": 0.881, "blimp/accuracy/tough_vs_raising_1": 0.6, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.896, "blimp/accuracy/principle_A_reconstruction": 0.41, "blimp/accuracy/wh_vs_that_with_gap": 0.468, "blimp/accuracy/principle_A_domain_2": 0.883, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.918, "blimp/accuracy/principle_A_domain_3": 0.581, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.916, "blimp/accuracy/animate_subject_trans": 0.891, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.9, "blimp/accuracy/distractor_agreement_relative_clause": 0.68, "blimp/accuracy/transitive": 0.866, "blimp/accuracy/sentential_subject_island": 0.399, "blimp/accuracy/adjunct_island": 0.862, "blimp/accuracy/intransitive": 0.764, "blimp/accuracy/existential_there_subject_raising": 0.869, "blimp/accuracy/irregular_past_participle_adjectives": 0.895, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.67, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.31, "blimp/accuracy/only_npi_scope": 0.691, "blimp/accuracy/superlative_quantifiers_2": 0.759, "blimp/accuracy/passive_1": 0.887, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.923, "blimp/accuracy/inchoative": 0.612, "blimp/accuracy/anaphor_gender_agreement": 0.969, "blimp/accuracy/principle_A_c_command": 0.682, "blimp/accuracy/only_npi_licensor_present": 0.65, "blimp/accuracy/expletive_it_object_raising": 0.784, "blimp/accuracy/left_branch_island_simple_question": 0.768, "blimp/accuracy/wh_questions_subject_gap": 0.929, "blimp/accuracy/existential_there_quantifiers_2": 0.411, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.95, "blimp/accuracy/sentential_negation_npi_scope": 0.749, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.792, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.918, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.918, "blimp/accuracy/principle_A_case_2": 0.92, "blimp/accuracy/distractor_agreement_relational_noun": 0.857, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.822, "blimp/accuracy/wh_island": 0.789, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.595, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.973, "blimp/accuracy/irregular_past_participle_verbs": 0.877, "blimp/accuracy/drop_argument": 0.736, "blimp/accuracy/wh_questions_object_gap": 0.809, "blimp/accuracy/animate_subject_passive": 0.819, "blimp/accuracy/existential_there_quantifiers_1": 0.976, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/npi_present_2": 0.612, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.964, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.843, "blimp/accuracy/matrix_question_npi_licensor_present": 0.295, "blimp/accuracy/npi_present_1": 0.646, "blimp/accuracy/wh_vs_that_no_gap": 0.979, "blimp/accuracy/left_branch_island_echo_question": 0.473, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.957, "blimp/accuracy/causative": 0.755, "blimp/accuracy/group_average": 0.7954029850746269, "blimp/accuracy/seq_average": 0.7954029850746268, "cbt/accuracy/NE": 0.8020833333333334, "cbt/accuracy/V": 0.93, "cbt/accuracy/CN": 0.8656, "cbt/accuracy/P": 0.904, "cbt/accuracy/group_average": 0.8754208333333333, "cbt/accuracy/seq_average": 0.8754501800720288, "hellaswag/accuracy/val": 0.3337980481975702, "hellaswag/accuracy/group_average": 0.3337980481975702, "hellaswag/accuracy/seq_average": 0.3337980481975702, "piqa/accuracy/val": 0.6218715995647442, "piqa/accuracy/group_average": 0.6218715995647442, "piqa/accuracy/seq_average": 0.6218715995647442, "ai2arc/accuracy/ARC-Easy": 0.37251585623678646, "ai2arc/accuracy/ARC-Challenge": 0.21716738197424892, "ai2arc/accuracy/group_average": 0.2948416191055177, "ai2arc/accuracy/seq_average": 0.3212464589235127, "mmlu/accuracy/MMLU": 0.2661422953164104, "mmlu/accuracy/group_average": 0.2661422953164104, "mmlu/accuracy/seq_average": 0.2661422953164104, "openbookqa/accuracy/test": 0.29, "openbookqa/accuracy/group_average": 0.29, "openbookqa/accuracy/seq_average": 0.29, "race/accuracy/test/high": 0.2804459691252144, "race/accuracy/test/middle": 0.346100278551532, "race/accuracy/group_average": 0.31327312383837325, "race/accuracy/seq_average": 0.2995541143088772, "siqa/accuracy/dev": 0.3607983623336745, "siqa/accuracy/group_average": 0.3607983623336745, "siqa/accuracy/seq_average": 0.3607983623336745, "winogrande/accuracy/dev": 0.5019731649565904, "winogrande/accuracy/group_average": 0.5019731649565904, "winogrande/accuracy/seq_average": 0.5019731649565904, "commonsenseqa/accuracy/dev_rand_split": 0.25307125307125306, "commonsenseqa/accuracy/group_average": 0.25307125307125306, "commonsenseqa/accuracy/seq_average": 0.25307125307125306}
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-120001.pth.json DELETED
@@ -1 +0,0 @@
1
- {"val/loss": 2.42450443390877, "val/accuracy": 0.5012768160912299, "val/perplexity": 11.296629805228108, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5750654706303377, "lambada/accuracy/total": 0.3513198757763975, "lambada/accuracy/openai_last_token": 0.7905667701863354, "lambada/perplexity": 7.749698948866573, "lambada/lm_loss": 2.9992504132941376, "lambada/lm_perplexity": 20.07048671315332, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42629834593381366, "mean_loss": 2.499784952269554, "blimp/accuracy/passive_2": 0.898, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.846, "blimp/accuracy/tough_vs_raising_2": 0.894, "blimp/accuracy/tough_vs_raising_1": 0.587, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.915, "blimp/accuracy/principle_A_reconstruction": 0.457, "blimp/accuracy/wh_vs_that_with_gap": 0.442, "blimp/accuracy/principle_A_domain_2": 0.859, "blimp/accuracy/determiner_noun_agreement_1": 0.995, "blimp/accuracy/ellipsis_n_bar_2": 0.925, "blimp/accuracy/principle_A_domain_3": 0.572, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.914, "blimp/accuracy/animate_subject_trans": 0.905, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.906, "blimp/accuracy/distractor_agreement_relative_clause": 0.662, "blimp/accuracy/transitive": 0.866, "blimp/accuracy/sentential_subject_island": 0.327, "blimp/accuracy/adjunct_island": 0.873, "blimp/accuracy/intransitive": 0.763, "blimp/accuracy/existential_there_subject_raising": 0.893, "blimp/accuracy/irregular_past_participle_adjectives": 0.86, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.759, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.326, "blimp/accuracy/only_npi_scope": 0.66, "blimp/accuracy/superlative_quantifiers_2": 0.871, "blimp/accuracy/passive_1": 0.88, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.898, "blimp/accuracy/inchoative": 0.611, "blimp/accuracy/anaphor_gender_agreement": 0.971, "blimp/accuracy/principle_A_c_command": 0.631, "blimp/accuracy/only_npi_licensor_present": 0.723, "blimp/accuracy/expletive_it_object_raising": 0.767, "blimp/accuracy/left_branch_island_simple_question": 0.864, "blimp/accuracy/wh_questions_subject_gap": 0.952, "blimp/accuracy/existential_there_quantifiers_2": 0.508, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.945, "blimp/accuracy/sentential_negation_npi_scope": 0.709, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.838, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.912, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.908, "blimp/accuracy/principle_A_case_2": 0.931, "blimp/accuracy/distractor_agreement_relational_noun": 0.848, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.975, "blimp/accuracy/superlative_quantifiers_1": 0.924, "blimp/accuracy/wh_island": 0.769, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.586, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.969, "blimp/accuracy/irregular_past_participle_verbs": 0.885, "blimp/accuracy/drop_argument": 0.731, "blimp/accuracy/wh_questions_object_gap": 0.859, "blimp/accuracy/animate_subject_passive": 0.791, "blimp/accuracy/existential_there_quantifiers_1": 0.976, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.898, "blimp/accuracy/npi_present_2": 0.528, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.962, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.825, "blimp/accuracy/matrix_question_npi_licensor_present": 0.256, "blimp/accuracy/npi_present_1": 0.572, "blimp/accuracy/wh_vs_that_no_gap": 0.983, "blimp/accuracy/left_branch_island_echo_question": 0.515, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.957, "blimp/accuracy/causative": 0.729, "blimp/accuracy/group_average": 0.7982686567164178, "blimp/accuracy/seq_average": 0.7982686567164179, "cbt/accuracy/NE": 0.797676282051282, "cbt/accuracy/V": 0.934, "cbt/accuracy/CN": 0.8664, "cbt/accuracy/P": 0.9016, "cbt/accuracy/group_average": 0.8749190705128205, "cbt/accuracy/seq_average": 0.8749499799919968, "hellaswag/accuracy/val": 0.3333001394144593, "hellaswag/accuracy/group_average": 0.3333001394144593, "hellaswag/accuracy/seq_average": 0.3333001394144593, "piqa/accuracy/val": 0.6180631120783461, "piqa/accuracy/group_average": 0.6180631120783461, "piqa/accuracy/seq_average": 0.6180631120783461, "ai2arc/accuracy/ARC-Easy": 0.3602536997885835, "ai2arc/accuracy/ARC-Challenge": 0.2257510729613734, "ai2arc/accuracy/group_average": 0.29300238637497844, "ai2arc/accuracy/seq_average": 0.31586402266288954, "mmlu/accuracy/MMLU": 0.26642831605291384, "mmlu/accuracy/group_average": 0.26642831605291384, "mmlu/accuracy/seq_average": 0.26642831605291384, "openbookqa/accuracy/test": 0.28, "openbookqa/accuracy/group_average": 0.28, "openbookqa/accuracy/seq_average": 0.28, "race/accuracy/test/high": 0.2804459691252144, "race/accuracy/test/middle": 0.362116991643454, "race/accuracy/group_average": 0.32128148038433424, "race/accuracy/seq_average": 0.30421564653425215, "siqa/accuracy/dev": 0.3694984646878199, "siqa/accuracy/group_average": 0.3694984646878199, "siqa/accuracy/seq_average": 0.3694984646878199, "winogrande/accuracy/dev": 0.5114443567482242, "winogrande/accuracy/group_average": 0.5114443567482242, "winogrande/accuracy/seq_average": 0.5114443567482242, "commonsenseqa/accuracy/dev_rand_split": 0.26371826371826373, "commonsenseqa/accuracy/group_average": 0.26371826371826373, "commonsenseqa/accuracy/seq_average": 0.26371826371826373}
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-140000.pth.json DELETED
@@ -1 +0,0 @@
1
- {"val/loss": 2.417517877394153, "val/accuracy": 0.5032575053553427, "val/perplexity": 11.217980327100483, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.217075892857143, "lambada/accuracy/total": 0.3223990683229814, "lambada/accuracy/openai_last_token": 0.7895962732919255, "lambada/perplexity": 8.033157605216047, "lambada/lm_loss": 2.9931819562798707, "lambada/lm_perplexity": 19.94905864044293, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.41282828683916206, "mean_loss": 2.317296885125648, "blimp/accuracy/passive_2": 0.893, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.866, "blimp/accuracy/tough_vs_raising_2": 0.908, "blimp/accuracy/tough_vs_raising_1": 0.591, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.906, "blimp/accuracy/principle_A_reconstruction": 0.498, "blimp/accuracy/wh_vs_that_with_gap": 0.516, "blimp/accuracy/principle_A_domain_2": 0.871, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.906, "blimp/accuracy/principle_A_domain_3": 0.601, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.922, "blimp/accuracy/animate_subject_trans": 0.917, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.913, "blimp/accuracy/distractor_agreement_relative_clause": 0.652, "blimp/accuracy/transitive": 0.896, "blimp/accuracy/sentential_subject_island": 0.352, "blimp/accuracy/adjunct_island": 0.899, "blimp/accuracy/intransitive": 0.77, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.894, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.595, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.343, "blimp/accuracy/only_npi_scope": 0.794, "blimp/accuracy/superlative_quantifiers_2": 0.71, "blimp/accuracy/passive_1": 0.879, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.931, "blimp/accuracy/inchoative": 0.613, "blimp/accuracy/anaphor_gender_agreement": 0.979, "blimp/accuracy/principle_A_c_command": 0.618, "blimp/accuracy/only_npi_licensor_present": 0.736, "blimp/accuracy/expletive_it_object_raising": 0.752, "blimp/accuracy/left_branch_island_simple_question": 0.665, "blimp/accuracy/wh_questions_subject_gap": 0.947, "blimp/accuracy/existential_there_quantifiers_2": 0.465, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.952, "blimp/accuracy/sentential_negation_npi_scope": 0.695, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.825, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.911, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.901, "blimp/accuracy/principle_A_case_2": 0.921, "blimp/accuracy/distractor_agreement_relational_noun": 0.843, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.978, "blimp/accuracy/superlative_quantifiers_1": 0.673, "blimp/accuracy/wh_island": 0.704, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.64, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.971, "blimp/accuracy/irregular_past_participle_verbs": 0.92, "blimp/accuracy/drop_argument": 0.749, "blimp/accuracy/wh_questions_object_gap": 0.874, "blimp/accuracy/animate_subject_passive": 0.804, "blimp/accuracy/existential_there_quantifiers_1": 0.986, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.915, "blimp/accuracy/npi_present_2": 0.562, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.95, "blimp/accuracy/anaphor_number_agreement": 0.985, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.962, "blimp/accuracy/existential_there_object_raising": 0.836, "blimp/accuracy/matrix_question_npi_licensor_present": 0.324, "blimp/accuracy/npi_present_1": 0.536, "blimp/accuracy/wh_vs_that_no_gap": 0.982, "blimp/accuracy/left_branch_island_echo_question": 0.511, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974, "blimp/accuracy/causative": 0.754, "blimp/accuracy/group_average": 0.7952985074626865, "blimp/accuracy/seq_average": 0.7952985074626866, "cbt/accuracy/NE": 0.8024839743589743, "cbt/accuracy/V": 0.9336, "cbt/accuracy/CN": 0.8684, "cbt/accuracy/P": 0.9088, "cbt/accuracy/group_average": 0.8783209935897436, "cbt/accuracy/seq_average": 0.8783513405362144, "hellaswag/accuracy/val": 0.34106751643098987, "hellaswag/accuracy/group_average": 0.34106751643098987, "hellaswag/accuracy/seq_average": 0.34106751643098987, "piqa/accuracy/val": 0.6213275299238302, "piqa/accuracy/group_average": 0.6213275299238302, "piqa/accuracy/seq_average": 0.6213275299238302, "ai2arc/accuracy/ARC-Easy": 0.3627906976744186, "ai2arc/accuracy/ARC-Challenge": 0.2429184549356223, "ai2arc/accuracy/group_average": 0.30285457630502044, "ai2arc/accuracy/seq_average": 0.32322946175637396, "mmlu/accuracy/MMLU": 0.2615659635323561, "mmlu/accuracy/group_average": 0.2615659635323561, "mmlu/accuracy/seq_average": 0.2615659635323561, "openbookqa/accuracy/test": 0.272, "openbookqa/accuracy/group_average": 0.272, "openbookqa/accuracy/seq_average": 0.272, "race/accuracy/test/high": 0.2815894797026873, "race/accuracy/test/middle": 0.33774373259052926, "race/accuracy/group_average": 0.30966660614660824, "race/accuracy/seq_average": 0.2979327117957033, "siqa/accuracy/dev": 0.3669396110542477, "siqa/accuracy/group_average": 0.3669396110542477, "siqa/accuracy/seq_average": 0.3669396110542477, "winogrande/accuracy/dev": 0.516179952644041, "winogrande/accuracy/group_average": 0.516179952644041, "winogrande/accuracy/seq_average": 0.516179952644041, "commonsenseqa/accuracy/dev_rand_split": 0.2620802620802621, "commonsenseqa/accuracy/group_average": 0.2620802620802621, "commonsenseqa/accuracy/seq_average": 0.2620802620802621}
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-160000.pth.json DELETED
@@ -1 +0,0 @@
1
- {"val/loss": 2.3851849955897175, "val/accuracy": 0.5081767420614919, "val/perplexity": 10.861071729450302, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.21494440114276, "lambada/accuracy/total": 0.33482142857142855, "lambada/accuracy/openai_last_token": 0.7936723602484472, "lambada/perplexity": 7.655349224839091, "lambada/lm_loss": 2.9612883776869356, "lambada/lm_perplexity": 19.32285085511263, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4214990853164602, "mean_loss": 2.3000646983662385, "blimp/accuracy/passive_2": 0.903, "blimp/accuracy/determiner_noun_agreement_2": 0.986, "blimp/accuracy/ellipsis_n_bar_1": 0.85, "blimp/accuracy/tough_vs_raising_2": 0.857, "blimp/accuracy/tough_vs_raising_1": 0.605, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.899, "blimp/accuracy/principle_A_reconstruction": 0.383, "blimp/accuracy/wh_vs_that_with_gap": 0.486, "blimp/accuracy/principle_A_domain_2": 0.905, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.908, "blimp/accuracy/principle_A_domain_3": 0.604, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.914, "blimp/accuracy/animate_subject_trans": 0.904, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.905, "blimp/accuracy/distractor_agreement_relative_clause": 0.666, "blimp/accuracy/transitive": 0.869, "blimp/accuracy/sentential_subject_island": 0.41, "blimp/accuracy/adjunct_island": 0.875, "blimp/accuracy/intransitive": 0.772, "blimp/accuracy/existential_there_subject_raising": 0.898, "blimp/accuracy/irregular_past_participle_adjectives": 0.971, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.674, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.373, "blimp/accuracy/only_npi_scope": 0.613, "blimp/accuracy/superlative_quantifiers_2": 0.705, "blimp/accuracy/passive_1": 0.876, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.921, "blimp/accuracy/inchoative": 0.614, "blimp/accuracy/anaphor_gender_agreement": 0.97, "blimp/accuracy/principle_A_c_command": 0.698, "blimp/accuracy/only_npi_licensor_present": 0.717, "blimp/accuracy/expletive_it_object_raising": 0.782, "blimp/accuracy/left_branch_island_simple_question": 0.784, "blimp/accuracy/wh_questions_subject_gap": 0.929, "blimp/accuracy/existential_there_quantifiers_2": 0.365, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939, "blimp/accuracy/sentential_negation_npi_scope": 0.747, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.826, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.9, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.897, "blimp/accuracy/principle_A_case_2": 0.955, "blimp/accuracy/distractor_agreement_relational_noun": 0.822, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.983, "blimp/accuracy/superlative_quantifiers_1": 0.775, "blimp/accuracy/wh_island": 0.74, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.609, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975, "blimp/accuracy/irregular_past_participle_verbs": 0.889, "blimp/accuracy/drop_argument": 0.741, "blimp/accuracy/wh_questions_object_gap": 0.835, "blimp/accuracy/animate_subject_passive": 0.8, "blimp/accuracy/existential_there_quantifiers_1": 0.961, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.907, "blimp/accuracy/npi_present_2": 0.573, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.958, "blimp/accuracy/anaphor_number_agreement": 0.989, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.953, "blimp/accuracy/existential_there_object_raising": 0.829, "blimp/accuracy/matrix_question_npi_licensor_present": 0.312, "blimp/accuracy/npi_present_1": 0.547, "blimp/accuracy/wh_vs_that_no_gap": 0.976, "blimp/accuracy/left_branch_island_echo_question": 0.551, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.96, "blimp/accuracy/causative": 0.729, "blimp/accuracy/group_average": 0.7948208955223879, "blimp/accuracy/seq_average": 0.7948208955223881, "cbt/accuracy/NE": 0.8064903846153846, "cbt/accuracy/V": 0.9376, "cbt/accuracy/CN": 0.8764, "cbt/accuracy/P": 0.9164, "cbt/accuracy/group_average": 0.8842225961538461, "cbt/accuracy/seq_average": 0.8842537014805922, "hellaswag/accuracy/val": 0.34644493128858794, "hellaswag/accuracy/group_average": 0.34644493128858794, "hellaswag/accuracy/seq_average": 0.34644493128858794, "piqa/accuracy/val": 0.6082698585418934, "piqa/accuracy/group_average": 0.6082698585418934, "piqa/accuracy/seq_average": 0.6082698585418934, "ai2arc/accuracy/ARC-Easy": 0.3564482029598309, "ai2arc/accuracy/ARC-Challenge": 0.21888412017167383, "ai2arc/accuracy/group_average": 0.28766616156575237, "ai2arc/accuracy/seq_average": 0.3110481586402266, "mmlu/accuracy/MMLU": 0.267786914551305, "mmlu/accuracy/group_average": 0.267786914551305, "mmlu/accuracy/seq_average": 0.267786914551305, "openbookqa/accuracy/test": 0.288, "openbookqa/accuracy/group_average": 0.288, "openbookqa/accuracy/seq_average": 0.288, "race/accuracy/test/high": 0.2833047455688965, "race/accuracy/test/middle": 0.366991643454039, "race/accuracy/group_average": 0.32514819451146776, "race/accuracy/seq_average": 0.30766112687474667, "siqa/accuracy/dev": 0.36745138178096215, "siqa/accuracy/group_average": 0.36745138178096215, "siqa/accuracy/seq_average": 0.36745138178096215, "winogrande/accuracy/dev": 0.5090765588003157, "winogrande/accuracy/group_average": 0.5090765588003157, "winogrande/accuracy/seq_average": 0.5090765588003157, "commonsenseqa/accuracy/dev_rand_split": 0.2588042588042588, "commonsenseqa/accuracy/group_average": 0.2588042588042588, "commonsenseqa/accuracy/seq_average": 0.2588042588042588}
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-180000.pth.json DELETED
@@ -1 +0,0 @@
1
- {"val/loss": 2.378108855216734, "val/accuracy": 0.5088865218623992, "val/perplexity": 10.784488537554052, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.2516739175926825, "lambada/accuracy/total": 0.34646739130434784, "lambada/accuracy/openai_last_token": 0.796001552795031, "lambada/perplexity": 7.44671847155186, "lambada/lm_loss": 2.9623079937057764, "lambada/lm_perplexity": 19.342562790969215, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.42767695658337357, "mean_loss": 2.3148913864047085, "blimp/accuracy/passive_2": 0.912, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.857, "blimp/accuracy/tough_vs_raising_2": 0.865, "blimp/accuracy/tough_vs_raising_1": 0.603, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/principle_A_reconstruction": 0.329, "blimp/accuracy/wh_vs_that_with_gap": 0.42, "blimp/accuracy/principle_A_domain_2": 0.882, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.908, "blimp/accuracy/principle_A_domain_3": 0.592, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.921, "blimp/accuracy/animate_subject_trans": 0.9, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.914, "blimp/accuracy/distractor_agreement_relative_clause": 0.681, "blimp/accuracy/transitive": 0.892, "blimp/accuracy/sentential_subject_island": 0.378, "blimp/accuracy/adjunct_island": 0.886, "blimp/accuracy/intransitive": 0.743, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.947, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.733, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.336, "blimp/accuracy/only_npi_scope": 0.719, "blimp/accuracy/superlative_quantifiers_2": 0.818, "blimp/accuracy/passive_1": 0.896, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.938, "blimp/accuracy/inchoative": 0.591, "blimp/accuracy/anaphor_gender_agreement": 0.965, "blimp/accuracy/principle_A_c_command": 0.683, "blimp/accuracy/only_npi_licensor_present": 0.671, "blimp/accuracy/expletive_it_object_raising": 0.764, "blimp/accuracy/left_branch_island_simple_question": 0.844, "blimp/accuracy/wh_questions_subject_gap": 0.951, "blimp/accuracy/existential_there_quantifiers_2": 0.518, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.943, "blimp/accuracy/sentential_negation_npi_scope": 0.756, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.821, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.927, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.905, "blimp/accuracy/distractor_agreement_relational_noun": 0.827, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.984, "blimp/accuracy/superlative_quantifiers_1": 0.631, "blimp/accuracy/wh_island": 0.754, "blimp/accuracy/principle_A_domain_1": 0.996, "blimp/accuracy/complex_NP_island": 0.613, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.97, "blimp/accuracy/irregular_past_participle_verbs": 0.899, "blimp/accuracy/drop_argument": 0.724, "blimp/accuracy/wh_questions_object_gap": 0.853, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.985, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882, "blimp/accuracy/npi_present_2": 0.546, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.94, "blimp/accuracy/anaphor_number_agreement": 0.99, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.959, "blimp/accuracy/existential_there_object_raising": 0.885, "blimp/accuracy/matrix_question_npi_licensor_present": 0.398, "blimp/accuracy/npi_present_1": 0.558, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.485, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.972, "blimp/accuracy/causative": 0.759, "blimp/accuracy/group_average": 0.7978955223880596, "blimp/accuracy/seq_average": 0.7978955223880597, "cbt/accuracy/NE": 0.8084935897435898, "cbt/accuracy/V": 0.9356, "cbt/accuracy/CN": 0.8772, "cbt/accuracy/P": 0.92, "cbt/accuracy/group_average": 0.8853233974358974, "cbt/accuracy/seq_average": 0.8853541416566627, "hellaswag/accuracy/val": 0.34714200358494324, "hellaswag/accuracy/group_average": 0.34714200358494324, "hellaswag/accuracy/seq_average": 0.34714200358494324, "piqa/accuracy/val": 0.6305767138193689, "piqa/accuracy/group_average": 0.6305767138193689, "piqa/accuracy/seq_average": 0.6305767138193689, "ai2arc/accuracy/ARC-Easy": 0.36236786469344606, "ai2arc/accuracy/ARC-Challenge": 0.23261802575107296, "ai2arc/accuracy/group_average": 0.2974929452222595, "ai2arc/accuracy/seq_average": 0.31954674220963175, "mmlu/accuracy/MMLU": 0.262567036110118, "mmlu/accuracy/group_average": 0.262567036110118, "mmlu/accuracy/seq_average": 0.262567036110118, "openbookqa/accuracy/test": 0.276, "openbookqa/accuracy/group_average": 0.276, "openbookqa/accuracy/seq_average": 0.276, "race/accuracy/test/high": 0.2858776443682104, "race/accuracy/test/middle": 0.36629526462395545, "race/accuracy/group_average": 0.3260864544960829, "race/accuracy/seq_average": 0.30928252938792056, "siqa/accuracy/dev": 0.3710337768679631, "siqa/accuracy/group_average": 0.3710337768679631, "siqa/accuracy/seq_average": 0.3710337768679631, "winogrande/accuracy/dev": 0.4964483030781373, "winogrande/accuracy/group_average": 0.4964483030781373, "winogrande/accuracy/seq_average": 0.4964483030781373, "commonsenseqa/accuracy/dev_rand_split": 0.26371826371826373, "commonsenseqa/accuracy/group_average": 0.26371826371826373, "commonsenseqa/accuracy/seq_average": 0.26371826371826373}
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-200000.pth.json DELETED
@@ -1 +0,0 @@
1
- {"val/loss": 2.3618279733965473, "val/accuracy": 0.511565177671371, "val/perplexity": 10.610329135481647, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.249894231002523, "lambada/accuracy/total": 0.34607919254658387, "lambada/accuracy/openai_last_token": 0.7901785714285714, "lambada/perplexity": 7.497578859131717, "lambada/lm_loss": 2.9572677957436464, "lambada/lm_perplexity": 19.245317718480347, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4288221851089774, "mean_loss": 2.305861102199535, "blimp/accuracy/passive_2": 0.904, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.858, "blimp/accuracy/tough_vs_raising_2": 0.857, "blimp/accuracy/tough_vs_raising_1": 0.572, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.911, "blimp/accuracy/principle_A_reconstruction": 0.429, "blimp/accuracy/wh_vs_that_with_gap": 0.504, "blimp/accuracy/principle_A_domain_2": 0.894, "blimp/accuracy/determiner_noun_agreement_1": 0.994, "blimp/accuracy/ellipsis_n_bar_2": 0.896, "blimp/accuracy/principle_A_domain_3": 0.622, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.922, "blimp/accuracy/animate_subject_trans": 0.906, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.929, "blimp/accuracy/distractor_agreement_relative_clause": 0.705, "blimp/accuracy/transitive": 0.865, "blimp/accuracy/sentential_subject_island": 0.386, "blimp/accuracy/adjunct_island": 0.879, "blimp/accuracy/intransitive": 0.729, "blimp/accuracy/existential_there_subject_raising": 0.885, "blimp/accuracy/irregular_past_participle_adjectives": 0.973, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.756, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.369, "blimp/accuracy/only_npi_scope": 0.672, "blimp/accuracy/superlative_quantifiers_2": 0.788, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.915, "blimp/accuracy/inchoative": 0.588, "blimp/accuracy/anaphor_gender_agreement": 0.973, "blimp/accuracy/principle_A_c_command": 0.688, "blimp/accuracy/only_npi_licensor_present": 0.981, "blimp/accuracy/expletive_it_object_raising": 0.763, "blimp/accuracy/left_branch_island_simple_question": 0.863, "blimp/accuracy/wh_questions_subject_gap": 0.94, "blimp/accuracy/existential_there_quantifiers_2": 0.521, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.95, "blimp/accuracy/sentential_negation_npi_scope": 0.738, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.844, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.895, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.905, "blimp/accuracy/principle_A_case_2": 0.94, "blimp/accuracy/distractor_agreement_relational_noun": 0.857, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.969, "blimp/accuracy/superlative_quantifiers_1": 0.68, "blimp/accuracy/wh_island": 0.732, "blimp/accuracy/principle_A_domain_1": 0.995, "blimp/accuracy/complex_NP_island": 0.642, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.966, "blimp/accuracy/irregular_past_participle_verbs": 0.935, "blimp/accuracy/drop_argument": 0.719, "blimp/accuracy/wh_questions_object_gap": 0.864, "blimp/accuracy/animate_subject_passive": 0.784, "blimp/accuracy/existential_there_quantifiers_1": 0.979, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/npi_present_2": 0.573, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.946, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.962, "blimp/accuracy/existential_there_object_raising": 0.867, "blimp/accuracy/matrix_question_npi_licensor_present": 0.389, "blimp/accuracy/npi_present_1": 0.583, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.556, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.956, "blimp/accuracy/causative": 0.766, "blimp/accuracy/group_average": 0.8084477611940298, "blimp/accuracy/seq_average": 0.8084477611940298, "cbt/accuracy/NE": 0.8161057692307693, "cbt/accuracy/V": 0.9356, "cbt/accuracy/CN": 0.878, "cbt/accuracy/P": 0.9184, "cbt/accuracy/group_average": 0.8870264423076923, "cbt/accuracy/seq_average": 0.8870548219287715, "hellaswag/accuracy/val": 0.3549093806014738, "hellaswag/accuracy/group_average": 0.3549093806014738, "hellaswag/accuracy/seq_average": 0.3549093806014738, "piqa/accuracy/val": 0.6245919477693145, "piqa/accuracy/group_average": 0.6245919477693145, "piqa/accuracy/seq_average": 0.6245919477693145, "ai2arc/accuracy/ARC-Easy": 0.3657505285412262, "ai2arc/accuracy/ARC-Challenge": 0.22832618025751072, "ai2arc/accuracy/group_average": 0.29703835439936843, "ai2arc/accuracy/seq_average": 0.32039660056657226, "mmlu/accuracy/MMLU": 0.26399713979263495, "mmlu/accuracy/group_average": 0.26399713979263495, "mmlu/accuracy/seq_average": 0.26399713979263495, "openbookqa/accuracy/test": 0.284, "openbookqa/accuracy/group_average": 0.284, "openbookqa/accuracy/seq_average": 0.284, "race/accuracy/test/high": 0.2833047455688965, "race/accuracy/test/middle": 0.35863509749303624, "race/accuracy/group_average": 0.3209699215309664, "race/accuracy/seq_average": 0.3052290231049858, "siqa/accuracy/dev": 0.3705220061412487, "siqa/accuracy/group_average": 0.3705220061412487, "siqa/accuracy/seq_average": 0.3705220061412487, "winogrande/accuracy/dev": 0.4996053670086819, "winogrande/accuracy/group_average": 0.4996053670086819, "winogrande/accuracy/seq_average": 0.4996053670086819, "commonsenseqa/accuracy/dev_rand_split": 0.266994266994267, "commonsenseqa/accuracy/group_average": 0.266994266994267, "commonsenseqa/accuracy/seq_average": 0.266994266994267}
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-360000.pth.json DELETED
@@ -1 +0,0 @@
1
- {"val/loss": 2.2778657482516382, "val/accuracy": 0.5237318469632056, "val/perplexity": 9.755836762978193, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.152370666124806, "lambada/accuracy/total": 0.38548136645962733, "lambada/accuracy/openai_last_token": 0.8049301242236024, "lambada/perplexity": 6.364111971040649, "lambada/lm_loss": 2.8554696257618963, "lambada/lm_perplexity": 17.38259860721917, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4546066067114165, "mean_loss": 2.215118207188222, "blimp/accuracy/passive_2": 0.914, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.866, "blimp/accuracy/tough_vs_raising_2": 0.906, "blimp/accuracy/tough_vs_raising_1": 0.573, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.886, "blimp/accuracy/principle_A_reconstruction": 0.474, "blimp/accuracy/wh_vs_that_with_gap": 0.448, "blimp/accuracy/principle_A_domain_2": 0.905, "blimp/accuracy/determiner_noun_agreement_1": 0.995, "blimp/accuracy/ellipsis_n_bar_2": 0.904, "blimp/accuracy/principle_A_domain_3": 0.632, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.934, "blimp/accuracy/animate_subject_trans": 0.904, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.927, "blimp/accuracy/distractor_agreement_relative_clause": 0.676, "blimp/accuracy/transitive": 0.878, "blimp/accuracy/sentential_subject_island": 0.33, "blimp/accuracy/adjunct_island": 0.876, "blimp/accuracy/intransitive": 0.746, "blimp/accuracy/existential_there_subject_raising": 0.887, "blimp/accuracy/irregular_past_participle_adjectives": 0.929, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.749, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.366, "blimp/accuracy/only_npi_scope": 0.655, "blimp/accuracy/superlative_quantifiers_2": 0.845, "blimp/accuracy/passive_1": 0.896, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.913, "blimp/accuracy/inchoative": 0.612, "blimp/accuracy/anaphor_gender_agreement": 0.98, "blimp/accuracy/principle_A_c_command": 0.719, "blimp/accuracy/only_npi_licensor_present": 0.707, "blimp/accuracy/expletive_it_object_raising": 0.77, "blimp/accuracy/left_branch_island_simple_question": 0.835, "blimp/accuracy/wh_questions_subject_gap": 0.937, "blimp/accuracy/existential_there_quantifiers_2": 0.506, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.946, "blimp/accuracy/sentential_negation_npi_scope": 0.75, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.825, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.901, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.892, "blimp/accuracy/principle_A_case_2": 0.922, "blimp/accuracy/distractor_agreement_relational_noun": 0.866, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.974, "blimp/accuracy/superlative_quantifiers_1": 0.823, "blimp/accuracy/wh_island": 0.736, "blimp/accuracy/principle_A_domain_1": 0.997, "blimp/accuracy/complex_NP_island": 0.613, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.974, "blimp/accuracy/irregular_past_participle_verbs": 0.923, "blimp/accuracy/drop_argument": 0.722, "blimp/accuracy/wh_questions_object_gap": 0.858, "blimp/accuracy/animate_subject_passive": 0.795, "blimp/accuracy/existential_there_quantifiers_1": 0.978, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.89, "blimp/accuracy/npi_present_2": 0.583, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.962, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.966, "blimp/accuracy/existential_there_object_raising": 0.831, "blimp/accuracy/matrix_question_npi_licensor_present": 0.426, "blimp/accuracy/npi_present_1": 0.577, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.578, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.958, "blimp/accuracy/causative": 0.752, "blimp/accuracy/group_average": 0.8067462686567164, "blimp/accuracy/seq_average": 0.8067462686567164, "cbt/accuracy/NE": 0.8217147435897436, "cbt/accuracy/V": 0.9432, "cbt/accuracy/CN": 0.8892, "cbt/accuracy/P": 0.9212, "cbt/accuracy/group_average": 0.8938286858974358, "cbt/accuracy/seq_average": 0.8938575430172069, "hellaswag/accuracy/val": 0.3732324238199562, "hellaswag/accuracy/group_average": 0.3732324238199562, "hellaswag/accuracy/seq_average": 0.3732324238199562, "piqa/accuracy/val": 0.6349292709466812, "piqa/accuracy/group_average": 0.6349292709466812, "piqa/accuracy/seq_average": 0.6349292709466812, "ai2arc/accuracy/ARC-Easy": 0.38520084566596197, "ai2arc/accuracy/ARC-Challenge": 0.23090128755364808, "ai2arc/accuracy/group_average": 0.308051066609805, "ai2arc/accuracy/seq_average": 0.3342776203966006, "mmlu/accuracy/MMLU": 0.2621380050053629, "mmlu/accuracy/group_average": 0.2621380050053629, "mmlu/accuracy/seq_average": 0.2621380050053629, "openbookqa/accuracy/test": 0.284, "openbookqa/accuracy/group_average": 0.284, "openbookqa/accuracy/seq_average": 0.284, "race/accuracy/test/high": 0.2890222984562607, "race/accuracy/test/middle": 0.3725626740947075, "race/accuracy/group_average": 0.3307924862754841, "race/accuracy/seq_average": 0.3133360356708553, "siqa/accuracy/dev": 0.37871033776867963, "siqa/accuracy/group_average": 0.37871033776867963, "siqa/accuracy/seq_average": 0.37871033776867963, "winogrande/accuracy/dev": 0.5035516969218626, "winogrande/accuracy/group_average": 0.5035516969218626, "winogrande/accuracy/seq_average": 0.5035516969218626, "commonsenseqa/accuracy/dev_rand_split": 0.27682227682227684, "commonsenseqa/accuracy/group_average": 0.27682227682227684, "commonsenseqa/accuracy/seq_average": 0.27682227682227684}
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/export/result-model-380000.pth.json DELETED
@@ -1 +0,0 @@
1
- {"val/loss": 2.2734333161384828, "val/accuracy": 0.5244495022681451, "val/perplexity": 9.712690371184987, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.161026664402174, "lambada/accuracy/total": 0.3825698757763975, "lambada/accuracy/openai_last_token": 0.8060947204968945, "lambada/perplexity": 6.370929817950676, "lambada/lm_loss": 2.8524791362414432, "lambada/lm_perplexity": 17.330693777353112, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4535096890222713, "mean_loss": 2.2172299902703285, "blimp/accuracy/passive_2": 0.91, "blimp/accuracy/determiner_noun_agreement_2": 0.989, "blimp/accuracy/ellipsis_n_bar_1": 0.867, "blimp/accuracy/tough_vs_raising_2": 0.88, "blimp/accuracy/tough_vs_raising_1": 0.568, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.891, "blimp/accuracy/principle_A_reconstruction": 0.412, "blimp/accuracy/wh_vs_that_with_gap": 0.415, "blimp/accuracy/principle_A_domain_2": 0.889, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.905, "blimp/accuracy/principle_A_domain_3": 0.612, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.93, "blimp/accuracy/animate_subject_trans": 0.905, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.924, "blimp/accuracy/distractor_agreement_relative_clause": 0.709, "blimp/accuracy/transitive": 0.892, "blimp/accuracy/sentential_subject_island": 0.335, "blimp/accuracy/adjunct_island": 0.861, "blimp/accuracy/intransitive": 0.761, "blimp/accuracy/existential_there_subject_raising": 0.893, "blimp/accuracy/irregular_past_participle_adjectives": 0.976, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.748, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.339, "blimp/accuracy/only_npi_scope": 0.718, "blimp/accuracy/superlative_quantifiers_2": 0.837, "blimp/accuracy/passive_1": 0.896, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.932, "blimp/accuracy/inchoative": 0.613, "blimp/accuracy/anaphor_gender_agreement": 0.983, "blimp/accuracy/principle_A_c_command": 0.702, "blimp/accuracy/only_npi_licensor_present": 0.732, "blimp/accuracy/expletive_it_object_raising": 0.766, "blimp/accuracy/left_branch_island_simple_question": 0.839, "blimp/accuracy/wh_questions_subject_gap": 0.938, "blimp/accuracy/existential_there_quantifiers_2": 0.463, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.94, "blimp/accuracy/sentential_negation_npi_scope": 0.739, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.821, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.899, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.92, "blimp/accuracy/distractor_agreement_relational_noun": 0.852, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.973, "blimp/accuracy/superlative_quantifiers_1": 0.87, "blimp/accuracy/wh_island": 0.741, "blimp/accuracy/principle_A_domain_1": 0.995, "blimp/accuracy/complex_NP_island": 0.606, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975, "blimp/accuracy/irregular_past_participle_verbs": 0.951, "blimp/accuracy/drop_argument": 0.727, "blimp/accuracy/wh_questions_object_gap": 0.866, "blimp/accuracy/animate_subject_passive": 0.798, "blimp/accuracy/existential_there_quantifiers_1": 0.963, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.894, "blimp/accuracy/npi_present_2": 0.566, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.962, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.966, "blimp/accuracy/existential_there_object_raising": 0.843, "blimp/accuracy/matrix_question_npi_licensor_present": 0.405, "blimp/accuracy/npi_present_1": 0.544, "blimp/accuracy/wh_vs_that_no_gap": 0.983, "blimp/accuracy/left_branch_island_echo_question": 0.515, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.965, "blimp/accuracy/causative": 0.756, "blimp/accuracy/group_average": 0.8052537313432835, "blimp/accuracy/seq_average": 0.8052537313432836, "cbt/accuracy/NE": 0.8205128205128205, "cbt/accuracy/V": 0.9424, "cbt/accuracy/CN": 0.89, "cbt/accuracy/P": 0.9236, "cbt/accuracy/group_average": 0.8941282051282051, "cbt/accuracy/seq_average": 0.894157663065226, "hellaswag/accuracy/val": 0.3738299143596893, "hellaswag/accuracy/group_average": 0.3738299143596893, "hellaswag/accuracy/seq_average": 0.3738299143596893, "piqa/accuracy/val": 0.6365614798694232, "piqa/accuracy/group_average": 0.6365614798694232, "piqa/accuracy/seq_average": 0.6365614798694232, "ai2arc/accuracy/ARC-Easy": 0.3864693446088795, "ai2arc/accuracy/ARC-Challenge": 0.23605150214592274, "ai2arc/accuracy/group_average": 0.3112604233774011, "ai2arc/accuracy/seq_average": 0.33682719546742207, "mmlu/accuracy/MMLU": 0.262567036110118, "mmlu/accuracy/group_average": 0.262567036110118, "mmlu/accuracy/seq_average": 0.262567036110118, "openbookqa/accuracy/test": 0.278, "openbookqa/accuracy/group_average": 0.278, "openbookqa/accuracy/seq_average": 0.278, "race/accuracy/test/high": 0.29130931961120643, "race/accuracy/test/middle": 0.36908077994428967, "race/accuracy/group_average": 0.33019504977774805, "race/accuracy/seq_average": 0.3139440616132955, "siqa/accuracy/dev": 0.3751279426816786, "siqa/accuracy/group_average": 0.3751279426816786, "siqa/accuracy/seq_average": 0.3751279426816786, "winogrande/accuracy/dev": 0.5082872928176796, "winogrande/accuracy/group_average": 0.5082872928176796, "winogrande/accuracy/seq_average": 0.5082872928176796, "commonsenseqa/accuracy/dev_rand_split": 0.2719082719082719, "commonsenseqa/accuracy/group_average": 0.2719082719082719, "commonsenseqa/accuracy/seq_average": 0.2719082719082719}
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/tensorboard/events.out.tfevents.1753821939.SPP00018465.1233037.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:056a96afcb803e29b5f30f2a0c597031f23a800b334e9f1cee7ff1a893a2cece
3
- size 88
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/tensorboard/events.out.tfevents.1753821993.SPP00018465.1236599.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:be69745257a541a3bc58aa2372e7d86499433f9b7b8c12322c4c5a03108be3a4
3
- size 276054
 
 
 
 
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb_down_flip/tensorboard/events.out.tfevents.1753822261.SPP00018465.1253372.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:df8852ad7633aef96ded0003b154068d836a3bb4e21b582f2fca0a09003e5a5a
3
- size 385043108