Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +13 -0
- checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2100/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2200/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2300/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2400/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2500/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2600/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-300/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_full_10M_seed0/artifacts/models--gpt2/blobs/248dfc3911869ec493c76e65bf2fcf7f615828b0254c12b473182f0f81d3a707 +3 -0
- checkpoints/GPT-2/babylm_reverse_full_10M_seed0/artifacts/models--gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1100/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1200/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1300/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1400/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1500/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1600/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1700/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1800/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1900/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-200/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-2000/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-2700/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-2919/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-400/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-600/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-700/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-800/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-900/model.safetensors +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-900/rng_state_1.pth +3 -0
- checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-900/training_args.bin +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/rng_state_0.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/rng_state_1.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/rng_state_2.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/scheduler.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/tokenizer.json +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/training_args.bin +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_1.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_2.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/scheduler.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/tokenizer.json +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/training_args.bin +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/rng_state_0.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/rng_state_1.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/rng_state_2.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/scheduler.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/tokenizer.json +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/training_args.bin +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1400/config.json +36 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1400/generation_config.json +9 -0
.gitattributes
CHANGED
|
@@ -294,3 +294,16 @@ checkpoints/Llama-3.2-1B/babylm_reverse_partial_10M_seed0/artifacts/models--meta
|
|
| 294 |
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 295 |
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 296 |
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1700/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 295 |
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 296 |
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1700/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 297 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 298 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 299 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 300 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 301 |
+
checkpoints/GPT-2/babylm_reverse_full_10M_seed0/artifacts/models--gpt2/blobs/248dfc3911869ec493c76e65bf2fcf7f615828b0254c12b473182f0f81d3a707 filter=lfs diff=lfs merge=lfs -text
|
| 302 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 303 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 304 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-2700/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 305 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 306 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-2919/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 307 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 308 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 309 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2100/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3621a4e42cc2c5cad279b3450fd9ef09ec5b4546548562775d917cab8173c52
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2200/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e1b7fbfd29153849db4684c989fd9536f3ec8949a70db40ec7a646b540d8320
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2300/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa77bada3cf328889026531cde3aced0ca7348eaf7dfe4d63fdb5a5453dae863
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2400/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:febd1939ba5ec58e6aee28d517d528577d1029323762745f64034e8a94ceda7e
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fc4646004037925fcefb11d3282b67c94a692c05be616969767b9b8b7f9732a
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-2600/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dbc180db5bbccea247d1aa54689010da96cf250136eb68add7c3eb2b5aebda6
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_control_10M_seed0/runs/checkpoint-300/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3c9aa6b8aff52d50b47aee227a6dd86316334acc17165d36d1da41059471549
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_full_10M_seed0/artifacts/models--gpt2/blobs/248dfc3911869ec493c76e65bf2fcf7f615828b0254c12b473182f0f81d3a707
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:248dfc3911869ec493c76e65bf2fcf7f615828b0254c12b473182f0f81d3a707
|
| 3 |
+
size 548105171
|
checkpoints/GPT-2/babylm_reverse_full_10M_seed0/artifacts/models--gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:248dfc3911869ec493c76e65bf2fcf7f615828b0254c12b473182f0f81d3a707
|
| 3 |
+
size 548105171
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbce2dc19c88327ccdd638378cf309339267b94959339b032dfd46918ad5f15e
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1100/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e58a65c3d4eb283fe014174a890cfbbecc025f3a137b375dc09ea0372d0e7da
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1200/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28b9d6469233b6f517cffe52eef1034c5aae0d6cafa930fb7e59f1feefdf4ac4
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1300/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e47d5e2ac22b27408d78568cbf135ee5dcbccfddd4c98f85a0715f294ca8236
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1400/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:777ec5eeb0591dd995361a0caf4686be57b66ceb13635213f7068a3e019e093c
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1500/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7821603eb8e686f0804cc0300c8319d32845730696a41690e42094dfb96b2a3d
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1600/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc275143ad3bf5c42fadd6fde0e0ed95ef73bab086244257694ee0f1f20a4b85
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1700/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d32b436b597203c3bb9a72bc9db598262e90264454ae0c922146743e81a47bb
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1800/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:719b6e0326e50aa26ad4f9e839be1d9c355596499cd156c99cd2ca72f12eea91
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-1900/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b93da9b1f889ff06893a0272c9789e61719115c2cd31c59b75fc45b7690b1ef4
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-200/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be3096f2cd2008ababe2056da753b61f62ce6711d69c3629a0a6b9ca6cc171ed
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-2000/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2eaab632ae2ef5e08af8b8e61edf8b8dd19262ae37a3a07ca3ee49085ecf2293
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-2700/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fda7ac84cde4f9f75fc3d58bd11610d90df061735584d346d8eb0a3c5e755cae
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-2919/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7593aafc3cf49f93f2f49c76c8652f670db002ec4244b7173e5776af928476ab
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-400/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70e695632c2b4ea1232d5e8ff06eef4707b8c1a92389243721f1bba7a878ef00
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-600/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e1b863cbbce509888f1fc064504a60412b84fa240b550e43ed394f28bd8d492
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-700/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dca9ac06808d48d1c8224c697c45d4744b449b4e8032e6a12fdd7853f6a3c57
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-800/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6db5b63776b0a238d47e9a64800764dd780e4f3b85c1cf88a6c0c8a56940528d
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-900/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f741e772b467b67e4d2598373dc9efc8372a84b24a6ca00e4f31c85642d18e0
|
| 3 |
+
size 326089504
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-900/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1adf48ea3a4dfba5404637be6f82c9a15ebd900e796e29b6ffeb0fdc3b1d475c
|
| 3 |
+
size 16567
|
checkpoints/GPT-2/babylm_reverse_partial_10M_seed0/runs/checkpoint-900/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8eb0a6f24460a35a5849475791340c8d7dfb54225c5749b791b10e833071abfa
|
| 3 |
+
size 6011
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:186b472c4073bfc086d5b4584aeaefc23f9c40f1a8199963bda08775617e1e12
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8957d748b592f49a56723f1936535313c244cee56956dbc893c99d40f6ef84b
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be2cd13083af0074c65198c01c90c0a3c599fbe6e0ea705a8835ca51f393d0c0
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31b0601a70e7904c3660f5346767114aa9fa6d4a2cba625fac4b4b9ea3ada066
|
| 3 |
+
size 563
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fda2ecf7b5daf3cb77324ea6c5b9cb8bf47119249a6d756ef2dd4d62540ac6f
|
| 3 |
+
size 17210554
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1100/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4beb369b5d9b2e313f485f5348c5624fda1578b4b4b839a45918f8f15a4fbd4b
|
| 3 |
+
size 6011
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f34c8972483cae48cde438f54c14179a5393e31298d634393f2e166c6a9bfb3e
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d0068d515416f5010564895d620c337c347a04bf8d5382015b5833708d52850
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1597fdb72beb896bfa337f48e1d75c0339960cd7266fb14e0c9cea974436401f
|
| 3 |
+
size 563
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fda2ecf7b5daf3cb77324ea6c5b9cb8bf47119249a6d756ef2dd4d62540ac6f
|
| 3 |
+
size 17210554
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4beb369b5d9b2e313f485f5348c5624fda1578b4b4b839a45918f8f15a4fbd4b
|
| 3 |
+
size 6011
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7020c2ba12cf7959bf7033d7ad83dcf637e59ddbd3ff5b17fc4b9a1ebed3202
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f29ab0036e394c39267ad86bf247bf1558e2213add95383fce34a980f109b42e
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14413139ea5b777791c35297ed8a0a717bb3d6a3553268cc92b15fa61be1ae39
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3288dac9d8ea621655c7c1e03f2306d032714a31bcf52c99a781b6eb9a2538c3
|
| 3 |
+
size 563
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fda2ecf7b5daf3cb77324ea6c5b9cb8bf47119249a6d756ef2dd4d62540ac6f
|
| 3 |
+
size 17210554
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1300/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4beb369b5d9b2e313f485f5348c5624fda1578b4b4b839a45918f8f15a4fbd4b
|
| 3 |
+
size 6011
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1400/config.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "meta-llama/Llama-3.2-3B",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LlamaForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": false,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 128000,
|
| 9 |
+
"eos_token_id": 128001,
|
| 10 |
+
"head_dim": 128,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 3072,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"num_attention_heads": 24,
|
| 19 |
+
"num_hidden_layers": 28,
|
| 20 |
+
"num_key_value_heads": 8,
|
| 21 |
+
"pretraining_tp": 1,
|
| 22 |
+
"rms_norm_eps": 1e-05,
|
| 23 |
+
"rope_scaling": {
|
| 24 |
+
"factor": 32.0,
|
| 25 |
+
"high_freq_factor": 4.0,
|
| 26 |
+
"low_freq_factor": 1.0,
|
| 27 |
+
"original_max_position_embeddings": 8192,
|
| 28 |
+
"rope_type": "llama3"
|
| 29 |
+
},
|
| 30 |
+
"rope_theta": 500000.0,
|
| 31 |
+
"tie_word_embeddings": true,
|
| 32 |
+
"torch_dtype": "float16",
|
| 33 |
+
"transformers_version": "4.45.1",
|
| 34 |
+
"use_cache": true,
|
| 35 |
+
"vocab_size": 128256
|
| 36 |
+
}
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_full_10M_seed0/runs/checkpoint-1400/generation_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 128000,
|
| 4 |
+
"do_sample": true,
|
| 5 |
+
"eos_token_id": 128001,
|
| 6 |
+
"temperature": 0.6,
|
| 7 |
+
"top_p": 0.9,
|
| 8 |
+
"transformers_version": "4.45.1"
|
| 9 |
+
}
|