Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +8 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00002-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1000/model-00002-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1400/model-00001-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1800/model-00001-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1900/model-00001-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-200/model-00002-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2100/model-00002-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_0.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_1.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_2.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/scheduler.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/tokenizer.json +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/training_args.bin +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/rng_state_1.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/training_args.bin +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_0.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_1.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_2.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/scheduler.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/tokenizer.json +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/training_args.bin +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_0.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_1.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_2.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/scheduler.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/tokenizer.json +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/training_args.bin +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2700/model-00001-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2736/model-00002-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-400/model-00001-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-800/model-00001-of-00002.safetensors +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_0.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_1.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_2.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/scheduler.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/tokenizer.json +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/training_args.bin +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/rng_state_0.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/rng_state_2.pth +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/scheduler.pt +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/tokenizer.json +3 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/config.json +36 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/generation_config.json +9 -0
- checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/latest +1 -0
.gitattributes
CHANGED
|
@@ -474,3 +474,11 @@ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/check
|
|
| 474 |
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 475 |
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 476 |
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 475 |
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 476 |
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 477 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 478 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 479 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 480 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 filter=lfs diff=lfs merge=lfs -text
|
| 481 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb filter=lfs diff=lfs merge=lfs -text
|
| 482 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 483 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 484 |
+
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7ba538ad057004706fbc41ab7a2891a0737c5f21f0146fb0f5853e4058a5b2d
|
| 3 |
+
size 25702104228
|
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1481f1831e5310ce8fbf499492bac9bdd6b8d16f33d889ff30faba36f28943a1
|
| 3 |
+
size 25702107428
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb
|
| 3 |
+
size 1459729952
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002
|
| 3 |
+
size 4965799096
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002
|
| 3 |
+
size 4965799096
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb
|
| 3 |
+
size 1459729952
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1000/model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28051a8c2a5f83fed48af40900c0996e7fa90218c8be19c4c194686ed529547b
|
| 3 |
+
size 2247734920
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1400/model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce00fce8136b9f0731d237926e667fa5aa61b18fddac7406c0a2cf46496bc217
|
| 3 |
+
size 4965798912
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1800/model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b248d3ea91548006f181907865da500590937f8e5023d9f24533d765038dedbd
|
| 3 |
+
size 4965798912
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1900/model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2bdb5e0577fff0aaf2d97b2422c034677fda38875c3bb4c3edb30490b2d491ad
|
| 3 |
+
size 4965798912
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-200/model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45b6cabbd4cf1a136e34060bd944e77dfd9b269034e96b6bceae6496363fa60d
|
| 3 |
+
size 2247734920
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2100/model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:812a79e59e58f3116a592fd95ad17bf4fd510b3aca83fa5a4c02985e71d590eb
|
| 3 |
+
size 2247734920
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef233d2806676c682805f85cf51776d27dc06740dc062015cc1e961a14df9e97
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24be9519b492ae2ab5da4e7d258ad3f276da346422161ebc6874f26057c028cd
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77a120a924d0a14713ef6ebebe2792a637dac07f7fbafa91cf0930019864eb1b
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0ae0b26ac4fa40c1ab1e0b15f393cd9c504f42bb6597d6fdb213847b1fd4b2e
|
| 3 |
+
size 563
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
|
| 3 |
+
size 17210374
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e484bef0de9dbc2a1854d7263015c073f363105ee8aeafa103cb8b7f04d74337
|
| 3 |
+
size 6011
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e17eca23ec632d21ab0115d8db1a9360104fa7b08d928954eee2eed9adf46b8a
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e484bef0de9dbc2a1854d7263015c073f363105ee8aeafa103cb8b7f04d74337
|
| 3 |
+
size 6011
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bfdf5ccc88de7823e7dc6081ffb5cb38ed71058da20638fbf7f61d94c58874c
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa288bebadeaf65eb824b833596a55b793d13b5c957085cf3d7dc8c0e6de2ddf
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a08b70b84bf331849cc389e31e88e5bf289f53f5f48e4ac516e4a788f9bfae7d
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a256d097b17ba83387055dacebcb0185dd490bfab0db29d565b455331b9b685
|
| 3 |
+
size 563
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
|
| 3 |
+
size 17210374
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e484bef0de9dbc2a1854d7263015c073f363105ee8aeafa103cb8b7f04d74337
|
| 3 |
+
size 6011
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b29ad903afb6749549a67ba4698ef3df31a1e3e9dfb1b5e1608cdc0c7f134fed
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbfc100a7ea6b8e39a9f3b3bd74445fc7ec7c756194072251f759b6ef0f7f0d9
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65c1c60da2259cc4f2bb221937d352d1d8d838bc52aff6746e8e832ad9930d34
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5a50d106c1f89a4ed91288f57c29bfa325a2c965d4a3cee88795e02d99a339e
|
| 3 |
+
size 563
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
|
| 3 |
+
size 17210374
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e484bef0de9dbc2a1854d7263015c073f363105ee8aeafa103cb8b7f04d74337
|
| 3 |
+
size 6011
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2700/model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ffa4c668601430b6ccf7d5f1e70c2de63304d01d769b84c093241835bc032ea
|
| 3 |
+
size 4965798912
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2736/model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:690df0626d9ac92a412c881f42b4eb717f4e8dc67c26efe1c7400659b79efafa
|
| 3 |
+
size 2247734920
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-400/model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7875b524d6ae479b4d88ba2aeafb4a18c50cdd090d6536b964c849e44ec13587
|
| 3 |
+
size 4965798912
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-800/model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f37324a83852fc47ff9ae8238460942721dad8604c37f1958a5a56e347acd5a9
|
| 3 |
+
size 4965798912
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01b934e6ec558bac6fe169663502cde3d438344e9336e1e7d8afe651e9135428
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bc80f18f7e62b82a61f483f87081652089fe5a8da2188f74fe6f789b8c74274
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74c577a7e7176d95ccd3d6ec57d9ef0875615f3974af670a48262ddf57ea767d
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c842068b80c15ab581fea48d08f43b509bfab142b88272294384d42be7c28f0
|
| 3 |
+
size 563
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
|
| 3 |
+
size 17210374
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b62eb270908475de51cd2bd0793440ff3c7234746943dad6610947db320ee11a
|
| 3 |
+
size 6011
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:103551ff1c3e292b45450c5a23cdaf1958c78f242d8cf8b564df9603b5e96d5c
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ffe85d84d1459bc77a8988c2ddaa541ec71b9112c567063ec91af255e984793b
|
| 3 |
+
size 16567
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfbb44e31e401ec553bf4a48fb406a5b2b50f84e4cfbc78588dd8b86958e8632
|
| 3 |
+
size 563
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
|
| 3 |
+
size 17210374
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/config.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "meta-llama/Llama-3.2-3B",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LlamaForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": false,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 128000,
|
| 9 |
+
"eos_token_id": 128001,
|
| 10 |
+
"head_dim": 128,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 3072,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"num_attention_heads": 24,
|
| 19 |
+
"num_hidden_layers": 28,
|
| 20 |
+
"num_key_value_heads": 8,
|
| 21 |
+
"pretraining_tp": 1,
|
| 22 |
+
"rms_norm_eps": 1e-05,
|
| 23 |
+
"rope_scaling": {
|
| 24 |
+
"factor": 32.0,
|
| 25 |
+
"high_freq_factor": 4.0,
|
| 26 |
+
"low_freq_factor": 1.0,
|
| 27 |
+
"original_max_position_embeddings": 8192,
|
| 28 |
+
"rope_type": "llama3"
|
| 29 |
+
},
|
| 30 |
+
"rope_theta": 500000.0,
|
| 31 |
+
"tie_word_embeddings": true,
|
| 32 |
+
"torch_dtype": "float16",
|
| 33 |
+
"transformers_version": "4.45.1",
|
| 34 |
+
"use_cache": true,
|
| 35 |
+
"vocab_size": 128256
|
| 36 |
+
}
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/generation_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 128000,
|
| 4 |
+
"do_sample": true,
|
| 5 |
+
"eos_token_id": 128001,
|
| 6 |
+
"temperature": 0.6,
|
| 7 |
+
"top_p": 0.9,
|
| 8 |
+
"transformers_version": "4.45.1"
|
| 9 |
+
}
|
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/latest
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
global_step2300
|