Yaning1001 committed on
Commit
8e3abd1
·
verified ·
1 Parent(s): ac42b45

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +8 -0
  2. checkpoints/Llama-3.2-3B-FTP/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. checkpoints/Llama-3.2-3B-FTP/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb +3 -0
  5. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 +3 -0
  6. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors +3 -0
  7. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00002-of-00002.safetensors +3 -0
  8. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1000/model-00002-of-00002.safetensors +3 -0
  9. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1400/model-00001-of-00002.safetensors +3 -0
  10. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1800/model-00001-of-00002.safetensors +3 -0
  11. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1900/model-00001-of-00002.safetensors +3 -0
  12. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-200/model-00002-of-00002.safetensors +3 -0
  13. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2100/model-00002-of-00002.safetensors +3 -0
  14. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_0.pth +3 -0
  15. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_1.pth +3 -0
  16. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_2.pth +3 -0
  17. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/scheduler.pt +3 -0
  18. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/tokenizer.json +3 -0
  19. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/training_args.bin +3 -0
  20. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/rng_state_1.pth +3 -0
  21. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/training_args.bin +3 -0
  22. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_0.pth +3 -0
  23. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_1.pth +3 -0
  24. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_2.pth +3 -0
  25. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/scheduler.pt +3 -0
  26. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/tokenizer.json +3 -0
  27. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/training_args.bin +3 -0
  28. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_0.pth +3 -0
  29. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_1.pth +3 -0
  30. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_2.pth +3 -0
  31. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/scheduler.pt +3 -0
  32. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/tokenizer.json +3 -0
  33. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/training_args.bin +3 -0
  34. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2700/model-00001-of-00002.safetensors +3 -0
  35. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2736/model-00002-of-00002.safetensors +3 -0
  36. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-400/model-00001-of-00002.safetensors +3 -0
  37. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-800/model-00001-of-00002.safetensors +3 -0
  38. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_0.pth +3 -0
  39. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_1.pth +3 -0
  40. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_2.pth +3 -0
  41. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/scheduler.pt +3 -0
  42. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/tokenizer.json +3 -0
  43. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/training_args.bin +3 -0
  44. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/rng_state_0.pth +3 -0
  45. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/rng_state_2.pth +3 -0
  46. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/scheduler.pt +3 -0
  47. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/tokenizer.json +3 -0
  48. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/config.json +36 -0
  49. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/generation_config.json +9 -0
  50. checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/latest +1 -0
.gitattributes CHANGED
@@ -474,3 +474,11 @@ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/check
474
  checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
475
  checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
476
  checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
474
  checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
475
  checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
476
  checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
477
+ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
478
+ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
479
+ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
480
+ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 filter=lfs diff=lfs merge=lfs -text
481
+ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb filter=lfs diff=lfs merge=lfs -text
482
+ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
483
+ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
484
+ checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7ba538ad057004706fbc41ab7a2891a0737c5f21f0146fb0f5853e4058a5b2d
3
+ size 25702104228
checkpoints/Llama-3.2-3B-FTP/babylm_reverse_partial_10M_seed0/runs/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1481f1831e5310ce8fbf499492bac9bdd6b8d16f33d889ff30faba36f28943a1
3
+ size 25702107428
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb
3
+ size 1459729952
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002
3
+ size 4965799096
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002
3
+ size 4965799096
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb
3
+ size 1459729952
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28051a8c2a5f83fed48af40900c0996e7fa90218c8be19c4c194686ed529547b
3
+ size 2247734920
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1400/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce00fce8136b9f0731d237926e667fa5aa61b18fddac7406c0a2cf46496bc217
3
+ size 4965798912
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1800/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b248d3ea91548006f181907865da500590937f8e5023d9f24533d765038dedbd
3
+ size 4965798912
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-1900/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bdb5e0577fff0aaf2d97b2422c034677fda38875c3bb4c3edb30490b2d491ad
3
+ size 4965798912
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-200/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45b6cabbd4cf1a136e34060bd944e77dfd9b269034e96b6bceae6496363fa60d
3
+ size 2247734920
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2100/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:812a79e59e58f3116a592fd95ad17bf4fd510b3aca83fa5a4c02985e71d590eb
3
+ size 2247734920
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef233d2806676c682805f85cf51776d27dc06740dc062015cc1e961a14df9e97
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24be9519b492ae2ab5da4e7d258ad3f276da346422161ebc6874f26057c028cd
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a120a924d0a14713ef6ebebe2792a637dac07f7fbafa91cf0930019864eb1b
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ae0b26ac4fa40c1ab1e0b15f393cd9c504f42bb6597d6fdb213847b1fd4b2e
3
+ size 563
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
3
+ size 17210374
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e484bef0de9dbc2a1854d7263015c073f363105ee8aeafa103cb8b7f04d74337
3
+ size 6011
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e17eca23ec632d21ab0115d8db1a9360104fa7b08d928954eee2eed9adf46b8a
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2300/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e484bef0de9dbc2a1854d7263015c073f363105ee8aeafa103cb8b7f04d74337
3
+ size 6011
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bfdf5ccc88de7823e7dc6081ffb5cb38ed71058da20638fbf7f61d94c58874c
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa288bebadeaf65eb824b833596a55b793d13b5c957085cf3d7dc8c0e6de2ddf
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a08b70b84bf331849cc389e31e88e5bf289f53f5f48e4ac516e4a788f9bfae7d
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a256d097b17ba83387055dacebcb0185dd490bfab0db29d565b455331b9b685
3
+ size 563
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
3
+ size 17210374
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e484bef0de9dbc2a1854d7263015c073f363105ee8aeafa103cb8b7f04d74337
3
+ size 6011
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b29ad903afb6749549a67ba4698ef3df31a1e3e9dfb1b5e1608cdc0c7f134fed
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbfc100a7ea6b8e39a9f3b3bd74445fc7ec7c756194072251f759b6ef0f7f0d9
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65c1c60da2259cc4f2bb221937d352d1d8d838bc52aff6746e8e832ad9930d34
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5a50d106c1f89a4ed91288f57c29bfa325a2c965d4a3cee88795e02d99a339e
3
+ size 563
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
3
+ size 17210374
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e484bef0de9dbc2a1854d7263015c073f363105ee8aeafa103cb8b7f04d74337
3
+ size 6011
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2700/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ffa4c668601430b6ccf7d5f1e70c2de63304d01d769b84c093241835bc032ea
3
+ size 4965798912
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-2736/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:690df0626d9ac92a412c881f42b4eb717f4e8dc67c26efe1c7400659b79efafa
3
+ size 2247734920
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-400/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7875b524d6ae479b4d88ba2aeafb4a18c50cdd090d6536b964c849e44ec13587
3
+ size 4965798912
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_deterministic84_10M_seed0/runs/checkpoint-800/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f37324a83852fc47ff9ae8238460942721dad8604c37f1958a5a56e347acd5a9
3
+ size 4965798912
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01b934e6ec558bac6fe169663502cde3d438344e9336e1e7d8afe651e9135428
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bc80f18f7e62b82a61f483f87081652089fe5a8da2188f74fe6f789b8c74274
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74c577a7e7176d95ccd3d6ec57d9ef0875615f3974af670a48262ddf57ea767d
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c842068b80c15ab581fea48d08f43b509bfab142b88272294384d42be7c28f0
3
+ size 563
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
3
+ size 17210374
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b62eb270908475de51cd2bd0793440ff3c7234746943dad6610947db320ee11a
3
+ size 6011
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:103551ff1c3e292b45450c5a23cdaf1958c78f242d8cf8b564df9603b5e96d5c
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffe85d84d1459bc77a8988c2ddaa541ec71b9112c567063ec91af255e984793b
3
+ size 16567
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfbb44e31e401ec553bf4a48fb406a5b2b50f84e4cfbc78588dd8b86958e8632
3
+ size 563
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-1700/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d422bc7d90212ffb8070a743cdbe8ff2dfc5634b1fce8f2e3fa671ee441989eb
3
+ size 17210374
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "meta-llama/Llama-3.2-3B",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 128000,
9
+ "eos_token_id": 128001,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 3072,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "num_attention_heads": 24,
19
+ "num_hidden_layers": 28,
20
+ "num_key_value_heads": 8,
21
+ "pretraining_tp": 1,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_scaling": {
24
+ "factor": 32.0,
25
+ "high_freq_factor": 4.0,
26
+ "low_freq_factor": 1.0,
27
+ "original_max_position_embeddings": 8192,
28
+ "rope_type": "llama3"
29
+ },
30
+ "rope_theta": 500000.0,
31
+ "tie_word_embeddings": true,
32
+ "torch_dtype": "float16",
33
+ "transformers_version": "4.45.1",
34
+ "use_cache": true,
35
+ "vocab_size": 128256
36
+ }
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "do_sample": true,
5
+ "eos_token_id": 128001,
6
+ "temperature": 0.6,
7
+ "top_p": 0.9,
8
+ "transformers_version": "4.45.1"
9
+ }
checkpoints/Llama-3.2-3B-FTP/babylm_shuffle_nondeterministic_10M_seed0/runs/checkpoint-2300/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step2300