faro1219 committed on
Commit
715bcf9
·
verified ·
1 Parent(s): 1cd52aa

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +7 -0
  2. fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/config.json +29 -0
  3. fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/res.log +4 -0
  4. fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/training.log +0 -0
  5. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_000000.pt +3 -0
  6. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_001000.pt +3 -0
  7. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_002000.pt +3 -0
  8. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_003000.pt +3 -0
  9. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_004000.pt +3 -0
  10. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_005000.pt +3 -0
  11. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_006000.pt +3 -0
  12. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_007000.pt +3 -0
  13. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_008000.pt +3 -0
  14. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_009000.pt +3 -0
  15. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_010000.pt +3 -0
  16. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_011000.pt +3 -0
  17. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_012000.pt +3 -0
  18. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_013000.pt +3 -0
  19. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_014000.pt +3 -0
  20. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_015000.pt +3 -0
  21. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_016000.pt +3 -0
  22. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_017000.pt +3 -0
  23. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_018000.pt +3 -0
  24. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_019000.pt +3 -0
  25. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_020000.pt +3 -0
  26. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_021000.pt +3 -0
  27. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_022000.pt +3 -0
  28. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_023000.pt +3 -0
  29. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_024000.pt +3 -0
  30. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_025000.pt +3 -0
  31. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_25001.pt +3 -0
  32. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis_ori.png +3 -0
  33. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis.png +3 -0
  34. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis_ori.png +3 -0
  35. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis.png +3 -0
  36. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis_ori.png +3 -0
  37. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis.png +3 -0
  38. fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis_ori.png +3 -0
  39. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_000000.pt +3 -0
  40. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_001000.pt +3 -0
  41. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_002000.pt +3 -0
  42. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_003000.pt +3 -0
  43. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_004000.pt +3 -0
  44. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_005000.pt +3 -0
  45. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_006000.pt +3 -0
  46. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_007000.pt +3 -0
  47. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_008000.pt +3 -0
  48. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_009000.pt +3 -0
  49. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_010000.pt +3 -0
  50. fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_011000.pt +3 -0
.gitattributes CHANGED
@@ -165,3 +165,10 @@ fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9
165
  fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_004500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
166
  fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_001500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
167
  fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250728_063551/training.log filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
165
  fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_004500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
166
  fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_001500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
167
  fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250728_063551/training.log filter=lfs diff=lfs merge=lfs -text
168
+ fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
169
+ fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
170
+ fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
171
+ fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
172
+ fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
173
+ fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
174
+ fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gpt2_small",
3
+ "input_bin": "data/fineweb/fineweb10B/fineweb_train_*.bin",
4
+ "input_val_bin": "data/fineweb/fineweb10B/fineweb_val_*.bin",
5
+ "output_dir": "./logs/fineweb-10B/gpt2/eos",
6
+ "batch_size": 32,
7
+ "sequence_length": 1024,
8
+ "num_iterations": 15000,
9
+ "optim_name": "adam",
10
+ "learning_rate": 0.0018,
11
+ "weight_decay": 0.1,
12
+ "beta1": 0.9,
13
+ "beta2": 0.95,
14
+ "eps": 1e-08,
15
+ "alpha": 0.99,
16
+ "momentum": 0.0,
17
+ "scheduler_name": "linear",
18
+ "warmup_iters": 256,
19
+ "warmdown_iters": 2048,
20
+ "cosine_end_iter": 5000,
21
+ "gamma": 0.1,
22
+ "cosine_start_iter": 2000,
23
+ "val_loss_every": 128,
24
+ "val_max_steps": 20,
25
+ "save_every": 500,
26
+ "raw": false,
27
+ "wandb_flag": true,
28
+ "wandb_name": "Stochastic EoS"
29
+ }
fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/res.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ final train loss: 3.382492
2
+ final val loss: 3.453866958618164
3
+ final 20 iters avg: 478.670ms
4
+ peak memory consumption: 50014 MiB
fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/training.log ADDED
The diff for this file is too large to render. See raw diff
 
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_000000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95625cc8310c20a1f3dc10ee5e01796c0229d3093fb8f728ac8e0183558d1108
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_001000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f148fbb603b49ab7edadbcd5f32041be2753879cd98424d263a9db1cf75d4fed
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_002000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22c23d673d2bf5939a9887e3f750f1374725373b909bd868746a49eed34db6d8
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_003000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da4b2cf1186868e3a39c6d45282d1f655188eab2098ba35e1099ec3bd638860e
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_004000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1292fc74076cfce7b1a42644e517821287628c7eed9d03a944bff0e5d6e6e11
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_005000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e9772eca90cdd5281f93603142c699e33fa68aa1ca880e5f6fbf519f970119
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_006000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cefd9eb505904f2ca31998c62d33e89139725043c7d6dfaf66e365ff76a762ba
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_007000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e78e3e5dd923361638bd6de7f3e8d7e9b4bcc889a4e59465236b9047d3662dd7
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_008000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e114a3864801416cef8bd154280528e585fd90e54edaf66d9161da35c59fa533
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_009000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5884e3641fbf14fdf1a58708fb780080c784741b03beaeb849517b61cabfa750
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_010000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:109351e5358ae2f7d7dd297e724545c5f8747122f98fd5c4db60d873a735e297
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_011000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acb881147c41775cf45678400e46d2e1a47f18e9afcbc1abaf018e706f2e8c9f
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_012000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18cb525c324b3f1f9334babbae55308aed71eb7ca7c0784e01b838816c57ce06
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_013000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39f0d1bb8c21c25444b3cf2a537131a66a96075baf58d8d2e9edab2074bec4f2
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_014000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54dd8796e4e200ea2768b46b538304f56ca985be8b686007f1c3932bb3f22a90
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_015000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0185a822e6cb77a44b37c96b12e7282902a393191aafa3c787d314257a2cd80
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_016000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d80adf72dced5b24db1f26bdb8c5eaa0c08aacb07f83b7eae8765df9feb6f94
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_017000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19b7540551575e0548ad72b0078d4d511161a614306b05f1dc2b1e0426a11542
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_018000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36200ca1cdc8e225454ece94158c7ec8444106a18ae434c5813158e1327680a5
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_019000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4292ba139611d030f3f62bee14df7502cce1d5de310dd6c99dcc78df8c6f2aa8
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_020000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b9f7a3001a876db7b99df0b956571ddad29812b8d2c2a2db1e93ed7a2069f8a
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_021000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db35c93d1089875e3d20826fbdca53d7713703c284c8a868e2dfecd54832fc3
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_022000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab66e77a3681337c1b487ef59288436c504319f698f06dd8b22c367a9e4c1af0
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_023000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c1c9cac22cc80e2503f9cbc698dec47f0a8b19386f07323a625d155b75b957b
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_024000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ee7d063baa198b019faf2e0b5c4bb35f58b7278b03ebead60b382cb382d255b
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_025000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b4fb2f7bf6ff13e2a5fb08c3b76eabf580d6dfeb8ea12889f90ab7bf24ffea1
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_25001.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968450f9da4b0fc6761111a769174aefe2bfb544116b05c04e6d4a6b961a5bf4
3
+ size 648830320
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis_ori.png ADDED

Git LFS Details

  • SHA256: 562aa16e449650527dacb05ff918bb7bc53c0cc1a1de3a2210c0fec8cb23d5d9
  • Pointer size: 131 Bytes
  • Size of remote file: 653 kB
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis.png ADDED

Git LFS Details

  • SHA256: 55f23c3d6641d8cba5d31b9c565209c640a76aed709a02a3f107917f4391347b
  • Pointer size: 131 Bytes
  • Size of remote file: 654 kB
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis_ori.png ADDED

Git LFS Details

  • SHA256: e8bb46756640655d8e6f81130e7857a91db7f81e2d97cacc8dc6fe1c908f551c
  • Pointer size: 131 Bytes
  • Size of remote file: 652 kB
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis.png ADDED

Git LFS Details

  • SHA256: cc34414a4da834543d6b809f63ff3a13091c7ac80e9448445dcfe296e4c114aa
  • Pointer size: 131 Bytes
  • Size of remote file: 573 kB
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis_ori.png ADDED

Git LFS Details

  • SHA256: 7789bfea25757946fb1a91538bcc5ee7389fb3dcc65854eb1243baa52dd1b852
  • Pointer size: 131 Bytes
  • Size of remote file: 662 kB
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis.png ADDED

Git LFS Details

  • SHA256: 07b627ed7ea1f7f0dcd1e671a456672f3fc4a7cb7f717335e2e0f5275a505a6b
  • Pointer size: 131 Bytes
  • Size of remote file: 508 kB
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis_ori.png ADDED

Git LFS Details

  • SHA256: a05044a011739d86f00823c901da01a4e2eda1b6b83712371fd2f957efa8328d
  • Pointer size: 131 Bytes
  • Size of remote file: 699 kB
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_000000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:def9372de59c174ae4760938e839d648ccb6124a48283f076b08d932d09c6420
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_001000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd230a2b3c7a18db4a320a273f58ec52f1ba4d2cc82a9f77130a0bfc0d015c9
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_002000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f73ddf4a35b7d65c9d2cb70bfeca23597b0392efba557bb6595639e4cbc811a
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_003000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dcee823af7da845a419d38f1bd6f3cb31738ec72588dd6a4d50315365aaf8d0
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_004000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c3a71c48729f6b1147323a967c30e7672b6c9617de1bed9de2b5c644727f8e
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_005000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e840f0440eb49f39fb46bf75694cb738bb94a72c544371f9f394b3ef1cf88ac8
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_006000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4480a185ce1acfe04fbc532f47eb36b0434edb94f03846e3bb1de1dd2e5e80
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_007000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca137674d6d81a81ef448ff9cf2eba5744c87830a010266397d3563c93e73752
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_008000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4014c14fbe11b2a5409734f45954872644710096b47643874fa6d5dd15f5af0
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_009000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba515a511980964072f9af26b6b5fffedc02d507bf132d3a574aef88b6609520
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_010000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:358a6e6d0180e1a321c38b37468a7e08cae1d312f1c8f9e224357808b225af19
3
+ size 648830485
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_011000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a97ed12011984b26491754ad36acfb87e7b788c9c1a19bfb446b0101956fe88d
3
+ size 648830485