Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +7 -0
- fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/config.json +29 -0
- fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/res.log +4 -0
- fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/training.log +0 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_000000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_001000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_002000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_003000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_004000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_005000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_006000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_007000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_008000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_009000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_010000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_011000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_012000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_013000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_014000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_015000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_016000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_017000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_018000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_019000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_020000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_021000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_022000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_023000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_024000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_025000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_25001.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis_ori.png +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis.png +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis_ori.png +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis.png +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis_ori.png +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis.png +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis_ori.png +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_000000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_001000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_002000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_003000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_004000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_005000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_006000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_007000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_008000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_009000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_010000.pt +3 -0
- fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_011000.pt +3 -0
.gitattributes
CHANGED
|
@@ -165,3 +165,10 @@ fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9
|
|
| 165 |
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_004500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
|
| 166 |
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_001500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
|
| 167 |
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250728_063551/training.log filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_004500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
|
| 166 |
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_001500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
|
| 167 |
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250728_063551/training.log filter=lfs diff=lfs merge=lfs -text
|
| 168 |
+
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
|
| 169 |
+
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
|
| 170 |
+
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
|
fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name": "gpt2_small",
|
| 3 |
+
"input_bin": "data/fineweb/fineweb10B/fineweb_train_*.bin",
|
| 4 |
+
"input_val_bin": "data/fineweb/fineweb10B/fineweb_val_*.bin",
|
| 5 |
+
"output_dir": "./logs/fineweb-10B/gpt2/eos",
|
| 6 |
+
"batch_size": 32,
|
| 7 |
+
"sequence_length": 1024,
|
| 8 |
+
"num_iterations": 15000,
|
| 9 |
+
"optim_name": "adam",
|
| 10 |
+
"learning_rate": 0.0018,
|
| 11 |
+
"weight_decay": 0.1,
|
| 12 |
+
"beta1": 0.9,
|
| 13 |
+
"beta2": 0.95,
|
| 14 |
+
"eps": 1e-08,
|
| 15 |
+
"alpha": 0.99,
|
| 16 |
+
"momentum": 0.0,
|
| 17 |
+
"scheduler_name": "linear",
|
| 18 |
+
"warmup_iters": 256,
|
| 19 |
+
"warmdown_iters": 2048,
|
| 20 |
+
"cosine_end_iter": 5000,
|
| 21 |
+
"gamma": 0.1,
|
| 22 |
+
"cosine_start_iter": 2000,
|
| 23 |
+
"val_loss_every": 128,
|
| 24 |
+
"val_max_steps": 20,
|
| 25 |
+
"save_every": 500,
|
| 26 |
+
"raw": false,
|
| 27 |
+
"wandb_flag": true,
|
| 28 |
+
"wandb_name": "Stochastic EoS"
|
| 29 |
+
}
|
fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/res.log
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
final train loss: 3.382492
|
| 2 |
+
final val loss: 3.453866958618164
|
| 3 |
+
final 20 iters avg: 478.670ms
|
| 4 |
+
peak memory consumption: 50014 MiB
|
fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/training.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_000000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95625cc8310c20a1f3dc10ee5e01796c0229d3093fb8f728ac8e0183558d1108
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_001000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f148fbb603b49ab7edadbcd5f32041be2753879cd98424d263a9db1cf75d4fed
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_002000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22c23d673d2bf5939a9887e3f750f1374725373b909bd868746a49eed34db6d8
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_003000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da4b2cf1186868e3a39c6d45282d1f655188eab2098ba35e1099ec3bd638860e
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_004000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1292fc74076cfce7b1a42644e517821287628c7eed9d03a944bff0e5d6e6e11
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_005000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56e9772eca90cdd5281f93603142c699e33fa68aa1ca880e5f6fbf519f970119
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_006000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cefd9eb505904f2ca31998c62d33e89139725043c7d6dfaf66e365ff76a762ba
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_007000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e78e3e5dd923361638bd6de7f3e8d7e9b4bcc889a4e59465236b9047d3662dd7
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_008000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e114a3864801416cef8bd154280528e585fd90e54edaf66d9161da35c59fa533
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_009000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5884e3641fbf14fdf1a58708fb780080c784741b03beaeb849517b61cabfa750
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_010000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:109351e5358ae2f7d7dd297e724545c5f8747122f98fd5c4db60d873a735e297
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_011000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acb881147c41775cf45678400e46d2e1a47f18e9afcbc1abaf018e706f2e8c9f
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_012000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18cb525c324b3f1f9334babbae55308aed71eb7ca7c0784e01b838816c57ce06
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_013000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39f0d1bb8c21c25444b3cf2a537131a66a96075baf58d8d2e9edab2074bec4f2
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_014000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54dd8796e4e200ea2768b46b538304f56ca985be8b686007f1c3932bb3f22a90
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_015000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0185a822e6cb77a44b37c96b12e7282902a393191aafa3c787d314257a2cd80
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_016000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d80adf72dced5b24db1f26bdb8c5eaa0c08aacb07f83b7eae8765df9feb6f94
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_017000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19b7540551575e0548ad72b0078d4d511161a614306b05f1dc2b1e0426a11542
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_018000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36200ca1cdc8e225454ece94158c7ec8444106a18ae434c5813158e1327680a5
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_019000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4292ba139611d030f3f62bee14df7502cce1d5de310dd6c99dcc78df8c6f2aa8
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_020000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b9f7a3001a876db7b99df0b956571ddad29812b8d2c2a2db1e93ed7a2069f8a
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_021000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1db35c93d1089875e3d20826fbdca53d7713703c284c8a868e2dfecd54832fc3
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_022000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab66e77a3681337c1b487ef59288436c504319f698f06dd8b22c367a9e4c1af0
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_023000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c1c9cac22cc80e2503f9cbc698dec47f0a8b19386f07323a625d155b75b957b
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_024000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ee7d063baa198b019faf2e0b5c4bb35f58b7278b03ebead60b382cb382d255b
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_025000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b4fb2f7bf6ff13e2a5fb08c3b76eabf580d6dfeb8ea12889f90ab7bf24ffea1
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/checkpoints/step_25001.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:968450f9da4b0fc6761111a769174aefe2bfb544116b05c04e6d4a6b961a5bf4
|
| 3 |
+
size 648830320
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis_ori.png
ADDED
|
Git LFS Details
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis.png
ADDED
|
Git LFS Details
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis_ori.png
ADDED
|
Git LFS Details
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis.png
ADDED
|
Git LFS Details
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis_ori.png
ADDED
|
Git LFS Details
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis.png
ADDED
|
Git LFS Details
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis_ori.png
ADDED
|
Git LFS Details
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_000000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:def9372de59c174ae4760938e839d648ccb6124a48283f076b08d932d09c6420
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_001000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddd230a2b3c7a18db4a320a273f58ec52f1ba4d2cc82a9f77130a0bfc0d015c9
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_002000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f73ddf4a35b7d65c9d2cb70bfeca23597b0392efba557bb6595639e4cbc811a
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_003000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dcee823af7da845a419d38f1bd6f3cb31738ec72588dd6a4d50315365aaf8d0
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_004000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34c3a71c48729f6b1147323a967c30e7672b6c9617de1bed9de2b5c644727f8e
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_005000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e840f0440eb49f39fb46bf75694cb738bb94a72c544371f9f394b3ef1cf88ac8
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_006000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4480a185ce1acfe04fbc532f47eb36b0434edb94f03846e3bb1de1dd2e5e80
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_007000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca137674d6d81a81ef448ff9cf2eba5744c87830a010266397d3563c93e73752
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_008000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4014c14fbe11b2a5409734f45954872644710096b47643874fa6d5dd15f5af0
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_009000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba515a511980964072f9af26b6b5fffedc02d507bf132d3a574aef88b6609520
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_010000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:358a6e6d0180e1a321c38b37468a7e08cae1d312f1c8f9e224357808b225af19
|
| 3 |
+
size 648830485
|
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250729_185249/checkpoints/step_011000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a97ed12011984b26491754ad36acfb87e7b788c9c1a19bfb446b0101956fe88d
|
| 3 |
+
size 648830485
|