AbstractPhil committed on
Commit
b31ca7a
·
verified ·
1 Parent(s): 09dc994

Upload 3 files

Browse files

>2 billion samples

Files changed (3) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. training_state.json +3 -3
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b42566d5d44a0e4c0da5290bf23e9e70cd1ecb79ab8d1f12a15e2632fe657e
3
  size 549501112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efc7996412a69b131b5e4f4ae5746ab3b3258572aee42b1185a73134bc323d3a
3
  size 549501112
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a688820740b08ad633413ac665df53156c09acabca40250ae0b5e961f4c86ffe
3
  size 1099075450
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07923533f825a57d97276ae11c7b57caf8c70fa1d2940f6b3fe908dd5d5555a4
3
  size 1099075450
training_state.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "step": 1050000,
3
- "epoch": 27,
4
  "vocab_size": 30574,
5
  "model_vocab_size": 30592,
6
  "config": {
7
  "optimizer_type": "adamw",
8
  "lr": 0.0001,
9
  "weight_decay": 0.01,
10
- "warmup_steps": 8000,
11
  "scheduler_type": "cosine",
12
  "scheduler_params": {
13
  "eta_min": 1e-07
 
1
  {
2
+ "step": 2008000,
3
+ "epoch": 53,
4
  "vocab_size": 30574,
5
  "model_vocab_size": 30592,
6
  "config": {
7
  "optimizer_type": "adamw",
8
  "lr": 0.0001,
9
  "weight_decay": 0.01,
10
+ "warmup_steps": 6000,
11
  "scheduler_type": "cosine",
12
  "scheduler_params": {
13
  "eta_min": 1e-07