mgh6 commited on
Commit
0472d64
·
verified ·
1 Parent(s): 77fe4e3

Training in progress, step 1400, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38b93229869fbaa7f83dc901e0f80abad3e6409191726851e0d351adbbb23289
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35286f85be522ab120b67ef86a9b2fea852320beeec84ecb8e980c2c25acc3dc
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:246aacfeeb41eb95982a1aa7a0c2ad30cdaac0774e9162d63f49512902035746
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9186371be341491f8cddef66b45c7672dd44b1d906084bbec228e7ac80cb3411
3
  size 268176506
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fc8ded50a2551e9c75c08b06e38e07c796d38055e9b1c4ca00d25008510b20b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15433f7b2e9dde73085fab120a65bed01df370c9b02a1594a472d70068457ea5
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67da4e9db4800df6c1e9ce979d121954ce0da621dd31f9082fc18d53729e7dbe
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53a7daa52c58fe1fb49f677ab523d6f9e6c00f2599813aaa026fc90badc494a1
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1b9be461c679a45d0e9dc290a99f941826b96670541a646e875429ebac4b6d4
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70429062c9260565eaadd7c683e4ca4b0df3e30951bd8422b10d4ac7e4ffd7b8
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67d1d3fc519e236cb1a514859da17fb4101fd90584e559ef4092ff69d616f7b4
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecdce592e98f25cf4a04063424668eaa8d77ff8c0a42490c9215deb661680414
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e95a306d782562de001c16fb3cdca8e82cd47595ef0f485c24a21d851800935b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3477b2d9f0cb08adf52a30b7e6192998cc41f2cc3eb4ead00d452c0bcd1756e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.8838639259338379,
3
- "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-1300",
4
- "epoch": 1.7402945113788486,
5
  "eval_steps": 100,
6
- "global_step": 1300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -202,6 +202,21 @@
202
  "eval_samples_per_second": 892.596,
203
  "eval_steps_per_second": 3.607,
204
  "step": 1300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  }
206
  ],
207
  "logging_steps": 100,
@@ -230,7 +245,7 @@
230
  "attributes": {}
231
  }
232
  },
233
- "total_flos": 4.7205951995904e+16,
234
  "train_batch_size": 64,
235
  "trial_name": null,
236
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8721866607666016,
3
+ "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-1400",
4
+ "epoch": 1.8741633199464525,
5
  "eval_steps": 100,
6
+ "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
202
  "eval_samples_per_second": 892.596,
203
  "eval_steps_per_second": 3.607,
204
  "step": 1300
205
+ },
206
+ {
207
+ "epoch": 1.8741633199464525,
208
+ "grad_norm": 0.196046844124794,
209
+ "learning_rate": 0.0008125836680053548,
210
+ "loss": 0.8933,
211
+ "step": 1400
212
+ },
213
+ {
214
+ "epoch": 1.8741633199464525,
215
+ "eval_loss": 0.8721866607666016,
216
+ "eval_runtime": 6.3772,
217
+ "eval_samples_per_second": 892.551,
218
+ "eval_steps_per_second": 3.607,
219
+ "step": 1400
220
  }
221
  ],
222
  "logging_steps": 100,
 
245
  "attributes": {}
246
  }
247
  },
248
+ "total_flos": 5.0837179072512e+16,
249
  "train_batch_size": 64,
250
  "trial_name": null,
251
  "trial_params": null