mgh6 committed on
Commit
25a67f3
·
verified ·
1 Parent(s): 211d612

Training in progress, epoch 9, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:148108680e1c355b14ff9c8fdb0b036b9dd0b4f80824bbb97e6841ea7bbccd5f
3
  size 2610104820
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e28c72beb1acad11718cf70f833baa11d22a709648c8bcee9bf0c1540a3d3341
3
  size 2610104820
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3a5760f9f6911291b319348564ca8aeff757b71959b72ee1ba089d86df01c8
3
  size 5210004271
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7977ea558a8e8da98dd9f5dd4cfd83e2947d4244886ce537ca44c985eb4832a2
3
  size 5210004271
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a73f4ec8cc461fdcad0bea4ca4995104cb70648550f70c0efd1720de2eef3ad
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f75193245e65869ca07a16a2caab41ba86286fcae28542136bb36df639a39935
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a76d1e97e3814aeb3d609132d6f8310077a0cb3a51a73e991f39e961364cd916
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e7a70b99a4ca390e08d8c14e39ca3fc804d75e54a2b4300e5640fb55822343
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.5642563700675964,
3
- "best_model_checkpoint": "mgh6/HTH_pssm/checkpoint-800",
4
- "epoch": 8.0,
5
  "eval_steps": 50,
6
- "global_step": 872,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -262,6 +262,36 @@
262
  "eval_samples_per_second": 42.927,
263
  "eval_steps_per_second": 21.463,
264
  "step": 850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  }
266
  ],
267
  "logging_steps": 50,
 
1
  {
2
+ "best_metric": 0.5641968250274658,
3
+ "best_model_checkpoint": "mgh6/HTH_pssm/checkpoint-900",
4
+ "epoch": 9.0,
5
  "eval_steps": 50,
6
+ "global_step": 981,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
262
  "eval_samples_per_second": 42.927,
263
  "eval_steps_per_second": 21.463,
264
  "step": 850
265
+ },
266
+ {
267
+ "epoch": 8.2582504683672,
268
+ "grad_norm": 0.01695585809648037,
269
+ "learning_rate": 1.6666666666666667e-05,
270
+ "loss": 0.5335,
271
+ "step": 900
272
+ },
273
+ {
274
+ "epoch": 8.2582504683672,
275
+ "eval_loss": 0.5641968250274658,
276
+ "eval_runtime": 8.5378,
277
+ "eval_samples_per_second": 42.868,
278
+ "eval_steps_per_second": 21.434,
279
+ "step": 900
280
+ },
281
+ {
282
+ "epoch": 8.719412019022913,
283
+ "grad_norm": 0.03029218688607216,
284
+ "learning_rate": 1.2037037037037037e-05,
285
+ "loss": 0.5407,
286
+ "step": 950
287
+ },
288
+ {
289
+ "epoch": 8.719412019022913,
290
+ "eval_loss": 0.5642545223236084,
291
+ "eval_runtime": 8.5373,
292
+ "eval_samples_per_second": 42.871,
293
+ "eval_steps_per_second": 21.435,
294
+ "step": 950
295
  }
296
  ],
297
  "logging_steps": 50,