mgh6 commited on
Commit
ea71c3e
·
verified ·
1 Parent(s): 72b1405

Training in progress, epoch 21, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5e4a4964aa52fa5f2e90117528886e8767e7b499da342b51fd1fda631cf7733
3
  size 2610104820
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e62f21c2524fbe6fa35e7771d7a0f174a91d2590b39bd6f6aeb7cdccfd0659c
3
  size 2610104820
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b34f4d92005ec5aeb9e1e91707e5f1819776ab663d8cc28be1642ba464abbe23
3
  size 5210004271
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bccecaf037aa026fb3aeecf97033588a8b734ad77c6f3316f4d4d4665be6d75
3
  size 5210004271
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a9ca8e9630b65f3d22543e10ee333ae6f602385a9988471ec0251618f4f6a6
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70145026e21afc6ea2717a18ed89206163fc726fb3040617116c08c85b455de2
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cfefeb8f7340b0c0bd6f5e98fbdc1378c288a194b7a754fb6ffb0b657e9a3a4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2da52fce13790b5d54928ad82a11cde2bbdaabd941b9375b0d9e259039c539e5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.8495596647262573,
3
  "best_model_checkpoint": "mgh6/HTH_prob/checkpoint-4636",
4
- "epoch": 20.996045413955862,
5
  "eval_steps": 500,
6
- "global_step": 5124,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -322,6 +322,21 @@
322
  "eval_samples_per_second": 32.578,
323
  "eval_steps_per_second": 16.289,
324
  "step": 5124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  }
326
  ],
327
  "logging_steps": 500,
@@ -336,7 +351,7 @@
336
  "early_stopping_threshold": 0.0
337
  },
338
  "attributes": {
339
- "early_stopping_patience_counter": 2
340
  }
341
  },
342
  "TrainerControl": {
 
1
  {
2
  "best_metric": 0.8495596647262573,
3
  "best_model_checkpoint": "mgh6/HTH_prob/checkpoint-4636",
4
+ "epoch": 21.996045413955862,
5
  "eval_steps": 500,
6
+ "global_step": 5368,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
322
  "eval_samples_per_second": 32.578,
323
  "eval_steps_per_second": 16.289,
324
  "step": 5124
325
+ },
326
+ {
327
+ "epoch": 21.996045413955862,
328
+ "grad_norm": 0.033235229551792145,
329
+ "learning_rate": 6.333333333333333e-05,
330
+ "loss": 0.8128,
331
+ "step": 5368
332
+ },
333
+ {
334
+ "epoch": 21.996045413955862,
335
+ "eval_loss": 0.8513291478157043,
336
+ "eval_runtime": 25.3035,
337
+ "eval_samples_per_second": 32.644,
338
+ "eval_steps_per_second": 16.322,
339
+ "step": 5368
340
  }
341
  ],
342
  "logging_steps": 500,
 
351
  "early_stopping_threshold": 0.0
352
  },
353
  "attributes": {
354
+ "early_stopping_patience_counter": 3
355
  }
356
  },
357
  "TrainerControl": {