johannes-garstenauer commited on
Commit
bf94592
·
1 Parent(s): c7ca1fb

Training in progress, step 1224, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec0fe031fb601823131bb4dbd2e7ecb0232c0e110be96f31881fae63dfd88823
3
  size 532568837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26488b1d1e886839b9ccfa783e04ffc3c52f02bd7f6bb780ff5fa4d29b43ad43
3
  size 532568837
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ca12dbb2c8c0d3381a15ae0aa1694e0abf6b1df363ea9402476667c70d618d1
3
  size 266276525
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c1608acf8459586739b5798f159f3166f9f3e849e9b4f14b519c9b9db81e20
3
  size 266276525
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cb96b819bbac1ba21e831ad00539761cf97265a7ddcc0bd1ce830fffe8b3026
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:658e78e94ca25df723112bfd1612621e360a0066619a5c6eb7f279f9b04202f2
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc361b8b34ebb69e744e2b97e078b4a6110c31bef4353e916e0c294c5a73d8a8
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bf2106b57fdf7e889c29a04ddb9b3c8180750e3500abe2f8e432d45c5ab5521
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.5178571428571428,
5
  "eval_steps": 500,
6
- "global_step": 680,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -24,13 +24,30 @@
24
  "learning_rate": 3.1398809523809525e-05,
25
  "loss": 0.1071,
26
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
  ],
29
  "logging_steps": 500,
30
  "max_steps": 1344,
31
  "num_train_epochs": 3,
32
  "save_steps": 68,
33
- "total_flos": 2880657692851200.0,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.732142857142857,
5
  "eval_steps": 500,
6
+ "global_step": 1224,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
24
  "learning_rate": 3.1398809523809525e-05,
25
  "loss": 0.1071,
26
  "step": 500
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "eval_accuracy": {
31
+ "accuracy": 0.9986737400530504
32
+ },
33
+ "eval_loss": 0.007976386696100235,
34
+ "eval_runtime": 4.6911,
35
+ "eval_samples_per_second": 160.729,
36
+ "eval_steps_per_second": 5.116,
37
+ "step": 896
38
+ },
39
+ {
40
+ "epoch": 2.23,
41
+ "learning_rate": 1.2797619047619047e-05,
42
+ "loss": 0.009,
43
+ "step": 1000
44
  }
45
  ],
46
  "logging_steps": 500,
47
  "max_steps": 1344,
48
  "num_train_epochs": 3,
49
  "save_steps": 68,
50
+ "total_flos": 5184786423674880.0,
51
  "trial_name": null,
52
  "trial_params": null
53
  }